| author | Dimitry Andric <dim@FreeBSD.org> | 2021-02-16 20:13:02 +0000 |
|---|---|---|
| committer | Dimitry Andric <dim@FreeBSD.org> | 2021-02-16 20:13:02 +0000 |
| commit | b60736ec1405bb0a8dd40989f67ef4c93da068ab | |
| tree | 5c43fbb7c9fc45f0f87e0e6795a86267dbd12f9d /llvm/include | |
| parent | cfca06d7963fa0909f90483b42a6d7d194d01e08 | |
| download | src-b60736ec1405bb0a8dd40989f67ef4c93da068ab.tar.gz, src-b60736ec1405bb0a8dd40989f67ef4c93da068ab.zip | |
Vendor import of llvm-project main 8e464dd76bef, the last commit before
the upstream release/12.x branch was created.

(ref: vendor/llvm-project/llvmorg-12-init-17869-g8e464dd76bef)
Diffstat (limited to 'llvm/include')
715 files changed, 36042 insertions, 13272 deletions
diff --git a/llvm/include/llvm-c/Core.h b/llvm/include/llvm-c/Core.h index 2c7b4c6eff10..8274213aa839 100644 --- a/llvm/include/llvm-c/Core.h +++ b/llvm/include/llvm-c/Core.h @@ -160,6 +160,7 @@ typedef enum { LLVMVectorTypeKind, /**< Fixed width SIMD vector type */ LLVMMetadataTypeKind, /**< Metadata */ LLVMX86_MMXTypeKind, /**< X86 MMX */ + LLVMX86_AMXTypeKind, /**< X86 AMX */ LLVMTokenTypeKind, /**< Tokens */ LLVMScalableVectorTypeKind, /**< Scalable SIMD vector type */ LLVMBFloatTypeKind /**< 16 bit brain floating point type */ @@ -269,6 +270,7 @@ typedef enum { LLVMConstantVectorValueKind, LLVMUndefValueValueKind, + LLVMPoisonValueValueKind, LLVMConstantAggregateZeroValueKind, LLVMConstantDataArrayValueKind, LLVMConstantDataVectorValueKind, @@ -626,6 +628,11 @@ LLVMBool LLVMIsEnumAttribute(LLVMAttributeRef A); LLVMBool LLVMIsStringAttribute(LLVMAttributeRef A); /** + * Obtain a Type from a context by its registered name. + */ +LLVMTypeRef LLVMGetTypeByName2(LLVMContextRef C, const char *Name); + +/** * @} */ @@ -866,9 +873,7 @@ LLVMValueRef LLVMGetInlineAsm(LLVMTypeRef Ty, */ LLVMContextRef LLVMGetModuleContext(LLVMModuleRef M); -/** - * Obtain a Type from a module by its registered name. - */ +/** Deprecated: Use LLVMGetTypeByName2 instead. */ LLVMTypeRef LLVMGetTypeByName(LLVMModuleRef M, const char *Name); /** @@ -1444,9 +1449,21 @@ unsigned LLVMGetPointerAddressSpace(LLVMTypeRef PointerTy); LLVMTypeRef LLVMVectorType(LLVMTypeRef ElementType, unsigned ElementCount); /** - * Obtain the number of elements in a vector type. + * Create a vector type that contains a defined type and has a scalable + * number of elements. * - * This only works on types that represent vectors. + * The created type will exist in the context thats its element type + * exists in. + * + * @see llvm::ScalableVectorType::get() + */ +LLVMTypeRef LLVMScalableVectorType(LLVMTypeRef ElementType, + unsigned ElementCount); + +/** + * Obtain the (possibly scalable) number of elements in a vector type. + * + * This only works on types that represent vectors (fixed or scalable). * * @see llvm::VectorType::getNumElements() */ @@ -1478,6 +1495,11 @@ LLVMTypeRef LLVMLabelTypeInContext(LLVMContextRef C); LLVMTypeRef LLVMX86MMXTypeInContext(LLVMContextRef C); /** + * Create a X86 AMX type in a context. + */ +LLVMTypeRef LLVMX86AMXTypeInContext(LLVMContextRef C); + +/** * Create a token type in a context. */ LLVMTypeRef LLVMTokenTypeInContext(LLVMContextRef C); @@ -1494,6 +1516,7 @@ LLVMTypeRef LLVMMetadataTypeInContext(LLVMContextRef C); LLVMTypeRef LLVMVoidType(void); LLVMTypeRef LLVMLabelType(void); LLVMTypeRef LLVMX86MMXType(void); +LLVMTypeRef LLVMX86AMXType(void); /** * @} @@ -1550,6 +1573,7 @@ LLVMTypeRef LLVMX86MMXType(void); macro(Function) \ macro(GlobalVariable) \ macro(UndefValue) \ + macro(PoisonValue) \ macro(Instruction) \ macro(UnaryOperator) \ macro(BinaryOperator) \ @@ -1684,6 +1708,11 @@ LLVMBool LLVMIsConstant(LLVMValueRef Val); LLVMBool LLVMIsUndef(LLVMValueRef Val); /** + * Determine whether a value instance is poisonous. + */ +LLVMBool LLVMIsPoison(LLVMValueRef Val); + +/** * Convert value instances between types. * * Internally, an LLVMValueRef is "pinned" to a specific type. This @@ -1842,6 +1871,13 @@ LLVMValueRef LLVMConstAllOnes(LLVMTypeRef Ty); LLVMValueRef LLVMGetUndef(LLVMTypeRef Ty); /** + * Obtain a constant value referring to a poison value of a type. + * + * @see llvm::PoisonValue::get() + */ +LLVMValueRef LLVMGetPoison(LLVMTypeRef Ty); + +/** * Determine whether a value instance is null. 
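The Core.h additions above are straightforward to drive from client code. Below is a minimal, illustrative sketch of the new entry points (poison constants, scalable vector and X86 AMX types, and context-based named-type lookup); LLVMContextCreate, LLVMInt32TypeInContext, and LLVMContextDispose are pre-existing C API calls that are not part of this diff.

```cpp
#include "llvm-c/Core.h"
#include <assert.h>

int main(void) {
  LLVMContextRef Ctx = LLVMContextCreate();

  // Poison constants: like undef, but propagate poison through most operations.
  LLVMTypeRef I32 = LLVMInt32TypeInContext(Ctx);
  LLVMValueRef P = LLVMGetPoison(I32);
  assert(LLVMIsPoison(P));

  // Scalable vectors: <vscale x 4 x i32>.
  LLVMTypeRef ScalableVec = LLVMScalableVectorType(I32, 4);
  (void)ScalableVec;

  // The new X86 AMX tile type.
  LLVMTypeRef AMX = LLVMX86AMXTypeInContext(Ctx);
  (void)AMX;

  // Named struct lookup now goes through the context rather than a module;
  // nothing named "struct.foo" has been registered here, so this is expected
  // to return NULL.
  LLVMTypeRef Named = LLVMGetTypeByName2(Ctx, "struct.foo");
  (void)Named;

  LLVMContextDispose(Ctx);
  return 0;
}
```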
* * @see llvm::Constant::isNullValue() @@ -3636,7 +3672,7 @@ void LLVMAddDestination(LLVMValueRef IndirectBr, LLVMBasicBlockRef Dest); /* Get the number of clauses on the landingpad instruction */ unsigned LLVMGetNumClauses(LLVMValueRef LandingPad); -/* Get the value of the clause at idnex Idx on the landingpad instruction */ +/* Get the value of the clause at index Idx on the landingpad instruction */ LLVMValueRef LLVMGetClause(LLVMValueRef LandingPad, unsigned Idx); /* Add a catch or filter clause to the landingpad instruction */ @@ -3937,6 +3973,26 @@ LLVMValueRef LLVMBuildAtomicCmpXchg(LLVMBuilderRef B, LLVMValueRef Ptr, LLVMAtomicOrdering FailureOrdering, LLVMBool SingleThread); +/** + * Get the number of elements in the mask of a ShuffleVector instruction. + */ +unsigned LLVMGetNumMaskElements(LLVMValueRef ShuffleVectorInst); + +/** + * \returns a constant that specifies that the result of a \c ShuffleVectorInst + * is undefined. + */ +int LLVMGetUndefMaskElem(void); + +/** + * Get the mask value at position Elt in the mask of a ShuffleVector + * instruction. + * + * \Returns the result of \c LLVMGetUndefMaskElem() if the mask value is undef + * at that position. + */ +int LLVMGetMaskValue(LLVMValueRef ShuffleVectorInst, unsigned Elt); + LLVMBool LLVMIsAtomicSingleThread(LLVMValueRef AtomicInst); void LLVMSetAtomicSingleThread(LLVMValueRef AtomicInst, LLVMBool SingleThread); diff --git a/llvm/include/llvm-c/DataTypes.h b/llvm/include/llvm-c/DataTypes.h index 0f27ba81865e..4eb0ac97d97e 100644 --- a/llvm/include/llvm-c/DataTypes.h +++ b/llvm/include/llvm-c/DataTypes.h @@ -77,8 +77,4 @@ typedef signed int ssize_t; # define UINT64_MAX 0xffffffffffffffffULL #endif -#ifndef HUGE_VALF -#define HUGE_VALF (float)HUGE_VAL -#endif - #endif /* LLVM_C_DATATYPES_H */ diff --git a/llvm/include/llvm-c/DebugInfo.h b/llvm/include/llvm-c/DebugInfo.h index cdf5f5a0cca8..5a9cd8e2ee63 100644 --- a/llvm/include/llvm-c/DebugInfo.h +++ b/llvm/include/llvm-c/DebugInfo.h @@ -159,7 +159,9 @@ enum { LLVMDIImportedEntityMetadataKind, LLVMDIMacroMetadataKind, LLVMDIMacroFileMetadataKind, - LLVMDICommonBlockMetadataKind + LLVMDICommonBlockMetadataKind, + LLVMDIStringTypeMetadataKind, + LLVMDIGenericSubrangeMetadataKind }; typedef unsigned LLVMMetadataKind; diff --git a/llvm/include/llvm-c/Error.h b/llvm/include/llvm-c/Error.h index 92f81bf38304..bc702ac7a1bf 100644 --- a/llvm/include/llvm-c/Error.h +++ b/llvm/include/llvm-c/Error.h @@ -62,6 +62,11 @@ void LLVMDisposeErrorMessage(char *ErrMsg); */ LLVMErrorTypeId LLVMGetStringErrorTypeId(void); +/** + * Create a StringError. + */ +LLVMErrorRef LLVMCreateStringError(const char *ErrMsg); + LLVM_C_EXTERN_C_END #endif diff --git a/llvm/include/llvm-c/LLJIT.h b/llvm/include/llvm-c/LLJIT.h new file mode 100644 index 000000000000..28eb8bbff96b --- /dev/null +++ b/llvm/include/llvm-c/LLJIT.h @@ -0,0 +1,213 @@ +/*===----------- llvm-c/LLJIT.h - OrcV2 LLJIT C bindings --------*- C++ -*-===*\ +|* *| +|* Part of the LLVM Project, under the Apache License v2.0 with LLVM *| +|* Exceptions. *| +|* See https://llvm.org/LICENSE.txt for license information. *| +|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception *| +|* *| +|*===----------------------------------------------------------------------===*| +|* *| +|* This header declares the C interface to the LLJIT class in *| +|* libLLVMOrcJIT.a, which provides a simple MCJIT-like ORC JIT. *| +|* *| +|* Many exotic languages can interoperate with C code but have a harder time *| +|* with C++ due to name mangling. 
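The new shufflevector mask accessors declared above (LLVMGetNumMaskElements, LLVMGetMaskValue, LLVMGetUndefMaskElem) might be used along these lines; the sketch assumes the caller already holds an LLVMValueRef for a ShuffleVectorInst, and the printf calls are purely illustrative.

```cpp
#include "llvm-c/Core.h"
#include <stdio.h>

// Print the mask of an existing shufflevector instruction.
static void printShuffleMask(LLVMValueRef Shuffle) {
  unsigned NumElts = LLVMGetNumMaskElements(Shuffle);
  for (unsigned I = 0; I != NumElts; ++I) {
    int Elt = LLVMGetMaskValue(Shuffle, I);
    if (Elt == LLVMGetUndefMaskElem())
      printf("undef ");
    else
      printf("%d ", Elt);
  }
  printf("\n");
}
```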
So in addition to C, this interface enables *| +|* tools written in such languages. *| +|* *| +|* Note: This interface is experimental. It is *NOT* stable, and may be *| +|* changed without warning. Only C API usage documentation is *| +|* provided. See the C++ documentation for all higher level ORC API *| +|* details. *| +|* *| +\*===----------------------------------------------------------------------===*/ + +#ifndef LLVM_C_LLJIT_H +#define LLVM_C_LLJIT_H + +#include "llvm-c/Error.h" +#include "llvm-c/Orc.h" +#include "llvm-c/TargetMachine.h" +#include "llvm-c/Types.h" + +LLVM_C_EXTERN_C_BEGIN + +/** + * A function for constructing an ObjectLinkingLayer instance to be used + * by an LLJIT instance. + * + * Clients can call LLVMOrcLLJITBuilderSetObjectLinkingLayerCreator to + * set the creator function to use when constructing an LLJIT instance. + * This can be used to override the default linking layer implementation + * that would otherwise be chosen by LLJITBuilder. + * + * Object linking layers returned by this function will become owned by the + * LLJIT instance. The client is not responsible for managing their lifetimes + * after the function returns. + */ +typedef LLVMOrcObjectLayerRef ( + *LLVMOrcLLJITBuilderObjectLinkingLayerCreatorFunction)( + void *Ctx, LLVMOrcExecutionSessionRef ES, const char *Triple); + +/** + * A reference to an orc::LLJITBuilder instance. + */ +typedef struct LLVMOrcOpaqueLLJITBuilder *LLVMOrcLLJITBuilderRef; + +/** + * A reference to an orc::LLJIT instance. + */ +typedef struct LLVMOrcOpaqueLLJIT *LLVMOrcLLJITRef; + +/** + * Create an LLVMOrcLLJITBuilder. + * + * The client owns the resulting LLJITBuilder and should dispose of it using + * LLVMOrcDisposeLLJITBuilder once they are done with it. + */ +LLVMOrcLLJITBuilderRef LLVMOrcCreateLLJITBuilder(void); + +/** + * Dispose of an LLVMOrcLLJITBuilderRef. This should only be called if ownership + * has not been passed to LLVMOrcCreateLLJIT (e.g. because some error prevented + * that function from being called). + */ +void LLVMOrcDisposeLLJITBuilder(LLVMOrcLLJITBuilderRef Builder); + +/** + * Set the JITTargetMachineBuilder to be used when constructing the LLJIT + * instance. Calling this function is optional: if it is not called then the + * LLJITBuilder will use JITTargeTMachineBuilder::detectHost to construct a + * JITTargetMachineBuilder. + */ +void LLVMOrcLLJITBuilderSetJITTargetMachineBuilder( + LLVMOrcLLJITBuilderRef Builder, LLVMOrcJITTargetMachineBuilderRef JTMB); + +/** + * Set an ObjectLinkingLayer creator function for this LLJIT instance. + */ +void LLVMOrcLLJITBuilderSetObjectLinkingLayerCreator( + LLVMOrcLLJITBuilderRef Builder, + LLVMOrcLLJITBuilderObjectLinkingLayerCreatorFunction F, void *Ctx); + +/** + * Create an LLJIT instance from an LLJITBuilder. + * + * This operation takes ownership of the Builder argument: clients should not + * dispose of the builder after calling this function (even if the function + * returns an error). If a null Builder argument is provided then a + * default-constructed LLJITBuilder will be used. + * + * On success the resulting LLJIT instance is uniquely owned by the client and + * automatically manages the memory of all JIT'd code and all modules that are + * transferred to it (e.g. via LLVMOrcLLJITAddLLVMIRModule). Disposing of the + * LLJIT instance will free all memory managed by the JIT, including JIT'd code + * and not-yet compiled modules. 
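As a hedged illustration of the builder hooks declared above, the creator callback below simply recreates the default behaviour by returning an RTDyldObjectLinkingLayer backed by a SectionMemoryManager; LLVMOrcCreateRTDyldObjectLinkingLayerWithSectionMemoryManager comes from llvm-c/OrcEE.h, which is added later in this same import.

```cpp
#include "llvm-c/LLJIT.h"
#include "llvm-c/OrcEE.h"

// Matches LLVMOrcLLJITBuilderObjectLinkingLayerCreatorFunction.
static LLVMOrcObjectLayerRef
createObjectLayer(void *Ctx, LLVMOrcExecutionSessionRef ES, const char *Triple) {
  (void)Ctx;
  (void)Triple;
  // The returned layer becomes owned by the LLJIT instance once it is built.
  return LLVMOrcCreateRTDyldObjectLinkingLayerWithSectionMemoryManager(ES);
}

static LLVMOrcLLJITBuilderRef makeBuilder(void) {
  LLVMOrcLLJITBuilderRef Builder = LLVMOrcCreateLLJITBuilder();
  LLVMOrcLLJITBuilderSetObjectLinkingLayerCreator(Builder, createObjectLayer,
                                                  /*Ctx=*/NULL);
  return Builder;
}
```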
+ */ +LLVMErrorRef LLVMOrcCreateLLJIT(LLVMOrcLLJITRef *Result, + LLVMOrcLLJITBuilderRef Builder); + +/** + * Dispose of an LLJIT instance. + */ +LLVMErrorRef LLVMOrcDisposeLLJIT(LLVMOrcLLJITRef J); + +/** + * Get a reference to the ExecutionSession for this LLJIT instance. + * + * The ExecutionSession is owned by the LLJIT instance. The client is not + * responsible for managing its memory. + */ +LLVMOrcExecutionSessionRef LLVMOrcLLJITGetExecutionSession(LLVMOrcLLJITRef J); + +/** + * Return a reference to the Main JITDylib. + * + * The JITDylib is owned by the LLJIT instance. The client is not responsible + * for managing its memory. + */ +LLVMOrcJITDylibRef LLVMOrcLLJITGetMainJITDylib(LLVMOrcLLJITRef J); + +/** + * Return the target triple for this LLJIT instance. This string is owned by + * the LLJIT instance and should not be freed by the client. + */ +const char *LLVMOrcLLJITGetTripleString(LLVMOrcLLJITRef J); + +/** + * Returns the global prefix character according to the LLJIT's DataLayout. + */ +char LLVMOrcLLJITGetGlobalPrefix(LLVMOrcLLJITRef J); + +/** + * Mangles the given string according to the LLJIT instance's DataLayout, then + * interns the result in the SymbolStringPool and returns a reference to the + * pool entry. Clients should call LLVMOrcReleaseSymbolStringPoolEntry to + * decrement the ref-count on the pool entry once they are finished with this + * value. + */ +LLVMOrcSymbolStringPoolEntryRef +LLVMOrcLLJITMangleAndIntern(LLVMOrcLLJITRef J, const char *UnmangledName); + +/** + * Add a buffer representing an object file to the given JITDylib in the given + * LLJIT instance. This operation transfers ownership of the buffer to the + * LLJIT instance. The buffer should not be disposed of or referenced once this + * function returns. + * + * Resources associated with the given object will be tracked by the given + * JITDylib's default resource tracker. + */ +LLVMErrorRef LLVMOrcLLJITAddObjectFile(LLVMOrcLLJITRef J, LLVMOrcJITDylibRef JD, + LLVMMemoryBufferRef ObjBuffer); + +/** + * Add a buffer representing an object file to the given ResourceTracker's + * JITDylib in the given LLJIT instance. This operation transfers ownership of + * the buffer to the LLJIT instance. The buffer should not be disposed of or + * referenced once this function returns. + * + * Resources associated with the given object will be tracked by ResourceTracker + * RT. + */ +LLVMErrorRef LLVMOrcLLJITAddObjectFileWithRT(LLVMOrcLLJITRef J, + LLVMOrcResourceTrackerRef RT, + LLVMMemoryBufferRef ObjBuffer); + +/** + * Add an IR module to the given JITDylib in the given LLJIT instance. This + * operation transfers ownership of the TSM argument to the LLJIT instance. + * The TSM argument should not be disposed of or referenced once this + * function returns. + * + * Resources associated with the given Module will be tracked by the given + * JITDylib's default resource tracker. + */ +LLVMErrorRef LLVMOrcLLJITAddLLVMIRModule(LLVMOrcLLJITRef J, + LLVMOrcJITDylibRef JD, + LLVMOrcThreadSafeModuleRef TSM); + +/** + * Add an IR module to the given ResourceTracker's JITDylib in the given LLJIT + * instance. This operation transfers ownership of the TSM argument to the LLJIT + * instance. The TSM argument should not be disposed of or referenced once this + * function returns. + * + * Resources associated with the given Module will be tracked by ResourceTracker + * RT. 
+ */ +LLVMErrorRef LLVMOrcLLJITAddLLVMIRModuleWithRT(LLVMOrcLLJITRef J, + LLVMOrcResourceTrackerRef JD, + LLVMOrcThreadSafeModuleRef TSM); + +/** + * Look up the given symbol in the main JITDylib of the given LLJIT instance. + * + * This operation does not take ownership of the Name argument. + */ +LLVMErrorRef LLVMOrcLLJITLookup(LLVMOrcLLJITRef J, + LLVMOrcJITTargetAddress *Result, + const char *Name); + +LLVM_C_EXTERN_C_END + +#endif /* LLVM_C_LLJIT_H */ diff --git a/llvm/include/llvm-c/LinkTimeOptimizer.h b/llvm/include/llvm-c/LinkTimeOptimizer.h deleted file mode 100644 index 9ae65b8fe5e0..000000000000 --- a/llvm/include/llvm-c/LinkTimeOptimizer.h +++ /dev/null @@ -1,66 +0,0 @@ -//===-- llvm/LinkTimeOptimizer.h - LTO Public C Interface -------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This header provides a C API to use the LLVM link time optimization -// library. This is intended to be used by linkers which are C-only in -// their implementation for performing LTO. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_C_LINKTIMEOPTIMIZER_H -#define LLVM_C_LINKTIMEOPTIMIZER_H - -#include "llvm-c/ExternC.h" - -LLVM_C_EXTERN_C_BEGIN - -/** - * @defgroup LLVMCLinkTimeOptimizer Link Time Optimization - * @ingroup LLVMC - * - * @{ - */ - - /// This provides a dummy type for pointers to the LTO object. - typedef void* llvm_lto_t; - - /// This provides a C-visible enumerator to manage status codes. - /// This should map exactly onto the C++ enumerator LTOStatus. - typedef enum llvm_lto_status { - LLVM_LTO_UNKNOWN, - LLVM_LTO_OPT_SUCCESS, - LLVM_LTO_READ_SUCCESS, - LLVM_LTO_READ_FAILURE, - LLVM_LTO_WRITE_FAILURE, - LLVM_LTO_NO_TARGET, - LLVM_LTO_NO_WORK, - LLVM_LTO_MODULE_MERGE_FAILURE, - LLVM_LTO_ASM_FAILURE, - - // Added C-specific error codes - LLVM_LTO_NULL_OBJECT - } llvm_lto_status_t; - - /// This provides C interface to initialize link time optimizer. This allows - /// linker to use dlopen() interface to dynamically load LinkTimeOptimizer. - /// extern "C" helps, because dlopen() interface uses name to find the symbol. - extern llvm_lto_t llvm_create_optimizer(void); - extern void llvm_destroy_optimizer(llvm_lto_t lto); - - extern llvm_lto_status_t llvm_read_object_file - (llvm_lto_t lto, const char* input_filename); - extern llvm_lto_status_t llvm_optimize_modules - (llvm_lto_t lto, const char* output_filename); - -/** - * @} - */ - - LLVM_C_EXTERN_C_END - -#endif diff --git a/llvm/include/llvm-c/Orc.h b/llvm/include/llvm-c/Orc.h index 09a058846108..183107c148a6 100644 --- a/llvm/include/llvm-c/Orc.h +++ b/llvm/include/llvm-c/Orc.h @@ -39,32 +39,200 @@ LLVM_C_EXTERN_C_BEGIN typedef uint64_t LLVMOrcJITTargetAddress; /** + * Represents generic linkage flags for a symbol definition. + */ +typedef enum { + LLVMJITSymbolGenericFlagsExported = 1U << 0, + LLVMJITSymbolGenericFlagsWeak = 1U << 1 +} LLVMJITSymbolGenericFlags; + +/** + * Represents target specific flags for a symbol definition. + */ +typedef uint8_t LLVMJITTargetSymbolFlags; + +/** + * Represents the linkage flags for a symbol definition. + */ +typedef struct { + uint8_t GenericFlags; + uint8_t TargetFlags; +} LLVMJITSymbolFlags; + +/** + * Represents an evaluated symbol address and flags. 
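Taken together, the LLJIT functions above support a small end-to-end flow: create the JIT, hand it a ThreadSafeModule, look up a symbol, and dispose of everything. The sketch below is illustrative only; buildModule is a hypothetical helper that emits IR defining a function named "sum", the ThreadSafeContext/ThreadSafeModule and native-target calls come from llvm-c/Orc.h and llvm-c/Target.h, and LLVMGetErrorMessage is the pre-existing llvm-c/Error.h accessor.

```cpp
#include "llvm-c/Core.h"
#include "llvm-c/Error.h"
#include "llvm-c/LLJIT.h"
#include "llvm-c/Orc.h"
#include "llvm-c/Target.h"
#include <stdint.h>
#include <stdio.h>

// Hypothetical helper: emits IR into a module that defines "sum".
LLVMModuleRef buildModule(LLVMContextRef Ctx);

static int reportAndDispose(LLVMErrorRef Err) {
  char *Msg = LLVMGetErrorMessage(Err); // consumes Err
  fprintf(stderr, "error: %s\n", Msg);
  LLVMDisposeErrorMessage(Msg);
  return 1;
}

int main(void) {
  LLVMInitializeNativeTarget();
  LLVMInitializeNativeAsmPrinter();

  LLVMOrcLLJITRef J;
  // Passing a NULL builder uses a default-constructed LLJITBuilder.
  if (LLVMErrorRef Err = LLVMOrcCreateLLJIT(&J, /*Builder=*/NULL))
    return reportAndDispose(Err);

  // Wrap a module in a ThreadSafeModule; ownership of the TSM transfers to
  // the LLJIT instance on a successful add.
  LLVMOrcThreadSafeContextRef TSCtx = LLVMOrcCreateNewThreadSafeContext();
  LLVMModuleRef M = buildModule(LLVMOrcThreadSafeContextGetContext(TSCtx));
  LLVMOrcThreadSafeModuleRef TSM = LLVMOrcCreateNewThreadSafeModule(M, TSCtx);
  LLVMOrcDisposeThreadSafeContext(TSCtx); // the TSM keeps the context alive

  LLVMOrcJITDylibRef MainJD = LLVMOrcLLJITGetMainJITDylib(J);
  if (LLVMErrorRef Err = LLVMOrcLLJITAddLLVMIRModule(J, MainJD, TSM))
    return reportAndDispose(Err);

  LLVMOrcJITTargetAddress Addr;
  if (LLVMErrorRef Err = LLVMOrcLLJITLookup(J, &Addr, "sum"))
    return reportAndDispose(Err);
  int (*Sum)(int, int) = (int (*)(int, int))(uintptr_t)Addr;
  printf("sum(1, 2) = %d\n", Sum(1, 2));

  if (LLVMErrorRef Err = LLVMOrcDisposeLLJIT(J))
    return reportAndDispose(Err);
  return 0;
}
```

The WithRT variants added in this commit follow the same shape, but attach the object file or module to an explicit ResourceTracker instead of the JITDylib's default tracker, so the resources can later be removed or transferred.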
+ */ +typedef struct { + LLVMOrcJITTargetAddress Address; + LLVMJITSymbolFlags Flags; +} LLVMJITEvaluatedSymbol; + +/** * A reference to an orc::ExecutionSession instance. */ typedef struct LLVMOrcOpaqueExecutionSession *LLVMOrcExecutionSessionRef; /** + * Error reporter function. + */ +typedef void (*LLVMOrcErrorReporterFunction)(void *Ctx, LLVMErrorRef Err); + +/** + * A reference to an orc::SymbolStringPool. + */ +typedef struct LLVMOrcOpaqueSymbolStringPool *LLVMOrcSymbolStringPoolRef; + +/** * A reference to an orc::SymbolStringPool table entry. */ -typedef struct LLVMOrcQuaqueSymbolStringPoolEntryPtr +typedef struct LLVMOrcOpaqueSymbolStringPoolEntry *LLVMOrcSymbolStringPoolEntryRef; /** + * Represents a pair of a symbol name and an evaluated symbol. + */ +typedef struct { + LLVMOrcSymbolStringPoolEntryRef Name; + LLVMJITEvaluatedSymbol Sym; +} LLVMJITCSymbolMapPair; + +/** + * Represents a list of (SymbolStringPtr, JITEvaluatedSymbol) pairs that can be + * used to construct a SymbolMap. + */ +typedef LLVMJITCSymbolMapPair *LLVMOrcCSymbolMapPairs; + +/** + * Lookup kind. This can be used by definition generators when deciding whether + * to produce a definition for a requested symbol. + * + * This enum should be kept in sync with llvm::orc::LookupKind. + */ +typedef enum { + LLVMOrcLookupKindStatic, + LLVMOrcLookupKindDLSym +} LLVMOrcLookupKind; + +/** + * JITDylib lookup flags. This can be used by definition generators when + * deciding whether to produce a definition for a requested symbol. + * + * This enum should be kept in sync with llvm::orc::JITDylibLookupFlags. + */ +typedef enum { + LLVMOrcJITDylibLookupFlagsMatchExportedSymbolsOnly, + LLVMOrcJITDylibLookupFlagsMatchAllSymbols +} LLVMOrcJITDylibLookupFlags; + +/** + * Symbol lookup flags for lookup sets. This should be kept in sync with + * llvm::orc::SymbolLookupFlags. + */ +typedef enum { + LLVMOrcSymbolLookupFlagsRequiredSymbol, + LLVMOrcSymbolLookupFlagsWeaklyReferencedSymbol +} LLVMOrcSymbolLookupFlags; + +/** + * An element type for a symbol lookup set. + */ +typedef struct { + LLVMOrcSymbolStringPoolEntryRef Name; + LLVMOrcSymbolLookupFlags LookupFlags; +} LLVMOrcCLookupSetElement; + +/** + * A set of symbols to look up / generate. + * + * The list is terminated with an element containing a null pointer for the + * Name field. + * + * If a client creates an instance of this type then they are responsible for + * freeing it, and for ensuring that all strings have been retained over the + * course of its life. Clients receiving a copy from a callback are not + * responsible for managing lifetime or retain counts. + */ +typedef LLVMOrcCLookupSetElement *LLVMOrcCLookupSet; + +/** + * A reference to an orc::MaterializationUnit. + */ +typedef struct LLVMOrcOpaqueMaterializationUnit *LLVMOrcMaterializationUnitRef; + +/** * A reference to an orc::JITDylib instance. */ typedef struct LLVMOrcOpaqueJITDylib *LLVMOrcJITDylibRef; /** - * A reference to an orc::JITDylib::DefinitionGenerator. + * A reference to an orc::ResourceTracker instance. + */ +typedef struct LLVMOrcOpaqueResourceTracker *LLVMOrcResourceTrackerRef; + +/** + * A reference to an orc::DefinitionGenerator. + */ +typedef struct LLVMOrcOpaqueDefinitionGenerator + *LLVMOrcDefinitionGeneratorRef; + +/** + * An opaque lookup state object. Instances of this type can be captured to + * suspend a lookup while a custom generator function attempts to produce a + * definition. 
+ * + * If a client captures a lookup state object then they must eventually call + * LLVMOrcLookupStateContinueLookup to restart the lookup. This is required + * in order to release memory allocated for the lookup state, even if errors + * have occurred while the lookup was suspended (if these errors have made the + * lookup impossible to complete then it will issue its own error before + * destruction). + */ +typedef struct LLVMOrcOpaqueLookupState *LLVMOrcLookupStateRef; + +/** + * A custom generator function. This can be used to create a custom generator + * object using LLVMOrcCreateCustomCAPIDefinitionGenerator. The resulting + * object can be attached to a JITDylib, via LLVMOrcJITDylibAddGenerator, to + * receive callbacks when lookups fail to match existing definitions. + * + * GeneratorObj will contain the address of the custom generator object. + * + * Ctx will contain the context object passed to + * LLVMOrcCreateCustomCAPIDefinitionGenerator. + * + * LookupState will contain a pointer to an LLVMOrcLookupStateRef object. This + * can optionally be modified to make the definition generation process + * asynchronous: If the LookupStateRef value is copied, and the original + * LLVMOrcLookupStateRef set to null, the lookup will be suspended. Once the + * asynchronous definition process has been completed clients must call + * LLVMOrcLookupStateContinueLookup to continue the lookup (this should be + * done unconditionally, even if errors have occurred in the mean time, to + * free the lookup state memory and notify the query object of the failures. If + * LookupState is captured this function must return LLVMErrorSuccess. + * + * The Kind argument can be inspected to determine the lookup kind (e.g. + * as-if-during-static-link, or as-if-during-dlsym). + * + * The JD argument specifies which JITDylib the definitions should be generated + * into. + * + * The JDLookupFlags argument can be inspected to determine whether the original + * lookup included non-exported symobls. + * + * Finally, the LookupSet argument contains the set of symbols that could not + * be found in JD already (the set of generation candidates). */ -typedef struct LLVMOrcOpaqueJITDylibDefinitionGenerator - *LLVMOrcJITDylibDefinitionGeneratorRef; +typedef LLVMErrorRef (*LLVMOrcCAPIDefinitionGeneratorTryToGenerateFunction)( + LLVMOrcDefinitionGeneratorRef GeneratorObj, void *Ctx, + LLVMOrcLookupStateRef *LookupState, LLVMOrcLookupKind Kind, + LLVMOrcJITDylibRef JD, LLVMOrcJITDylibLookupFlags JDLookupFlags, + LLVMOrcCLookupSet LookupSet, size_t LookupSetSize); /** * Predicate function for SymbolStringPoolEntries. */ -typedef int (*LLVMOrcSymbolPredicate)(LLVMOrcSymbolStringPoolEntryRef Sym, - void *Ctx); +typedef int (*LLVMOrcSymbolPredicate)(void *Ctx, + LLVMOrcSymbolStringPoolEntryRef Sym); /** * A reference to an orc::ThreadSafeContext instance. @@ -83,14 +251,43 @@ typedef struct LLVMOrcOpaqueJITTargetMachineBuilder *LLVMOrcJITTargetMachineBuilderRef; /** - * A reference to an orc::LLJITBuilder instance. + * A reference to an orc::ObjectLayer instance. + */ +typedef struct LLVMOrcOpaqueObjectLayer *LLVMOrcObjectLayerRef; + +/** + * Attach a custom error reporter function to the ExecutionSession. + * + * The error reporter will be called to deliver failure notices that can not be + * directly reported to a caller. 
For example, failure to resolve symbols in + * the JIT linker is typically reported via the error reporter (callers + * requesting definitions from the JIT will typically be delivered a + * FailureToMaterialize error instead). + */ +void LLVMOrcExecutionSessionSetErrorReporter( + LLVMOrcExecutionSessionRef ES, LLVMOrcErrorReporterFunction ReportError, + void *Ctx); + +/** + * Return a reference to the SymbolStringPool for an ExecutionSession. + * + * Ownership of the pool remains with the ExecutionSession: The caller is + * not required to free the pool. */ -typedef struct LLVMOrcOpaqueLLJITBuilder *LLVMOrcLLJITBuilderRef; +LLVMOrcSymbolStringPoolRef +LLVMOrcExecutionSessionGetSymbolStringPool(LLVMOrcExecutionSessionRef ES); /** - * A reference to an orc::LLJIT instance. + * Clear all unreferenced symbol string pool entries. + * + * This can be called at any time to release unused entries in the + * ExecutionSession's string pool. Since it locks the pool (preventing + * interning of any new strings) it is recommended that it only be called + * infrequently, ideally when the caller has reason to believe that some + * entries will have become unreferenced, e.g. after removing a module or + * closing a JITDylib. */ -typedef struct LLVMOrcOpaqueLLJIT *LLVMOrcLLJITRef; +void LLVMOrcSymbolStringPoolClearDeadEntries(LLVMOrcSymbolStringPoolRef SSP); /** * Intern a string in the ExecutionSession's SymbolStringPool and return a @@ -108,26 +305,137 @@ LLVMOrcSymbolStringPoolEntryRef LLVMOrcExecutionSessionIntern(LLVMOrcExecutionSessionRef ES, const char *Name); /** + * Increments the ref-count for a SymbolStringPool entry. + */ +void LLVMOrcRetainSymbolStringPoolEntry(LLVMOrcSymbolStringPoolEntryRef S); + +/** * Reduces the ref-count for of a SymbolStringPool entry. */ void LLVMOrcReleaseSymbolStringPoolEntry(LLVMOrcSymbolStringPoolEntryRef S); +const char *LLVMOrcSymbolStringPoolEntryStr(LLVMOrcSymbolStringPoolEntryRef S); + +/** + * Reduces the ref-count of a ResourceTracker. + */ +void LLVMOrcReleaseResourceTracker(LLVMOrcResourceTrackerRef RT); + +/** + * Transfers tracking of all resources associated with resource tracker SrcRT + * to resource tracker DstRT. + */ +void LLVMOrcResourceTrackerTransferTo(LLVMOrcResourceTrackerRef SrcRT, + LLVMOrcResourceTrackerRef DstRT); + +/** + * Remove all resources associated with the given tracker. See + * ResourceTracker::remove(). + */ +LLVMErrorRef LLVMOrcResourceTrackerRemove(LLVMOrcResourceTrackerRef RT); + /** * Dispose of a JITDylib::DefinitionGenerator. This should only be called if * ownership has not been passed to a JITDylib (e.g. because some error * prevented the client from calling LLVMOrcJITDylibAddGenerator). */ -void LLVMOrcDisposeJITDylibDefinitionGenerator( - LLVMOrcJITDylibDefinitionGeneratorRef DG); +void LLVMOrcDisposeDefinitionGenerator( + LLVMOrcDefinitionGeneratorRef DG); /** - * Add a JITDylib::DefinitionGenerator to the given JITDylib. + * Dispose of a MaterializationUnit. + */ +void LLVMOrcDisposeMaterializationUnit(LLVMOrcMaterializationUnitRef MU); + +/** + * Create a MaterializationUnit to define the given symbols as pointing to + * the corresponding raw addresses. + */ +LLVMOrcMaterializationUnitRef +LLVMOrcAbsoluteSymbols(LLVMOrcCSymbolMapPairs Syms, size_t NumPairs); + +/** + * Create a "bare" JITDylib. + * + * The client is responsible for ensuring that the JITDylib's name is unique, + * e.g. by calling LLVMOrcExecutionSessionGetJTIDylibByName first. 
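A possible use of the error-reporter hook declared above, assuming an already-created LLJIT instance; LLVMGetErrorMessage is the pre-existing llvm-c/Error.h accessor and the "[orc]" prefix is purely illustrative.

```cpp
#include "llvm-c/Error.h"
#include "llvm-c/LLJIT.h"
#include "llvm-c/Orc.h"
#include <stdio.h>

// Matches LLVMOrcErrorReporterFunction: called for failures that cannot be
// delivered directly to a caller (e.g. JIT-linker resolution errors).
static void reportError(void *Ctx, LLVMErrorRef Err) {
  (void)Ctx;
  char *Msg = LLVMGetErrorMessage(Err); // consumes Err
  fprintf(stderr, "[orc] %s\n", Msg);
  LLVMDisposeErrorMessage(Msg);
}

static void installReporter(LLVMOrcLLJITRef J) {
  LLVMOrcExecutionSessionRef ES = LLVMOrcLLJITGetExecutionSession(J);
  LLVMOrcExecutionSessionSetErrorReporter(ES, reportError, /*Ctx=*/NULL);
}
```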
+ * + * This call does not install any library code or symbols into the newly + * created JITDylib. The client is responsible for all configuration. + */ +LLVMOrcJITDylibRef +LLVMOrcExecutionSessionCreateBareJITDylib(LLVMOrcExecutionSessionRef ES, + const char *Name); + +/** + * Create a JITDylib. + * + * The client is responsible for ensuring that the JITDylib's name is unique, + * e.g. by calling LLVMOrcExecutionSessionGetJTIDylibByName first. + * + * If a Platform is attached to the ExecutionSession then + * Platform::setupJITDylib will be called to install standard platform symbols + * (e.g. standard library interposes). If no Platform is installed then this + * call is equivalent to LLVMExecutionSessionRefCreateBareJITDylib and will + * always return success. + */ +LLVMErrorRef +LLVMOrcExecutionSessionCreateJITDylib(LLVMOrcExecutionSessionRef ES, + LLVMOrcJITDylibRef *Result, + const char *Name); + +/** + * Returns the JITDylib with the given name, or NULL if no such JITDylib + * exists. + */ +LLVMOrcJITDylibRef LLVMOrcExecutionSessionGetJITDylibByName(const char *Name); + +/** + * Return a reference to a newly created resource tracker associated with JD. + * The tracker is returned with an initial ref-count of 1, and must be released + * with LLVMOrcReleaseResourceTracker when no longer needed. + */ +LLVMOrcResourceTrackerRef +LLVMOrcJITDylibCreateResourceTracker(LLVMOrcJITDylibRef JD); + +/** + * Return a reference to the default resource tracker for the given JITDylib. + * This operation will increase the retain count of the tracker: Clients should + * call LLVMOrcReleaseResourceTracker when the result is no longer needed. + */ +LLVMOrcResourceTrackerRef +LLVMOrcJITDylibGetDefaultResourceTracker(LLVMOrcJITDylibRef JD); + +/** + * Add the given MaterializationUnit to the given JITDylib. + * + * If this operation succeeds then JITDylib JD will take ownership of MU. + * If the operation fails then ownership remains with the caller who should + * call LLVMOrcDisposeMaterializationUnit to destroy it. + */ +LLVMErrorRef LLVMOrcJITDylibDefine(LLVMOrcJITDylibRef JD, + LLVMOrcMaterializationUnitRef MU); + +/** + * Calls remove on all trackers associated with this JITDylib, see + * JITDylib::clear(). + */ +LLVMErrorRef LLVMOrcJITDylibClear(LLVMOrcJITDylibRef JD); + +/** + * Add a DefinitionGenerator to the given JITDylib. * * The JITDylib will take ownership of the given generator: The client is no * longer responsible for managing its memory. */ void LLVMOrcJITDylibAddGenerator(LLVMOrcJITDylibRef JD, - LLVMOrcJITDylibDefinitionGeneratorRef DG); + LLVMOrcDefinitionGeneratorRef DG); + +/** + * Create a custom generator. + */ +LLVMOrcDefinitionGeneratorRef LLVMOrcCreateCustomCAPIDefinitionGenerator( + LLVMOrcCAPIDefinitionGeneratorTryToGenerateFunction F, void *Ctx); /** * Get a DynamicLibrarySearchGenerator that will reflect process symbols into @@ -148,7 +456,7 @@ void LLVMOrcJITDylibAddGenerator(LLVMOrcJITDylibRef JD, * the global prefix if present. */ LLVMErrorRef LLVMOrcCreateDynamicLibrarySearchGeneratorForProcess( - LLVMOrcJITDylibDefinitionGeneratorRef *Result, char GlobalPrefx, + LLVMOrcDefinitionGeneratorRef *Result, char GlobalPrefx, LLVMOrcSymbolPredicate Filter, void *FilterCtx); /** @@ -156,7 +464,7 @@ LLVMErrorRef LLVMOrcCreateDynamicLibrarySearchGeneratorForProcess( * * Ownership of the underlying ThreadSafeContext data is shared: Clients * can and should dispose of their ThreadSafeContext as soon as they no longer - * need to refer to it directly. Other references (e.g. 
from ThreadSafeModules + * need to refer to it directly. Other references (e.g. from ThreadSafeModules) * will keep the data alive as long as it is needed. */ LLVMOrcThreadSafeContextRef LLVMOrcCreateNewThreadSafeContext(void); @@ -178,7 +486,7 @@ void LLVMOrcDisposeThreadSafeContext(LLVMOrcThreadSafeContextRef TSCtx); * after this function returns. * * Ownership of the ThreadSafeModule is unique: If it is transferred to the JIT - * (e.g. by LLVMOrcLLJITAddLLVMIRModule), in which case the client is no longer + * (e.g. by LLVMOrcLLJITAddLLVMIRModule) then the client is no longer * responsible for it. If it is not transferred to the JIT then the client * should call LLVMOrcDisposeThreadSafeModule to dispose of it. */ @@ -221,114 +529,9 @@ void LLVMOrcDisposeJITTargetMachineBuilder( LLVMOrcJITTargetMachineBuilderRef JTMB); /** - * Create an LLJITTargetMachineBuilder. - * - * The client owns the resulting LLJITBuilder and should dispose of it using - * LLVMOrcDisposeLLJITBuilder once they are done with it. - */ -LLVMOrcLLJITBuilderRef LLVMOrcCreateLLJITBuilder(void); - -/** - * Dispose of an LLVMOrcLLJITBuilderRef. This should only be called if ownership - * has not been passed to LLVMOrcCreateLLJIT (e.g. because some error prevented - * that function from being called). - */ -void LLVMOrcDisposeLLJITBuilder(LLVMOrcLLJITBuilderRef Builder); - -/** - * Set the JITTargetMachineBuilder to be used when constructing the LLJIT - * instance. Calling this function is optional: if it is not called then the - * LLJITBuilder will use JITTargeTMachineBuilder::detectHost to construct a - * JITTargetMachineBuilder. - */ -void LLVMOrcLLJITBuilderSetJITTargetMachineBuilder( - LLVMOrcLLJITBuilderRef Builder, LLVMOrcJITTargetMachineBuilderRef JTMB); - -/** - * Create an LLJIT instance from an LLJITBuilder. - * - * This operation takes ownership of the Builder argument: clients should not - * dispose of the builder after calling this function (even if the function - * returns an error). If a null Builder argument is provided then a - * default-constructed LLJITBuilder will be used. - * - * On success the resulting LLJIT instance is uniquely owned by the client and - * automatically manages the memory of all JIT'd code and all modules that are - * transferred to it (e.g. via LLVMOrcLLJITAddLLVMIRModule). Disposing of the - * LLJIT instance will free all memory managed by the JIT, including JIT'd code - * and not-yet compiled modules. - */ -LLVMErrorRef LLVMOrcCreateLLJIT(LLVMOrcLLJITRef *Result, - LLVMOrcLLJITBuilderRef Builder); - -/** - * Dispose of an LLJIT instance. - */ -LLVMErrorRef LLVMOrcDisposeLLJIT(LLVMOrcLLJITRef J); - -/** - * Get a reference to the ExecutionSession for this LLJIT instance. - * - * The ExecutionSession is owned by the LLJIT instance. The client is not - * responsible for managing its memory. - */ -LLVMOrcExecutionSessionRef LLVMOrcLLJITGetExecutionSession(LLVMOrcLLJITRef J); - -/** - * Return a reference to the Main JITDylib. - * - * The JITDylib is owned by the LLJIT instance. The client is not responsible - * for managing its memory. - */ -LLVMOrcJITDylibRef LLVMOrcLLJITGetMainJITDylib(LLVMOrcLLJITRef J); - -/** - * Return the target triple for this LLJIT instance. This string is owned by - * the LLJIT instance and should not be freed by the client. - */ -const char *LLVMOrcLLJITGetTripleString(LLVMOrcLLJITRef J); - -/** - * Returns the global prefix character according to the LLJIT's DataLayout. 
- */ -char LLVMOrcLLJITGetGlobalPrefix(LLVMOrcLLJITRef J); - -/** - * Mangles the given string according to the LLJIT instance's DataLayout, then - * interns the result in the SymbolStringPool and returns a reference to the - * pool entry. Clients should call LLVMOrcReleaseSymbolStringPoolEntry to - * decrement the ref-count on the pool entry once they are finished with this - * value. - */ -LLVMOrcSymbolStringPoolEntryRef -LLVMOrcLLJITMangleAndIntern(LLVMOrcLLJITRef J, const char *UnmangledName); - -/** - * Add a buffer representing an object file to the given JITDylib in the given - * LLJIT instance. This operation transfers ownership of the buffer to the - * LLJIT instance. The buffer should not be disposed of or referenced once this - * function returns. - */ -LLVMErrorRef LLVMOrcLLJITAddObjectFile(LLVMOrcLLJITRef J, LLVMOrcJITDylibRef JD, - LLVMMemoryBufferRef ObjBuffer); - -/** - * Add an IR module to the given JITDylib of the given LLJIT instance. This - * operation transfers ownership of the TSM argument to the LLJIT instance. - * The TSM argument should not be 3disposed of or referenced once this - * function returns. - */ -LLVMErrorRef LLVMOrcLLJITAddLLVMIRModule(LLVMOrcLLJITRef J, - LLVMOrcJITDylibRef JD, - LLVMOrcThreadSafeModuleRef TSM); -/** - * Look up the given symbol in the main JITDylib of the given LLJIT instance. - * - * This operation does not take ownership of the Name argument. + * Dispose of an ObjectLayer. */ -LLVMErrorRef LLVMOrcLLJITLookup(LLVMOrcLLJITRef J, - LLVMOrcJITTargetAddress *Result, - const char *Name); +void LLVMOrcDisposeObjectLayer(LLVMOrcObjectLayerRef ObjLayer); LLVM_C_EXTERN_C_END diff --git a/llvm/include/llvm-c/OrcBindings.h b/llvm/include/llvm-c/OrcBindings.h deleted file mode 100644 index 11cdade7c26f..000000000000 --- a/llvm/include/llvm-c/OrcBindings.h +++ /dev/null @@ -1,169 +0,0 @@ -/*===----------- llvm-c/OrcBindings.h - Orc Lib C Iface ---------*- C++ -*-===*\ -|* *| -|* Part of the LLVM Project, under the Apache License v2.0 with LLVM *| -|* Exceptions. *| -|* See https://llvm.org/LICENSE.txt for license information. *| -|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception *| -|* *| -|*===----------------------------------------------------------------------===*| -|* *| -|* This header declares the C interface to libLLVMOrcJIT.a, which implements *| -|* JIT compilation of LLVM IR. *| -|* *| -|* Many exotic languages can interoperate with C code but have a harder time *| -|* with C++ due to name mangling. So in addition to C, this interface enables *| -|* tools written in such languages. *| -|* *| -|* Note: This interface is experimental. It is *NOT* stable, and may be *| -|* changed without warning. *| -|* *| -\*===----------------------------------------------------------------------===*/ - -#ifndef LLVM_C_ORCBINDINGS_H -#define LLVM_C_ORCBINDINGS_H - -#include "llvm-c/Error.h" -#include "llvm-c/ExternC.h" -#include "llvm-c/Object.h" -#include "llvm-c/TargetMachine.h" - -LLVM_C_EXTERN_C_BEGIN - -typedef struct LLVMOrcOpaqueJITStack *LLVMOrcJITStackRef; -typedef uint64_t LLVMOrcModuleHandle; -typedef uint64_t LLVMOrcTargetAddress; -typedef uint64_t (*LLVMOrcSymbolResolverFn)(const char *Name, void *LookupCtx); -typedef uint64_t (*LLVMOrcLazyCompileCallbackFn)(LLVMOrcJITStackRef JITStack, - void *CallbackCtx); - -/** - * Create an ORC JIT stack. - * - * The client owns the resulting stack, and must call OrcDisposeInstance(...) - * to destroy it and free its memory. 
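Putting the Orc.h pieces above together, here is a hedged sketch of a custom definition generator that resolves a single hypothetical symbol, host_callback, to a host function via absolute symbols. The retain on the pool entry assumes the materialization unit consumes one reference; treat the ownership details as an assumption rather than a guarantee.

```cpp
#include "llvm-c/Orc.h"
#include <stdint.h>
#include <string.h>

// A host function we want JIT'd code to be able to call (hypothetical).
extern "C" int host_callback(int X) { return X + 1; }

// Matches LLVMOrcCAPIDefinitionGeneratorTryToGenerateFunction. Invoked when a
// lookup in the JITDylib fails to find definitions for some symbols.
static LLVMErrorRef tryToGenerate(LLVMOrcDefinitionGeneratorRef GeneratorObj,
                                  void *Ctx, LLVMOrcLookupStateRef *LookupState,
                                  LLVMOrcLookupKind Kind, LLVMOrcJITDylibRef JD,
                                  LLVMOrcJITDylibLookupFlags JDLookupFlags,
                                  LLVMOrcCLookupSet LookupSet,
                                  size_t LookupSetSize) {
  for (size_t I = 0; I != LookupSetSize; ++I) {
    if (strcmp(LLVMOrcSymbolStringPoolEntryStr(LookupSet[I].Name),
               "host_callback") != 0)
      continue;
    // Retain the name entry before handing it to the symbol map (assumption:
    // the materialization unit takes over one reference).
    LLVMOrcRetainSymbolStringPoolEntry(LookupSet[I].Name);
    LLVMJITSymbolFlags Flags;
    Flags.GenericFlags = LLVMJITSymbolGenericFlagsExported;
    Flags.TargetFlags = 0;
    LLVMJITEvaluatedSymbol Sym = {
        (LLVMOrcJITTargetAddress)(uintptr_t)&host_callback, Flags};
    LLVMJITCSymbolMapPair Pairs[1] = {{LookupSet[I].Name, Sym}};
    LLVMOrcMaterializationUnitRef MU = LLVMOrcAbsoluteSymbols(Pairs, 1);
    if (LLVMErrorRef Err = LLVMOrcJITDylibDefine(JD, MU)) {
      LLVMOrcDisposeMaterializationUnit(MU); // ownership stays with us on error
      return Err;
    }
  }
  return LLVMErrorSuccess;
}

// Attach the generator to a JITDylib; the dylib takes ownership of it.
static void addGenerator(LLVMOrcJITDylibRef JD) {
  LLVMOrcDefinitionGeneratorRef Gen =
      LLVMOrcCreateCustomCAPIDefinitionGenerator(tryToGenerate, /*Ctx=*/NULL);
  LLVMOrcJITDylibAddGenerator(JD, Gen);
}
```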
The JIT stack will take ownership of the - * TargetMachine, which will be destroyed when the stack is destroyed. The - * client should not attempt to dispose of the Target Machine, or it will result - * in a double-free. - */ -LLVMOrcJITStackRef LLVMOrcCreateInstance(LLVMTargetMachineRef TM); - -/** - * Get the error message for the most recent error (if any). - * - * This message is owned by the ORC JIT Stack and will be freed when the stack - * is disposed of by LLVMOrcDisposeInstance. - */ -const char *LLVMOrcGetErrorMsg(LLVMOrcJITStackRef JITStack); - -/** - * Mangle the given symbol. - * Memory will be allocated for MangledSymbol to hold the result. The client - */ -void LLVMOrcGetMangledSymbol(LLVMOrcJITStackRef JITStack, char **MangledSymbol, - const char *Symbol); - -/** - * Dispose of a mangled symbol. - */ -void LLVMOrcDisposeMangledSymbol(char *MangledSymbol); - -/** - * Create a lazy compile callback. - */ -LLVMErrorRef LLVMOrcCreateLazyCompileCallback( - LLVMOrcJITStackRef JITStack, LLVMOrcTargetAddress *RetAddr, - LLVMOrcLazyCompileCallbackFn Callback, void *CallbackCtx); - -/** - * Create a named indirect call stub. - */ -LLVMErrorRef LLVMOrcCreateIndirectStub(LLVMOrcJITStackRef JITStack, - const char *StubName, - LLVMOrcTargetAddress InitAddr); - -/** - * Set the pointer for the given indirect stub. - */ -LLVMErrorRef LLVMOrcSetIndirectStubPointer(LLVMOrcJITStackRef JITStack, - const char *StubName, - LLVMOrcTargetAddress NewAddr); - -/** - * Add module to be eagerly compiled. - */ -LLVMErrorRef LLVMOrcAddEagerlyCompiledIR(LLVMOrcJITStackRef JITStack, - LLVMOrcModuleHandle *RetHandle, - LLVMModuleRef Mod, - LLVMOrcSymbolResolverFn SymbolResolver, - void *SymbolResolverCtx); - -/** - * Add module to be lazily compiled one function at a time. - */ -LLVMErrorRef LLVMOrcAddLazilyCompiledIR(LLVMOrcJITStackRef JITStack, - LLVMOrcModuleHandle *RetHandle, - LLVMModuleRef Mod, - LLVMOrcSymbolResolverFn SymbolResolver, - void *SymbolResolverCtx); - -/** - * Add an object file. - * - * This method takes ownership of the given memory buffer and attempts to add - * it to the JIT as an object file. - * Clients should *not* dispose of the 'Obj' argument: the JIT will manage it - * from this call onwards. - */ -LLVMErrorRef LLVMOrcAddObjectFile(LLVMOrcJITStackRef JITStack, - LLVMOrcModuleHandle *RetHandle, - LLVMMemoryBufferRef Obj, - LLVMOrcSymbolResolverFn SymbolResolver, - void *SymbolResolverCtx); - -/** - * Remove a module set from the JIT. - * - * This works for all modules that can be added via OrcAdd*, including object - * files. - */ -LLVMErrorRef LLVMOrcRemoveModule(LLVMOrcJITStackRef JITStack, - LLVMOrcModuleHandle H); - -/** - * Get symbol address from JIT instance. - */ -LLVMErrorRef LLVMOrcGetSymbolAddress(LLVMOrcJITStackRef JITStack, - LLVMOrcTargetAddress *RetAddr, - const char *SymbolName); - -/** - * Get symbol address from JIT instance, searching only the specified - * handle. - */ -LLVMErrorRef LLVMOrcGetSymbolAddressIn(LLVMOrcJITStackRef JITStack, - LLVMOrcTargetAddress *RetAddr, - LLVMOrcModuleHandle H, - const char *SymbolName); - -/** - * Dispose of an ORC JIT stack. - */ -LLVMErrorRef LLVMOrcDisposeInstance(LLVMOrcJITStackRef JITStack); - -/** - * Register a JIT Event Listener. - * - * A NULL listener is ignored. - */ -void LLVMOrcRegisterJITEventListener(LLVMOrcJITStackRef JITStack, LLVMJITEventListenerRef L); - -/** - * Unegister a JIT Event Listener. - * - * A NULL listener is ignored. 
- */ -void LLVMOrcUnregisterJITEventListener(LLVMOrcJITStackRef JITStack, LLVMJITEventListenerRef L); - -LLVM_C_EXTERN_C_END - -#endif /* LLVM_C_ORCBINDINGS_H */ diff --git a/llvm/include/llvm-c/OrcEE.h b/llvm/include/llvm-c/OrcEE.h new file mode 100644 index 000000000000..2435e7421a42 --- /dev/null +++ b/llvm/include/llvm-c/OrcEE.h @@ -0,0 +1,55 @@ +/*===-- llvm-c/OrcEE.h - OrcV2 C bindings ExecutionEngine utils -*- C++ -*-===*\ +|* *| +|* Part of the LLVM Project, under the Apache License v2.0 with LLVM *| +|* Exceptions. *| +|* See https://llvm.org/LICENSE.txt for license information. *| +|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception *| +|* *| +|*===----------------------------------------------------------------------===*| +|* *| +|* This header declares the C interface to ExecutionEngine based utils, e.g. *| +|* RTDyldObjectLinkingLayer (based on RuntimeDyld) in Orc. *| +|* *| +|* Many exotic languages can interoperate with C code but have a harder time *| +|* with C++ due to name mangling. So in addition to C, this interface enables *| +|* tools written in such languages. *| +|* *| +|* Note: This interface is experimental. It is *NOT* stable, and may be *| +|* changed without warning. Only C API usage documentation is *| +|* provided. See the C++ documentation for all higher level ORC API *| +|* details. *| +|* *| +\*===----------------------------------------------------------------------===*/ + +#ifndef LLVM_C_ORCEE_H +#define LLVM_C_ORCEE_H + +#include "llvm-c/Error.h" +#include "llvm-c/ExecutionEngine.h" +#include "llvm-c/Orc.h" +#include "llvm-c/TargetMachine.h" +#include "llvm-c/Types.h" + +LLVM_C_EXTERN_C_BEGIN + +/** + * Create a RTDyldObjectLinkingLayer instance using the standard + * SectionMemoryManager for memory management. + */ +LLVMOrcObjectLayerRef +LLVMOrcCreateRTDyldObjectLinkingLayerWithSectionMemoryManager( + LLVMOrcExecutionSessionRef ES); + +/** + * Add the given listener to the given RTDyldObjectLinkingLayer. + * + * Note: Layer must be an RTDyldObjectLinkingLayer instance or + * behavior is undefined. + */ +void LLVMOrcRTDyldObjectLinkingLayerRegisterJITEventListener( + LLVMOrcObjectLayerRef RTDyldObjLinkingLayer, + LLVMJITEventListenerRef Listener); + +LLVM_C_EXTERN_C_END + +#endif /* LLVM_C_ORCEE_H */ diff --git a/llvm/include/llvm-c/Transforms/IPO.h b/llvm/include/llvm-c/Transforms/IPO.h index cde3d2460920..3f2cadf32366 100644 --- a/llvm/include/llvm-c/Transforms/IPO.h +++ b/llvm/include/llvm-c/Transforms/IPO.h @@ -57,9 +57,6 @@ void LLVMAddGlobalDCEPass(LLVMPassManagerRef PM); /** See llvm::createGlobalOptimizerPass function. */ void LLVMAddGlobalOptimizerPass(LLVMPassManagerRef PM); -/** See llvm::createIPConstantPropagationPass function. */ -void LLVMAddIPConstantPropagationPass(LLVMPassManagerRef PM); - /** See llvm::createPruneEHPass function. */ void LLVMAddPruneEHPass(LLVMPassManagerRef PM); diff --git a/llvm/include/llvm-c/Transforms/Scalar.h b/llvm/include/llvm-c/Transforms/Scalar.h index 93d79a205195..ba142508bbe4 100644 --- a/llvm/include/llvm-c/Transforms/Scalar.h +++ b/llvm/include/llvm-c/Transforms/Scalar.h @@ -67,6 +67,9 @@ void LLVMAddIndVarSimplifyPass(LLVMPassManagerRef PM); /** See llvm::createInstructionCombiningPass function. */ void LLVMAddInstructionCombiningPass(LLVMPassManagerRef PM); +/** See llvm::createInstSimplifyLegacyPass function. */ +void LLVMAddInstructionSimplifyPass(LLVMPassManagerRef PM); + /** See llvm::createJumpThreadingPass function. 
*/ void LLVMAddJumpThreadingPass(LLVMPassManagerRef PM); @@ -125,9 +128,6 @@ void LLVMAddSimplifyLibCallsPass(LLVMPassManagerRef PM); /** See llvm::createTailCallEliminationPass function. */ void LLVMAddTailCallEliminationPass(LLVMPassManagerRef PM); -/** See llvm::createConstantPropagationPass function. */ -void LLVMAddConstantPropagationPass(LLVMPassManagerRef PM); - /** See llvm::demotePromoteMemoryToRegisterPass function. */ void LLVMAddDemoteMemoryToRegisterPass(LLVMPassManagerRef PM); diff --git a/llvm/include/llvm/ADT/APFixedPoint.h b/llvm/include/llvm/ADT/APFixedPoint.h new file mode 100644 index 000000000000..d6349e6b2a88 --- /dev/null +++ b/llvm/include/llvm/ADT/APFixedPoint.h @@ -0,0 +1,237 @@ +//===- APFixedPoint.h - Fixed point constant handling -----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file +/// Defines the fixed point number interface. +/// This is a class for abstracting various operations performed on fixed point +/// types. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ADT_APFIXEDPOINT_H +#define LLVM_ADT_APFIXEDPOINT_H + +#include "llvm/ADT/APSInt.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/Support/raw_ostream.h" + +namespace llvm { + +class APFloat; +struct fltSemantics; + +/// The fixed point semantics work similarly to fltSemantics. The width +/// specifies the whole bit width of the underlying scaled integer (with padding +/// if any). The scale represents the number of fractional bits in this type. +/// When HasUnsignedPadding is true and this type is unsigned, the first bit +/// in the value this represents is treated as padding. +class FixedPointSemantics { +public: + FixedPointSemantics(unsigned Width, unsigned Scale, bool IsSigned, + bool IsSaturated, bool HasUnsignedPadding) + : Width(Width), Scale(Scale), IsSigned(IsSigned), + IsSaturated(IsSaturated), HasUnsignedPadding(HasUnsignedPadding) { + assert(Width >= Scale && "Not enough room for the scale"); + assert(!(IsSigned && HasUnsignedPadding) && + "Cannot have unsigned padding on a signed type."); + } + + unsigned getWidth() const { return Width; } + unsigned getScale() const { return Scale; } + bool isSigned() const { return IsSigned; } + bool isSaturated() const { return IsSaturated; } + bool hasUnsignedPadding() const { return HasUnsignedPadding; } + + void setSaturated(bool Saturated) { IsSaturated = Saturated; } + + /// Return the number of integral bits represented by these semantics. These + /// are separate from the fractional bits and do not include the sign or + /// padding bit. + unsigned getIntegralBits() const { + if (IsSigned || (!IsSigned && HasUnsignedPadding)) + return Width - Scale - 1; + else + return Width - Scale; + } + + /// Return the FixedPointSemantics that allows for calculating the full + /// precision semantic that can precisely represent the precision and ranges + /// of both input values. This does not compute the resulting semantics for a + /// given binary operation. 
+ FixedPointSemantics + getCommonSemantics(const FixedPointSemantics &Other) const; + + /// Returns true if this fixed-point semantic with its value bits interpreted + /// as an integer can fit in the given floating point semantic without + /// overflowing to infinity. + /// For example, a signed 8-bit fixed-point semantic has a maximum and + /// minimum integer representation of 127 and -128, respectively. If both of + /// these values can be represented (possibly inexactly) in the floating + /// point semantic without overflowing, this returns true. + bool fitsInFloatSemantics(const fltSemantics &FloatSema) const; + + /// Return the FixedPointSemantics for an integer type. + static FixedPointSemantics GetIntegerSemantics(unsigned Width, + bool IsSigned) { + return FixedPointSemantics(Width, /*Scale=*/0, IsSigned, + /*IsSaturated=*/false, + /*HasUnsignedPadding=*/false); + } + +private: + unsigned Width : 16; + unsigned Scale : 13; + unsigned IsSigned : 1; + unsigned IsSaturated : 1; + unsigned HasUnsignedPadding : 1; +}; + +/// The APFixedPoint class works similarly to APInt/APSInt in that it is a +/// functional replacement for a scaled integer. It is meant to replicate the +/// fixed point types proposed in ISO/IEC JTC1 SC22 WG14 N1169. The class carries +/// info about the fixed point type's width, sign, scale, and saturation, and +/// provides different operations that would normally be performed on fixed point +/// types. +class APFixedPoint { +public: + APFixedPoint(const APInt &Val, const FixedPointSemantics &Sema) + : Val(Val, !Sema.isSigned()), Sema(Sema) { + assert(Val.getBitWidth() == Sema.getWidth() && + "The value should have a bit width that matches the Sema width"); + } + + APFixedPoint(uint64_t Val, const FixedPointSemantics &Sema) + : APFixedPoint(APInt(Sema.getWidth(), Val, Sema.isSigned()), Sema) {} + + // Zero initialization. + APFixedPoint(const FixedPointSemantics &Sema) : APFixedPoint(0, Sema) {} + + APSInt getValue() const { return APSInt(Val, !Sema.isSigned()); } + inline unsigned getWidth() const { return Sema.getWidth(); } + inline unsigned getScale() const { return Sema.getScale(); } + inline bool isSaturated() const { return Sema.isSaturated(); } + inline bool isSigned() const { return Sema.isSigned(); } + inline bool hasPadding() const { return Sema.hasUnsignedPadding(); } + FixedPointSemantics getSemantics() const { return Sema; } + + bool getBoolValue() const { return Val.getBoolValue(); } + + // Convert this number to match the semantics provided. If the overflow + // parameter is provided, set this value to true or false to indicate if this + // operation results in an overflow. + APFixedPoint convert(const FixedPointSemantics &DstSema, + bool *Overflow = nullptr) const; + + // Perform binary operations on a fixed point type. The resulting fixed point + // value will be in the common, full precision semantics that can represent + // the precision and ranges of both input values. See convert() for an + // explanation of the Overflow parameter. + APFixedPoint add(const APFixedPoint &Other, bool *Overflow = nullptr) const; + APFixedPoint sub(const APFixedPoint &Other, bool *Overflow = nullptr) const; + APFixedPoint mul(const APFixedPoint &Other, bool *Overflow = nullptr) const; + APFixedPoint div(const APFixedPoint &Other, bool *Overflow = nullptr) const; + + // Perform shift operations on a fixed point type. Unlike the other binary + // operations, the resulting fixed point value will be in the original + // semantic. 
+ APFixedPoint shl(unsigned Amt, bool *Overflow = nullptr) const; + APFixedPoint shr(unsigned Amt, bool *Overflow = nullptr) const { + // Right shift cannot overflow. + if (Overflow) + *Overflow = false; + return APFixedPoint(Val >> Amt, Sema); + } + + /// Perform a unary negation (-X) on this fixed point type, taking into + /// account saturation if applicable. + APFixedPoint negate(bool *Overflow = nullptr) const; + + /// Return the integral part of this fixed point number, rounded towards + /// zero. (-2.5k -> -2) + APSInt getIntPart() const { + if (Val < 0 && Val != -Val) // Cover the case when we have the min val + return -(-Val >> getScale()); + else + return Val >> getScale(); + } + + /// Return the integral part of this fixed point number, rounded towards + /// zero. The value is stored into an APSInt with the provided width and sign. + /// If the overflow parameter is provided, and the integral value is not able + /// to be fully stored in the provided width and sign, the overflow parameter + /// is set to true. + APSInt convertToInt(unsigned DstWidth, bool DstSign, + bool *Overflow = nullptr) const; + + /// Convert this fixed point number to a floating point value with the + /// provided semantics. + APFloat convertToFloat(const fltSemantics &FloatSema) const; + + void toString(SmallVectorImpl<char> &Str) const; + std::string toString() const { + SmallString<40> S; + toString(S); + return std::string(S.str()); + } + + // If LHS > RHS, return 1. If LHS == RHS, return 0. If LHS < RHS, return -1. + int compare(const APFixedPoint &Other) const; + bool operator==(const APFixedPoint &Other) const { + return compare(Other) == 0; + } + bool operator!=(const APFixedPoint &Other) const { + return compare(Other) != 0; + } + bool operator>(const APFixedPoint &Other) const { return compare(Other) > 0; } + bool operator<(const APFixedPoint &Other) const { return compare(Other) < 0; } + bool operator>=(const APFixedPoint &Other) const { + return compare(Other) >= 0; + } + bool operator<=(const APFixedPoint &Other) const { + return compare(Other) <= 0; + } + + static APFixedPoint getMax(const FixedPointSemantics &Sema); + static APFixedPoint getMin(const FixedPointSemantics &Sema); + + /// Given a floating point semantic, return the next floating point semantic + /// with a larger exponent and larger or equal mantissa. + static const fltSemantics *promoteFloatSemantics(const fltSemantics *S); + + /// Create an APFixedPoint with a value equal to that of the provided integer, + /// and in the same semantics as the provided target semantics. If the value + /// is not able to fit in the specified fixed point semantics, and the + /// overflow parameter is provided, it is set to true. + static APFixedPoint getFromIntValue(const APSInt &Value, + const FixedPointSemantics &DstFXSema, + bool *Overflow = nullptr); + + /// Create an APFixedPoint with a value equal to that of the provided + /// floating point value, in the provided target semantics. If the value is + /// not able to fit in the specified fixed point semantics and the overflow + /// parameter is specified, it is set to true. + /// For NaN, the Overflow flag is always set. For +inf and -inf, if the + /// semantic is saturating, the value saturates. Otherwise, the Overflow flag + /// is set. 
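A small sketch of the new APFixedPoint class in use, assuming a 32-bit, 15-fractional-bit, signed, saturating format; note that the integer constructor takes the raw scaled value, not the mathematical value.

```cpp
#include "llvm/ADT/APFixedPoint.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  // A signed, saturating format: 32 bits total, 15 of them fractional.
  FixedPointSemantics Sema(/*Width=*/32, /*Scale=*/15, /*IsSigned=*/true,
                           /*IsSaturated=*/true, /*HasUnsignedPadding=*/false);

  // The integer constructor takes the raw scaled value: 1.5 * 2^15 and 1.0 * 2^15.
  APFixedPoint A(uint64_t(3) << 14, Sema); // 1.5
  APFixedPoint B(uint64_t(1) << 15, Sema); // 1.0

  bool Overflow = false;
  APFixedPoint Sum = A.add(B, &Overflow);  // 2.5, in the common full-precision semantics
  APFixedPoint Prod = A.mul(B, &Overflow); // 1.5

  outs() << "sum = " << Sum.toString() << ", prod = " << Prod.toString()
         << ", overflow = " << (Overflow ? "true" : "false") << "\n";

  // Integral part, rounded towards zero.
  APSInt IntPart = Sum.getIntPart(); // 2
  outs() << "int part = " << IntPart.toString(10) << "\n";
  return 0;
}
```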
+ static APFixedPoint getFromFloatValue(const APFloat &Value, + const FixedPointSemantics &DstFXSema, + bool *Overflow = nullptr); + +private: + APSInt Val; + FixedPointSemantics Sema; +}; + +inline raw_ostream &operator<<(raw_ostream &OS, const APFixedPoint &FX) { + OS << FX.toString(); + return OS; +} + +} // namespace llvm + +#endif diff --git a/llvm/include/llvm/ADT/APFloat.h b/llvm/include/llvm/ADT/APFloat.h index 876e52c150a0..1f9ac22621a6 100644 --- a/llvm/include/llvm/ADT/APFloat.h +++ b/llvm/include/llvm/ADT/APFloat.h @@ -249,7 +249,7 @@ public: /// \name Constructors /// @{ - IEEEFloat(const fltSemantics &); // Default construct to 0.0 + IEEEFloat(const fltSemantics &); // Default construct to +0.0 IEEEFloat(const fltSemantics &, integerPart); IEEEFloat(const fltSemantics &, uninitializedTag); IEEEFloat(const fltSemantics &, const APInt &); @@ -539,6 +539,9 @@ private: roundingMode) const; opStatus roundSignificandWithExponent(const integerPart *, unsigned int, int, roundingMode); + ExponentType exponentNaN() const; + ExponentType exponentInf() const; + ExponentType exponentZero() const; /// @} diff --git a/llvm/include/llvm/ADT/APInt.h b/llvm/include/llvm/ADT/APInt.h index f7df648d27ed..b97ea2cd9aee 100644 --- a/llvm/include/llvm/ADT/APInt.h +++ b/llvm/include/llvm/ADT/APInt.h @@ -31,6 +31,7 @@ class raw_ostream; template <typename T> class SmallVectorImpl; template <typename T> class ArrayRef; template <typename T> class Optional; +template <typename T> struct DenseMapInfo; class APInt; @@ -96,7 +97,7 @@ private: unsigned BitWidth; ///< The number of bits in this APInt. - friend struct DenseMapAPIntKeyInfo; + friend struct DenseMapInfo<APInt>; friend class APSInt; @@ -764,8 +765,8 @@ public: /// Move assignment operator. APInt &operator=(APInt &&that) { -#ifdef _MSC_VER - // The MSVC std::shuffle implementation still does self-assignment. +#ifdef EXPENSIVE_CHECKS + // Some std::shuffle implementations still do self-assignment. if (this == &that) return *this; #endif @@ -793,11 +794,10 @@ public: APInt &operator=(uint64_t RHS) { if (isSingleWord()) { U.VAL = RHS; - clearUnusedBits(); - } else { - U.pVal[0] = RHS; - memset(U.pVal+1, 0, (getNumWords() - 1) * APINT_WORD_SIZE); + return clearUnusedBits(); } + U.pVal[0] = RHS; + memset(U.pVal + 1, 0, (getNumWords() - 1) * APINT_WORD_SIZE); return *this; } @@ -854,10 +854,9 @@ public: APInt &operator|=(uint64_t RHS) { if (isSingleWord()) { U.VAL |= RHS; - clearUnusedBits(); - } else { - U.pVal[0] |= RHS; + return clearUnusedBits(); } + U.pVal[0] |= RHS; return *this; } @@ -884,10 +883,9 @@ public: APInt &operator^=(uint64_t RHS) { if (isSingleWord()) { U.VAL ^= RHS; - clearUnusedBits(); - } else { - U.pVal[0] ^= RHS; + return clearUnusedBits(); } + U.pVal[0] ^= RHS; return *this; } @@ -1405,6 +1403,12 @@ public: /// extended, truncated, or left alone to make it that width. APInt zextOrTrunc(unsigned width) const; + /// Truncate to width + /// + /// Make this APInt have the bit width given by \p width. The value is + /// truncated or left alone to make it that width. + APInt truncOrSelf(unsigned width) const; + /// Sign extend or truncate to width /// /// Make this APInt have the bit width given by \p width. The value is sign @@ -1449,6 +1453,14 @@ public: setBit(BitWidth - 1); } + /// Set a given bit to a given value. + void setBitVal(unsigned BitPosition, bool BitValue) { + if (BitValue) + setBit(BitPosition); + else + clearBit(BitPosition); + } + /// Set the bits from loBit (inclusive) to hiBit (exclusive) to 1. 
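The new APInt helpers introduced above (setBitVal and truncOrSelf) might be exercised like this:

```cpp
#include "llvm/ADT/APInt.h"

using namespace llvm;

void apintNewHelpers() {
  APInt X(/*numBits=*/32, /*val=*/0);

  // setBitVal folds the set-or-clear decision into one call.
  X.setBitVal(3, true);  // X == 0b1000
  X.setBitVal(3, false); // X == 0

  // truncOrSelf truncates to the requested width, or leaves the value alone
  // if it is already that narrow or narrower.
  APInt Wide(64, 0xffff);
  APInt Narrow = Wide.truncOrSelf(16); // 16-bit 0xffff
  APInt Same = Narrow.truncOrSelf(32); // unchanged: still 16 bits wide
  (void)Narrow;
  (void)Same;
}
```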
/// This function handles "wrap" case when \p loBit >= \p hiBit, and calls /// setBits when \p loBit < \p hiBit. @@ -1609,11 +1621,7 @@ public: /// returns the smallest bit width that will retain the negative value. For /// example, -1 can be written as 0b1 or 0xFFFFFFFFFF. 0b1 is shorter and so /// for -1, this function will always return 1. - unsigned getMinSignedBits() const { - if (isNegative()) - return BitWidth - countLeadingOnes() + 1; - return getActiveBits() + 1; - } + unsigned getMinSignedBits() const { return BitWidth - getNumSignBits() + 1; } /// Get zero extended value /// diff --git a/llvm/include/llvm/ADT/APSInt.h b/llvm/include/llvm/ADT/APSInt.h index 0f991826c457..82e9ba81141f 100644 --- a/llvm/include/llvm/ADT/APSInt.h +++ b/llvm/include/llvm/ADT/APSInt.h @@ -18,6 +18,7 @@ namespace llvm { +/// An arbitrary precision integer that knows its signedness. class LLVM_NODISCARD APSInt : public APInt { bool IsUnsigned; @@ -25,8 +26,7 @@ public: /// Default constructor that creates an uninitialized APInt. explicit APSInt() : IsUnsigned(false) {} - /// APSInt ctor - Create an APSInt with the specified width, default to - /// unsigned. + /// Create an APSInt with the specified width, default to unsigned. explicit APSInt(uint32_t BitWidth, bool isUnsigned = true) : APInt(BitWidth, 0), IsUnsigned(isUnsigned) {} @@ -78,11 +78,11 @@ public: void setIsUnsigned(bool Val) { IsUnsigned = Val; } void setIsSigned(bool Val) { IsUnsigned = !Val; } - /// toString - Append this APSInt to the specified SmallString. + /// Append this APSInt to the specified SmallString. void toString(SmallVectorImpl<char> &Str, unsigned Radix = 10) const { APInt::toString(Str, Radix, isSigned()); } - /// toString - Converts an APInt to a std::string. This is an inefficient + /// Converts an APInt to a std::string. This is an inefficient /// method; you should prefer passing in a SmallString instead. std::string toString(unsigned Radix) const { return APInt::toString(Radix, isSigned()); @@ -282,15 +282,15 @@ public: return APSInt(~static_cast<const APInt&>(*this), IsUnsigned); } - /// getMaxValue - Return the APSInt representing the maximum integer value - /// with the given bit width and signedness. + /// Return the APSInt representing the maximum integer value with the given + /// bit width and signedness. static APSInt getMaxValue(uint32_t numBits, bool Unsigned) { return APSInt(Unsigned ? APInt::getMaxValue(numBits) : APInt::getSignedMaxValue(numBits), Unsigned); } - /// getMinValue - Return the APSInt representing the minimum integer value - /// with the given bit width and signedness. + /// Return the APSInt representing the minimum integer value with the given + /// bit width and signedness. static APSInt getMinValue(uint32_t numBits, bool Unsigned) { return APSInt(Unsigned ? APInt::getMinValue(numBits) : APInt::getSignedMinValue(numBits), Unsigned); @@ -331,8 +331,8 @@ public: static APSInt get(int64_t X) { return APSInt(APInt(64, X), false); } static APSInt getUnsigned(uint64_t X) { return APSInt(APInt(64, X), true); } - /// Profile - Used to insert APSInt objects, or objects that contain APSInt - /// objects, into FoldingSets. + /// Used to insert APSInt objects, or objects that contain APSInt objects, + /// into FoldingSets. 
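// ---------------------------------------------------------------------------
// Editor's illustrative sketch (not part of the imported patch): the APSInt
// helpers documented above in action. The demo function name is hypothetical.
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/SmallString.h"
#include <cassert>

static void apsIntSketch() {
  using namespace llvm;
  APSInt Max = APSInt::getMaxValue(/*numBits=*/8, /*Unsigned=*/false); // 127
  APSInt Min = APSInt::getMinValue(/*numBits=*/8, /*Unsigned=*/false); // -128
  assert(Min < Max);                        // same width and signedness

  SmallString<16> Str;
  Min.toString(Str, /*Radix=*/10);          // "-128"

  APSInt FortyTwo = APSInt::get(42);              // 64-bit, signed
  APSInt Big = APSInt::getUnsigned(1ULL << 40);   // 64-bit, unsigned
  (void)FortyTwo; (void)Big; (void)Str;
}
// ---------------------------------------------------------------------------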
void Profile(FoldingSetNodeID& ID) const; }; diff --git a/llvm/include/llvm/ADT/AllocatorList.h b/llvm/include/llvm/ADT/AllocatorList.h index 447d7a7538db..404a657f27de 100644 --- a/llvm/include/llvm/ADT/AllocatorList.h +++ b/llvm/include/llvm/ADT/AllocatorList.h @@ -118,13 +118,6 @@ private: reference operator*() const { return base_type::wrapped()->V; } pointer operator->() const { return &operator*(); } - - friend bool operator==(const IteratorImpl &L, const IteratorImpl &R) { - return L.wrapped() == R.wrapped(); - } - friend bool operator!=(const IteratorImpl &L, const IteratorImpl &R) { - return !(L == R); - } }; public: diff --git a/llvm/include/llvm/ADT/BitVector.h b/llvm/include/llvm/ADT/BitVector.h index a8d0f07af94a..2a857786f454 100644 --- a/llvm/include/llvm/ADT/BitVector.h +++ b/llvm/include/llvm/ADT/BitVector.h @@ -203,9 +203,10 @@ public: return !any(); } - /// find_first_in - Returns the index of the first set bit in the range - /// [Begin, End). Returns -1 if all bits in the range are unset. - int find_first_in(unsigned Begin, unsigned End) const { + /// find_first_in - Returns the index of the first set / unset bit, + /// depending on \p Set, in the range [Begin, End). + /// Returns -1 if all bits in the range are unset / set. + int find_first_in(unsigned Begin, unsigned End, bool Set = true) const { assert(Begin <= End && End <= Size); if (Begin == End) return -1; @@ -214,8 +215,14 @@ public: unsigned LastWord = (End - 1) / BITWORD_SIZE; // Check subsequent words. + // The code below is based on search for the first _set_ bit. If + // we're searching for the first _unset_, we just take the + // complement of each word before we use it and apply + // the same method. for (unsigned i = FirstWord; i <= LastWord; ++i) { BitWord Copy = Bits[i]; + if (!Set) + Copy = ~Copy; if (i == FirstWord) { unsigned FirstBit = Begin % BITWORD_SIZE; @@ -266,32 +273,7 @@ public: /// find_first_unset_in - Returns the index of the first unset bit in the /// range [Begin, End). Returns -1 if all bits in the range are set. int find_first_unset_in(unsigned Begin, unsigned End) const { - assert(Begin <= End && End <= Size); - if (Begin == End) - return -1; - - unsigned FirstWord = Begin / BITWORD_SIZE; - unsigned LastWord = (End - 1) / BITWORD_SIZE; - - // Check subsequent words. - for (unsigned i = FirstWord; i <= LastWord; ++i) { - BitWord Copy = Bits[i]; - - if (i == FirstWord) { - unsigned FirstBit = Begin % BITWORD_SIZE; - Copy |= maskTrailingOnes<BitWord>(FirstBit); - } - - if (i == LastWord) { - unsigned LastBit = (End - 1) % BITWORD_SIZE; - Copy |= maskTrailingZeros<BitWord>(LastBit + 1); - } - if (Copy != ~BitWord(0)) { - unsigned Result = i * BITWORD_SIZE + countTrailingOnes(Copy); - return Result < size() ? Result : -1; - } - } - return -1; + return find_first_in(Begin, End, /* Set = */ false); } /// find_last_unset_in - Returns the index of the last unset bit in the diff --git a/llvm/include/llvm/ADT/CoalescingBitVector.h b/llvm/include/llvm/ADT/CoalescingBitVector.h index f8c8fec0ec9e..0a7dcfe22631 100644 --- a/llvm/include/llvm/ADT/CoalescingBitVector.h +++ b/llvm/include/llvm/ADT/CoalescingBitVector.h @@ -34,15 +34,14 @@ namespace llvm { /// performance for non-sequential find() operations. /// /// \tparam IndexT - The type of the index into the bitvector. -/// \tparam N - The first N coalesced intervals of set bits are stored in-place. 
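// ---------------------------------------------------------------------------
// Editor's illustrative sketch (not part of the imported patch): the unified
// BitVector::find_first_in above can now search for unset bits as well, which
// is what find_first_unset_in delegates to. The demo function name is
// hypothetical.
#include "llvm/ADT/BitVector.h"

static void bitVectorSketch() {
  llvm::BitVector BV(16);          // 16 bits, all initially unset
  BV.set(3);
  BV.set(7);

  int FirstSet   = BV.find_first_in(0, 16);                 // 3
  int FirstUnset = BV.find_first_in(3, 16, /*Set=*/false);  // 4
  int Same       = BV.find_first_unset_in(3, 16);           // also 4
  (void)FirstSet; (void)FirstUnset; (void)Same;
}
// ---------------------------------------------------------------------------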
-template <typename IndexT, unsigned N = 16> class CoalescingBitVector { +template <typename IndexT> class CoalescingBitVector { static_assert(std::is_unsigned<IndexT>::value, "Index must be an unsigned integer."); - using ThisT = CoalescingBitVector<IndexT, N>; + using ThisT = CoalescingBitVector<IndexT>; /// An interval map for closed integer ranges. The mapped values are unused. - using MapT = IntervalMap<IndexT, char, N>; + using MapT = IntervalMap<IndexT, char>; using UnderlyingIterator = typename MapT::const_iterator; diff --git a/llvm/include/llvm/ADT/DenseMap.h b/llvm/include/llvm/ADT/DenseMap.h index 34d397cc9793..ce0b05db840c 100644 --- a/llvm/include/llvm/ADT/DenseMap.h +++ b/llvm/include/llvm/ADT/DenseMap.h @@ -426,8 +426,8 @@ protected: setNumEntries(other.getNumEntries()); setNumTombstones(other.getNumTombstones()); - if (is_trivially_copyable<KeyT>::value && - is_trivially_copyable<ValueT>::value) + if (std::is_trivially_copyable<KeyT>::value && + std::is_trivially_copyable<ValueT>::value) memcpy(reinterpret_cast<void *>(getBuckets()), other.getBuckets(), getNumBuckets() * sizeof(BucketT)); else @@ -954,7 +954,7 @@ public: std::swap(*LHSB, *RHSB); continue; } - // Swap separately and handle any assymetry. + // Swap separately and handle any asymmetry. std::swap(LHSB->getFirst(), RHSB->getFirst()); if (hasLHSValue) { ::new (&RHSB->getSecond()) ValueT(std::move(LHSB->getSecond())); @@ -1042,7 +1042,7 @@ public: if (Small) { // First move the inline buckets into a temporary storage. AlignedCharArrayUnion<BucketT[InlineBuckets]> TmpStorage; - BucketT *TmpBegin = reinterpret_cast<BucketT *>(TmpStorage.buffer); + BucketT *TmpBegin = reinterpret_cast<BucketT *>(&TmpStorage); BucketT *TmpEnd = TmpBegin; // Loop over the buckets, moving non-empty, non-tombstones into the @@ -1132,8 +1132,8 @@ private: assert(Small); // Note that this cast does not violate aliasing rules as we assert that // the memory's dynamic type is the small, inline bucket buffer, and the - // 'storage.buffer' static type is 'char *'. - return reinterpret_cast<const BucketT *>(storage.buffer); + // 'storage' is a POD containing a char buffer. + return reinterpret_cast<const BucketT *>(&storage); } BucketT *getInlineBuckets() { @@ -1144,7 +1144,7 @@ private: const LargeRep *getLargeRep() const { assert(!Small); // Note, same rule about aliasing as with getInlineBuckets. 
- return reinterpret_cast<const LargeRep *>(storage.buffer); + return reinterpret_cast<const LargeRep *>(&storage); } LargeRep *getLargeRep() { @@ -1190,8 +1190,6 @@ class DenseMapIterator : DebugEpochBase::HandleBase { friend class DenseMapIterator<KeyT, ValueT, KeyInfoT, Bucket, true>; friend class DenseMapIterator<KeyT, ValueT, KeyInfoT, Bucket, false>; - using ConstIterator = DenseMapIterator<KeyT, ValueT, KeyInfoT, Bucket, true>; - public: using difference_type = ptrdiff_t; using value_type = @@ -1244,19 +1242,18 @@ public: return Ptr; } - bool operator==(const ConstIterator &RHS) const { - assert((!Ptr || isHandleInSync()) && "handle not in sync!"); + friend bool operator==(const DenseMapIterator &LHS, + const DenseMapIterator &RHS) { + assert((!LHS.Ptr || LHS.isHandleInSync()) && "handle not in sync!"); assert((!RHS.Ptr || RHS.isHandleInSync()) && "handle not in sync!"); - assert(getEpochAddress() == RHS.getEpochAddress() && + assert(LHS.getEpochAddress() == RHS.getEpochAddress() && "comparing incomparable iterators!"); - return Ptr == RHS.Ptr; + return LHS.Ptr == RHS.Ptr; } - bool operator!=(const ConstIterator &RHS) const { - assert((!Ptr || isHandleInSync()) && "handle not in sync!"); - assert((!RHS.Ptr || RHS.isHandleInSync()) && "handle not in sync!"); - assert(getEpochAddress() == RHS.getEpochAddress() && - "comparing incomparable iterators!"); - return Ptr != RHS.Ptr; + + friend bool operator!=(const DenseMapIterator &LHS, + const DenseMapIterator &RHS) { + return !(LHS == RHS); } inline DenseMapIterator& operator++() { // Preincrement diff --git a/llvm/include/llvm/ADT/DenseMapInfo.h b/llvm/include/llvm/ADT/DenseMapInfo.h index e465331ac6f7..8271b9334b86 100644 --- a/llvm/include/llvm/ADT/DenseMapInfo.h +++ b/llvm/include/llvm/ADT/DenseMapInfo.h @@ -13,6 +13,8 @@ #ifndef LLVM_ADT_DENSEMAPINFO_H #define LLVM_ADT_DENSEMAPINFO_H +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/APSInt.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/Hashing.h" #include "llvm/ADT/StringRef.h" @@ -347,6 +349,49 @@ template <> struct DenseMapInfo<hash_code> { static bool isEqual(hash_code LHS, hash_code RHS) { return LHS == RHS; } }; +/// Provide DenseMapInfo for APInt. +template <> struct DenseMapInfo<APInt> { + static inline APInt getEmptyKey() { + APInt V(nullptr, 0); + V.U.VAL = 0; + return V; + } + + static inline APInt getTombstoneKey() { + APInt V(nullptr, 0); + V.U.VAL = 1; + return V; + } + + static unsigned getHashValue(const APInt &Key) { + return static_cast<unsigned>(hash_value(Key)); + } + + static bool isEqual(const APInt &LHS, const APInt &RHS) { + return LHS.getBitWidth() == RHS.getBitWidth() && LHS == RHS; + } +}; + +/// Provide DenseMapInfo for APSInt, using the DenseMapInfo for APInt. 
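// ---------------------------------------------------------------------------
// Editor's illustrative sketch (not part of the imported patch): with the
// DenseMapInfo<APInt> specialization above, APInt can be used directly as a
// DenseMap key. The demo function name is hypothetical.
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include <cassert>

static void denseMapAPIntSketch() {
  llvm::DenseMap<llvm::APInt, unsigned> UseCount;
  ++UseCount[llvm::APInt(32, 7)];
  ++UseCount[llvm::APInt(32, 7)];
  ++UseCount[llvm::APInt(64, 7)];  // isEqual also compares bit widths

  assert(UseCount[llvm::APInt(32, 7)] == 2);
  assert(UseCount.size() == 2);
}
// ---------------------------------------------------------------------------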
+template <> struct DenseMapInfo<APSInt> { + static inline APSInt getEmptyKey() { + return APSInt(DenseMapInfo<APInt>::getEmptyKey()); + } + + static inline APSInt getTombstoneKey() { + return APSInt(DenseMapInfo<APInt>::getTombstoneKey()); + } + + static unsigned getHashValue(const APSInt &Key) { + return static_cast<unsigned>(hash_value(Key)); + } + + static bool isEqual(const APSInt &LHS, const APSInt &RHS) { + return LHS.getBitWidth() == RHS.getBitWidth() && + LHS.isUnsigned() == RHS.isUnsigned() && LHS == RHS; + } +}; + } // end namespace llvm #endif // LLVM_ADT_DENSEMAPINFO_H diff --git a/llvm/include/llvm/ADT/DenseSet.h b/llvm/include/llvm/ADT/DenseSet.h index 07edc3d8e4ec..edce7c43773c 100644 --- a/llvm/include/llvm/ADT/DenseSet.h +++ b/llvm/include/llvm/ADT/DenseSet.h @@ -130,8 +130,12 @@ public: Iterator& operator++() { ++I; return *this; } Iterator operator++(int) { auto T = *this; ++I; return T; } - bool operator==(const ConstIterator& X) const { return I == X.I; } - bool operator!=(const ConstIterator& X) const { return I != X.I; } + friend bool operator==(const Iterator &X, const Iterator &Y) { + return X.I == Y.I; + } + friend bool operator!=(const Iterator &X, const Iterator &Y) { + return X.I != Y.I; + } }; class ConstIterator { @@ -155,8 +159,12 @@ public: ConstIterator& operator++() { ++I; return *this; } ConstIterator operator++(int) { auto T = *this; ++I; return T; } - bool operator==(const ConstIterator& X) const { return I == X.I; } - bool operator!=(const ConstIterator& X) const { return I != X.I; } + friend bool operator==(const ConstIterator &X, const ConstIterator &Y) { + return X.I == Y.I; + } + friend bool operator!=(const ConstIterator &X, const ConstIterator &Y) { + return X.I != Y.I; + } }; using iterator = Iterator; @@ -173,6 +181,11 @@ public: return ConstIterator(TheMap.find(V)); } + /// Check if the set contains the given element. + bool contains(const_arg_type_t<ValueT> V) const { + return TheMap.find(V) != TheMap.end(); + } + /// Alternative version of find() which allows a different, and possibly less /// expensive, key type. /// The DenseMapInfo is responsible for supplying methods diff --git a/llvm/include/llvm/ADT/DepthFirstIterator.h b/llvm/include/llvm/ADT/DepthFirstIterator.h index 11967f5eefcc..5bfea28332b2 100644 --- a/llvm/include/llvm/ADT/DepthFirstIterator.h +++ b/llvm/include/llvm/ADT/DepthFirstIterator.h @@ -198,7 +198,7 @@ public: // nodes that a depth first iteration did not find: ie unreachable nodes. // bool nodeVisited(NodeRef Node) const { - return this->Visited.count(Node) != 0; + return this->Visited.contains(Node); } /// getPathLength - Return the length of the path from the entry node to the diff --git a/llvm/include/llvm/ADT/DirectedGraph.h b/llvm/include/llvm/ADT/DirectedGraph.h index cfe98e178a91..e8bb9e6b2292 100644 --- a/llvm/include/llvm/ADT/DirectedGraph.h +++ b/llvm/include/llvm/ADT/DirectedGraph.h @@ -38,8 +38,10 @@ public: /// Static polymorphism: delegate implementation (via isEqualTo) to the /// derived class. - bool operator==(const EdgeType &E) const { return getDerived().isEqualTo(E); } - bool operator!=(const EdgeType &E) const { return !operator==(E); } + bool operator==(const DGEdge &E) const { + return getDerived().isEqualTo(E.getDerived()); + } + bool operator!=(const DGEdge &E) const { return !operator==(E); } /// Retrieve the target node this edge connects to. 
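// ---------------------------------------------------------------------------
// Editor's illustrative sketch (not part of the imported patch): the new
// DenseSet::contains() above reads more directly than find()/count(). The
// demo function name is hypothetical.
#include "llvm/ADT/DenseSet.h"

static void denseSetContainsSketch() {
  llvm::DenseSet<int> Visited;
  Visited.insert(42);

  if (Visited.contains(42)) {
    // already seen; skip re-processing
  }
  bool Missing = !Visited.contains(7);  // true
  (void)Missing;
}
// ---------------------------------------------------------------------------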
const NodeType &getTargetNode() const { return TargetNode; } @@ -91,8 +93,12 @@ public: /// Static polymorphism: delegate implementation (via isEqualTo) to the /// derived class. - bool operator==(const NodeType &N) const { return getDerived().isEqualTo(N); } - bool operator!=(const NodeType &N) const { return !operator==(N); } + friend bool operator==(const NodeType &M, const NodeType &N) { + return M.isEqualTo(N); + } + friend bool operator!=(const NodeType &M, const NodeType &N) { + return !(M == N); + } const_iterator begin() const { return Edges.begin(); } const_iterator end() const { return Edges.end(); } @@ -223,7 +229,7 @@ public: if (*Node == N) continue; Node->findEdgesTo(N, TempList); - EL.insert(EL.end(), TempList.begin(), TempList.end()); + llvm::append_range(EL, TempList); TempList.clear(); } return !EL.empty(); diff --git a/llvm/include/llvm/ADT/FloatingPointMode.h b/llvm/include/llvm/ADT/FloatingPointMode.h index 3ba8ae1b2855..698830937870 100644 --- a/llvm/include/llvm/ADT/FloatingPointMode.h +++ b/llvm/include/llvm/ADT/FloatingPointMode.h @@ -44,6 +44,24 @@ enum class RoundingMode : int8_t { Invalid = -1 ///< Denotes invalid value. }; +/// Returns text representation of the given rounding mode. +inline StringRef spell(RoundingMode RM) { + switch (RM) { + case RoundingMode::TowardZero: return "towardzero"; + case RoundingMode::NearestTiesToEven: return "tonearest"; + case RoundingMode::TowardPositive: return "upward"; + case RoundingMode::TowardNegative: return "downward"; + case RoundingMode::NearestTiesToAway: return "tonearestaway"; + case RoundingMode::Dynamic: return "dynamic"; + default: return "invalid"; + } +} + +inline raw_ostream &operator << (raw_ostream &OS, RoundingMode RM) { + OS << spell(RM); + return OS; +} + /// Represent subnormal handling kind for floating point instruction inputs and /// outputs. struct DenormalMode { diff --git a/llvm/include/llvm/ADT/FunctionExtras.h b/llvm/include/llvm/ADT/FunctionExtras.h index 4c75e4d2547b..7f8fb103f148 100644 --- a/llvm/include/llvm/ADT/FunctionExtras.h +++ b/llvm/include/llvm/ADT/FunctionExtras.h @@ -64,12 +64,12 @@ template <typename ReturnT, typename... ParamTs> class UniqueFunctionBase { protected: static constexpr size_t InlineStorageSize = sizeof(void *) * 3; - // MSVC has a bug and ICEs if we give it a particular dependent value - // expression as part of the `std::conditional` below. To work around this, - // we build that into a template struct's constexpr bool. - template <typename T> struct IsSizeLessThanThresholdT { - static constexpr bool value = sizeof(T) <= (2 * sizeof(void *)); - }; + template <typename T, class = void> + struct IsSizeLessThanThresholdT : std::false_type {}; + + template <typename T> + struct IsSizeLessThanThresholdT< + T, std::enable_if_t<sizeof(T) <= 2 * sizeof(void *)>> : std::true_type {}; // Provide a type function to map parameters that won't observe extra copies // or moves and which are small enough to likely pass in register to values diff --git a/llvm/include/llvm/ADT/Hashing.h b/llvm/include/llvm/ADT/Hashing.h index 9ee310c879fd..cb53b7fa7469 100644 --- a/llvm/include/llvm/ADT/Hashing.h +++ b/llvm/include/llvm/ADT/Hashing.h @@ -52,6 +52,7 @@ #include <cassert> #include <cstring> #include <string> +#include <tuple> #include <utility> namespace llvm { @@ -112,6 +113,10 @@ template <typename T> hash_code hash_value(const T *ptr); template <typename T, typename U> hash_code hash_value(const std::pair<T, U> &arg); +/// Compute a hash_code for a tuple. +template <typename... 
Ts> +hash_code hash_value(const std::tuple<Ts...> &arg); + /// Compute a hash_code for a standard string. template <typename T> hash_code hash_value(const std::basic_string<T> &arg); @@ -645,6 +650,26 @@ hash_code hash_value(const std::pair<T, U> &arg) { return hash_combine(arg.first, arg.second); } +// Implementation details for the hash_value overload for std::tuple<...>(...). +namespace hashing { +namespace detail { + +template <typename... Ts, std::size_t... Indices> +hash_code hash_value_tuple_helper(const std::tuple<Ts...> &arg, + std::index_sequence<Indices...> indices) { + return hash_combine(std::get<Indices>(arg)...); +} + +} // namespace detail +} // namespace hashing + +template <typename... Ts> +hash_code hash_value(const std::tuple<Ts...> &arg) { + // TODO: Use std::apply when LLVM starts using C++17. + return ::llvm::hashing::detail::hash_value_tuple_helper( + arg, typename std::index_sequence_for<Ts...>()); +} + // Declared and documented above, but defined here so that any of the hashing // infrastructure is available. template <typename T> diff --git a/llvm/include/llvm/ADT/ImmutableMap.h b/llvm/include/llvm/ADT/ImmutableMap.h index 30689d2274a8..81b21a7319a7 100644 --- a/llvm/include/llvm/ADT/ImmutableMap.h +++ b/llvm/include/llvm/ADT/ImmutableMap.h @@ -355,7 +355,7 @@ public: unsigned getHeight() const { return Root ? Root->getHeight() : 0; } static inline void Profile(FoldingSetNodeID &ID, const ImmutableMapRef &M) { - ID.AddPointer(M.Root); + ID.AddPointer(M.Root.get()); } inline void Profile(FoldingSetNodeID &ID) const { return Profile(ID, *this); } diff --git a/llvm/include/llvm/ADT/IntervalMap.h b/llvm/include/llvm/ADT/IntervalMap.h index db7804d0a551..0b6c7d667807 100644 --- a/llvm/include/llvm/ADT/IntervalMap.h +++ b/llvm/include/llvm/ADT/IntervalMap.h @@ -963,8 +963,7 @@ public: private: // The root data is either a RootLeaf or a RootBranchData instance. - alignas(RootLeaf) alignas(RootBranchData) - AlignedCharArrayUnion<RootLeaf, RootBranchData> data; + AlignedCharArrayUnion<RootLeaf, RootBranchData> data; // Tree height. // 0: Leaves in root. @@ -979,10 +978,7 @@ private: Allocator &allocator; /// Represent data as a node type without breaking aliasing rules. 
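// ---------------------------------------------------------------------------
// Editor's illustrative sketch (not part of the imported patch): the
// std::tuple overload of hash_value above lets tuples participate in LLVM
// hashing just like std::pair. The demo function name is hypothetical.
#include "llvm/ADT/Hashing.h"
#include <string>
#include <tuple>

static llvm::hash_code hashTupleSketch() {
  auto Key = std::make_tuple(1, 3.5, std::string("name"));
  // Equivalent to hash_combine(1, 3.5, std::string("name")).
  return llvm::hash_value(Key);
}
// ---------------------------------------------------------------------------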
- template <typename T> - T &dataAs() const { - return *bit_cast<T *>(const_cast<char *>(data.buffer)); - } + template <typename T> T &dataAs() const { return *bit_cast<T *>(&data); } const RootLeaf &rootLeaf() const { assert(!branched() && "Cannot acces leaf data in branched root"); @@ -1040,7 +1036,7 @@ private: public: explicit IntervalMap(Allocator &a) : height(0), rootSize(0), allocator(a) { - assert((uintptr_t(data.buffer) & (alignof(RootLeaf) - 1)) == 0 && + assert((uintptr_t(&data) & (alignof(RootLeaf) - 1)) == 0 && "Insufficient alignment"); new(&rootLeaf()) RootLeaf(); } diff --git a/llvm/include/llvm/ADT/IntrusiveRefCntPtr.h b/llvm/include/llvm/ADT/IntrusiveRefCntPtr.h index 6d97fe15db8b..ca4c40db48b9 100644 --- a/llvm/include/llvm/ADT/IntrusiveRefCntPtr.h +++ b/llvm/include/llvm/ADT/IntrusiveRefCntPtr.h @@ -58,6 +58,7 @@ #include <atomic> #include <cassert> #include <cstddef> +#include <memory> namespace llvm { @@ -70,10 +71,23 @@ namespace llvm { template <class Derived> class RefCountedBase { mutable unsigned RefCount = 0; -public: +protected: RefCountedBase() = default; RefCountedBase(const RefCountedBase &) {} + RefCountedBase &operator=(const RefCountedBase &) = delete; + +#ifndef NDEBUG + ~RefCountedBase() { + assert(RefCount == 0 && + "Destruction occured when there are still references to this."); + } +#else + // Default the destructor in release builds, A trivial destructor may enable + // better codegen. + ~RefCountedBase() = default; +#endif +public: void Retain() const { ++RefCount; } void Release() const { @@ -85,10 +99,24 @@ public: /// A thread-safe version of \c RefCountedBase. template <class Derived> class ThreadSafeRefCountedBase { - mutable std::atomic<int> RefCount; + mutable std::atomic<int> RefCount{0}; protected: - ThreadSafeRefCountedBase() : RefCount(0) {} + ThreadSafeRefCountedBase() = default; + ThreadSafeRefCountedBase(const ThreadSafeRefCountedBase &) {} + ThreadSafeRefCountedBase & + operator=(const ThreadSafeRefCountedBase &) = delete; + +#ifndef NDEBUG + ~ThreadSafeRefCountedBase() { + assert(RefCount == 0 && + "Destruction occured when there are still references to this."); + } +#else + // Default the destructor in release builds, A trivial destructor may enable + // better codegen. + ~ThreadSafeRefCountedBase() = default; +#endif public: void Retain() const { RefCount.fetch_add(1, std::memory_order_relaxed); } @@ -149,6 +177,11 @@ public: } template <class X> + IntrusiveRefCntPtr(std::unique_ptr<X> S) : Obj(S.release()) { + retain(); + } + + template <class X> IntrusiveRefCntPtr(const IntrusiveRefCntPtr<X> &S) : Obj(S.get()) { retain(); } @@ -264,6 +297,12 @@ template <class T> struct simplify_type<const IntrusiveRefCntPtr<T>> { } }; +/// Factory function for creating intrusive ref counted pointers. +template <typename T, typename... Args> +IntrusiveRefCntPtr<T> makeIntrusiveRefCnt(Args &&...A) { + return IntrusiveRefCntPtr<T>(new T(std::forward<Args>(A)...)); +} + } // end namespace llvm #endif // LLVM_ADT_INTRUSIVEREFCNTPTR_H diff --git a/llvm/include/llvm/ADT/Optional.h b/llvm/include/llvm/ADT/Optional.h index c64b82352397..a285c81d1be8 100644 --- a/llvm/include/llvm/ADT/Optional.h +++ b/llvm/include/llvm/ADT/Optional.h @@ -15,6 +15,7 @@ #ifndef LLVM_ADT_OPTIONAL_H #define LLVM_ADT_OPTIONAL_H +#include "llvm/ADT/Hashing.h" #include "llvm/ADT/None.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/type_traits.h" @@ -32,7 +33,30 @@ namespace optional_detail { struct in_place_t {}; /// Storage for any type. 
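// ---------------------------------------------------------------------------
// Editor's illustrative sketch (not part of the imported patch): the new
// makeIntrusiveRefCnt factory above constructs a ref-counted object in one
// step. MyNode is a hypothetical type; the demo function name is hypothetical.
#include "llvm/ADT/IntrusiveRefCntPtr.h"

struct MyNode : llvm::RefCountedBase<MyNode> {
  int Value;
  explicit MyNode(int V) : Value(V) {}
};

static void intrusiveRefCntSketch() {
  auto N = llvm::makeIntrusiveRefCnt<MyNode>(42);
  llvm::IntrusiveRefCntPtr<MyNode> Alias = N;  // bumps the shared count
  // Both handles drop their reference at end of scope; the now-protected
  // ~RefCountedBase asserts (in asserts builds) that the count reaches zero.
  (void)Alias;
}
// ---------------------------------------------------------------------------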
-template <typename T, bool = is_trivially_copyable<T>::value> +// +// The specialization condition intentionally uses +// llvm::is_trivially_copy_constructible instead of +// std::is_trivially_copy_constructible. GCC versions prior to 7.4 may +// instantiate the copy constructor of `T` when +// std::is_trivially_copy_constructible is instantiated. This causes +// compilation to fail if we query the trivially copy constructible property of +// a class which is not copy constructible. +// +// The current implementation of OptionalStorage insists that in order to use +// the trivial specialization, the value_type must be trivially copy +// constructible and trivially copy assignable due to =default implementations +// of the copy/move constructor/assignment. It does not follow that this is +// necessarily the case std::is_trivially_copyable is true (hence the expanded +// specialization condition). +// +// The move constructible / assignable conditions emulate the remaining behavior +// of std::is_trivially_copyable. +template <typename T, bool = (llvm::is_trivially_copy_constructible<T>::value && + std::is_trivially_copy_assignable<T>::value && + (std::is_trivially_move_constructible<T>::value || + !std::is_move_constructible<T>::value) && + (std::is_trivially_move_assignable<T>::value || + !std::is_move_assignable<T>::value))> class OptionalStorage { union { char empty; @@ -43,21 +67,21 @@ class OptionalStorage { public: ~OptionalStorage() { reset(); } - OptionalStorage() noexcept : empty(), hasVal(false) {} + constexpr OptionalStorage() noexcept : empty(), hasVal(false) {} - OptionalStorage(OptionalStorage const &other) : OptionalStorage() { + constexpr OptionalStorage(OptionalStorage const &other) : OptionalStorage() { if (other.hasValue()) { emplace(other.value); } } - OptionalStorage(OptionalStorage &&other) : OptionalStorage() { + constexpr OptionalStorage(OptionalStorage &&other) : OptionalStorage() { if (other.hasValue()) { emplace(std::move(other.value)); } } template <class... Args> - explicit OptionalStorage(in_place_t, Args &&... args) + constexpr explicit OptionalStorage(in_place_t, Args &&... args) : value(std::forward<Args>(args)...), hasVal(true) {} void reset() noexcept { @@ -67,13 +91,13 @@ public: } } - bool hasValue() const noexcept { return hasVal; } + constexpr bool hasValue() const noexcept { return hasVal; } T &getValue() LLVM_LVALUE_FUNCTION noexcept { assert(hasVal); return value; } - T const &getValue() const LLVM_LVALUE_FUNCTION noexcept { + constexpr T const &getValue() const LLVM_LVALUE_FUNCTION noexcept { assert(hasVal); return value; } @@ -148,16 +172,16 @@ template <typename T> class OptionalStorage<T, true> { public: ~OptionalStorage() = default; - OptionalStorage() noexcept : empty{} {} + constexpr OptionalStorage() noexcept : empty{} {} - OptionalStorage(OptionalStorage const &other) = default; - OptionalStorage(OptionalStorage &&other) = default; + constexpr OptionalStorage(OptionalStorage const &other) = default; + constexpr OptionalStorage(OptionalStorage &&other) = default; OptionalStorage &operator=(OptionalStorage const &other) = default; OptionalStorage &operator=(OptionalStorage &&other) = default; template <class... Args> - explicit OptionalStorage(in_place_t, Args &&... args) + constexpr explicit OptionalStorage(in_place_t, Args &&... 
args) : value(std::forward<Args>(args)...), hasVal(true) {} void reset() noexcept { @@ -167,13 +191,13 @@ public: } } - bool hasValue() const noexcept { return hasVal; } + constexpr bool hasValue() const noexcept { return hasVal; } T &getValue() LLVM_LVALUE_FUNCTION noexcept { assert(hasVal); return value; } - T const &getValue() const LLVM_LVALUE_FUNCTION noexcept { + constexpr T const &getValue() const LLVM_LVALUE_FUNCTION noexcept { assert(hasVal); return value; } @@ -221,11 +245,12 @@ public: constexpr Optional() {} constexpr Optional(NoneType) {} - Optional(const T &y) : Storage(optional_detail::in_place_t{}, y) {} - Optional(const Optional &O) = default; + constexpr Optional(const T &y) : Storage(optional_detail::in_place_t{}, y) {} + constexpr Optional(const Optional &O) = default; - Optional(T &&y) : Storage(optional_detail::in_place_t{}, std::move(y)) {} - Optional(Optional &&O) = default; + constexpr Optional(T &&y) + : Storage(optional_detail::in_place_t{}, std::move(y)) {} + constexpr Optional(Optional &&O) = default; Optional &operator=(T &&y) { Storage = std::move(y); @@ -238,7 +263,7 @@ public: Storage.emplace(std::forward<ArgTypes>(Args)...); } - static inline Optional create(const T *y) { + static constexpr Optional create(const T *y) { return y ? Optional(*y) : Optional(); } @@ -250,16 +275,20 @@ public: void reset() { Storage.reset(); } - const T *getPointer() const { return &Storage.getValue(); } + constexpr const T *getPointer() const { return &Storage.getValue(); } T *getPointer() { return &Storage.getValue(); } - const T &getValue() const LLVM_LVALUE_FUNCTION { return Storage.getValue(); } + constexpr const T &getValue() const LLVM_LVALUE_FUNCTION { + return Storage.getValue(); + } T &getValue() LLVM_LVALUE_FUNCTION { return Storage.getValue(); } - explicit operator bool() const { return hasValue(); } - bool hasValue() const { return Storage.hasValue(); } - const T *operator->() const { return getPointer(); } + constexpr explicit operator bool() const { return hasValue(); } + constexpr bool hasValue() const { return Storage.hasValue(); } + constexpr const T *operator->() const { return getPointer(); } T *operator->() { return getPointer(); } - const T &operator*() const LLVM_LVALUE_FUNCTION { return getValue(); } + constexpr const T &operator*() const LLVM_LVALUE_FUNCTION { + return getValue(); + } T &operator*() LLVM_LVALUE_FUNCTION { return getValue(); } template <typename U> @@ -294,137 +323,157 @@ public: #endif }; +template <class T> llvm::hash_code hash_value(const Optional<T> &O) { + return O ? 
hash_combine(true, *O) : hash_value(false); +} + template <typename T, typename U> -bool operator==(const Optional<T> &X, const Optional<U> &Y) { +constexpr bool operator==(const Optional<T> &X, const Optional<U> &Y) { if (X && Y) return *X == *Y; return X.hasValue() == Y.hasValue(); } template <typename T, typename U> -bool operator!=(const Optional<T> &X, const Optional<U> &Y) { +constexpr bool operator!=(const Optional<T> &X, const Optional<U> &Y) { return !(X == Y); } template <typename T, typename U> -bool operator<(const Optional<T> &X, const Optional<U> &Y) { +constexpr bool operator<(const Optional<T> &X, const Optional<U> &Y) { if (X && Y) return *X < *Y; return X.hasValue() < Y.hasValue(); } template <typename T, typename U> -bool operator<=(const Optional<T> &X, const Optional<U> &Y) { +constexpr bool operator<=(const Optional<T> &X, const Optional<U> &Y) { return !(Y < X); } template <typename T, typename U> -bool operator>(const Optional<T> &X, const Optional<U> &Y) { +constexpr bool operator>(const Optional<T> &X, const Optional<U> &Y) { return Y < X; } template <typename T, typename U> -bool operator>=(const Optional<T> &X, const Optional<U> &Y) { +constexpr bool operator>=(const Optional<T> &X, const Optional<U> &Y) { return !(X < Y); } -template<typename T> -bool operator==(const Optional<T> &X, NoneType) { +template <typename T> +constexpr bool operator==(const Optional<T> &X, NoneType) { return !X; } -template<typename T> -bool operator==(NoneType, const Optional<T> &X) { +template <typename T> +constexpr bool operator==(NoneType, const Optional<T> &X) { return X == None; } -template<typename T> -bool operator!=(const Optional<T> &X, NoneType) { +template <typename T> +constexpr bool operator!=(const Optional<T> &X, NoneType) { return !(X == None); } -template<typename T> -bool operator!=(NoneType, const Optional<T> &X) { +template <typename T> +constexpr bool operator!=(NoneType, const Optional<T> &X) { return X != None; } -template <typename T> bool operator<(const Optional<T> &X, NoneType) { +template <typename T> constexpr bool operator<(const Optional<T> &X, NoneType) { return false; } -template <typename T> bool operator<(NoneType, const Optional<T> &X) { +template <typename T> constexpr bool operator<(NoneType, const Optional<T> &X) { return X.hasValue(); } -template <typename T> bool operator<=(const Optional<T> &X, NoneType) { +template <typename T> +constexpr bool operator<=(const Optional<T> &X, NoneType) { return !(None < X); } -template <typename T> bool operator<=(NoneType, const Optional<T> &X) { +template <typename T> +constexpr bool operator<=(NoneType, const Optional<T> &X) { return !(X < None); } -template <typename T> bool operator>(const Optional<T> &X, NoneType) { +template <typename T> constexpr bool operator>(const Optional<T> &X, NoneType) { return None < X; } -template <typename T> bool operator>(NoneType, const Optional<T> &X) { +template <typename T> constexpr bool operator>(NoneType, const Optional<T> &X) { return X < None; } -template <typename T> bool operator>=(const Optional<T> &X, NoneType) { +template <typename T> +constexpr bool operator>=(const Optional<T> &X, NoneType) { return None <= X; } -template <typename T> bool operator>=(NoneType, const Optional<T> &X) { +template <typename T> +constexpr bool operator>=(NoneType, const Optional<T> &X) { return X <= None; } -template <typename T> bool operator==(const Optional<T> &X, const T &Y) { +template <typename T> +constexpr bool operator==(const Optional<T> &X, const T &Y) { return X && 
*X == Y; } -template <typename T> bool operator==(const T &X, const Optional<T> &Y) { +template <typename T> +constexpr bool operator==(const T &X, const Optional<T> &Y) { return Y && X == *Y; } -template <typename T> bool operator!=(const Optional<T> &X, const T &Y) { +template <typename T> +constexpr bool operator!=(const Optional<T> &X, const T &Y) { return !(X == Y); } -template <typename T> bool operator!=(const T &X, const Optional<T> &Y) { +template <typename T> +constexpr bool operator!=(const T &X, const Optional<T> &Y) { return !(X == Y); } -template <typename T> bool operator<(const Optional<T> &X, const T &Y) { +template <typename T> +constexpr bool operator<(const Optional<T> &X, const T &Y) { return !X || *X < Y; } -template <typename T> bool operator<(const T &X, const Optional<T> &Y) { +template <typename T> +constexpr bool operator<(const T &X, const Optional<T> &Y) { return Y && X < *Y; } -template <typename T> bool operator<=(const Optional<T> &X, const T &Y) { +template <typename T> +constexpr bool operator<=(const Optional<T> &X, const T &Y) { return !(Y < X); } -template <typename T> bool operator<=(const T &X, const Optional<T> &Y) { +template <typename T> +constexpr bool operator<=(const T &X, const Optional<T> &Y) { return !(Y < X); } -template <typename T> bool operator>(const Optional<T> &X, const T &Y) { +template <typename T> +constexpr bool operator>(const Optional<T> &X, const T &Y) { return Y < X; } -template <typename T> bool operator>(const T &X, const Optional<T> &Y) { +template <typename T> +constexpr bool operator>(const T &X, const Optional<T> &Y) { return Y < X; } -template <typename T> bool operator>=(const Optional<T> &X, const T &Y) { +template <typename T> +constexpr bool operator>=(const Optional<T> &X, const T &Y) { return !(X < Y); } -template <typename T> bool operator>=(const T &X, const Optional<T> &Y) { +template <typename T> +constexpr bool operator>=(const T &X, const Optional<T> &Y) { return !(X < Y); } diff --git a/llvm/include/llvm/ADT/PointerUnion.h b/llvm/include/llvm/ADT/PointerUnion.h index 6fecff8d756f..c39691061b72 100644 --- a/llvm/include/llvm/ADT/PointerUnion.h +++ b/llvm/include/llvm/ADT/PointerUnion.h @@ -93,13 +93,6 @@ namespace pointer_union_detail { static constexpr int NumLowBitsAvailable = lowBitsAvailable<PTs...>(); }; - /// Implement assignment in terms of construction. - template <typename Derived, typename T> struct AssignableFrom { - Derived &operator=(T t) { - return static_cast<Derived &>(*this) = Derived(t); - } - }; - template <typename Derived, typename ValTy, int I, typename ...Types> class PointerUnionMembers; diff --git a/llvm/include/llvm/ADT/STLExtras.h b/llvm/include/llvm/ADT/STLExtras.h index 50b688b36648..63c7f48a5bd2 100644 --- a/llvm/include/llvm/ADT/STLExtras.h +++ b/llvm/include/llvm/ADT/STLExtras.h @@ -193,9 +193,15 @@ public: template <typename Callable> function_ref( Callable &&callable, + // This is not the copy-constructor. std::enable_if_t< !std::is_same<std::remove_cv_t<std::remove_reference_t<Callable>>, - function_ref>::value> * = nullptr) + function_ref>::value> * = nullptr, + // Functor must be callable and return a suitable type. 
+ std::enable_if_t<std::is_void<Ret>::value || + std::is_convertible<decltype(std::declval<Callable>()( + std::declval<Params>()...)), + Ret>::value> * = nullptr) : callback(callback_fn<typename std::remove_reference<Callable>::type>), callable(reinterpret_cast<intptr_t>(&callable)) {} @@ -206,15 +212,6 @@ public: explicit operator bool() const { return callback; } }; -// deleter - Very very very simple method that is used to invoke operator -// delete on something. It is used like this: -// -// for_each(V.begin(), B.end(), deleter<Interval>); -template <class T> -inline void deleter(T *Ptr) { - delete Ptr; -} - //===----------------------------------------------------------------------===// // Extra additions to <iterator> //===----------------------------------------------------------------------===// @@ -275,7 +272,7 @@ template <typename ContainerTy> bool hasSingleElement(ContainerTy &&C) { /// Return a range covering \p RangeOrContainer with the first N elements /// excluded. -template <typename T> auto drop_begin(T &&RangeOrContainer, size_t N) { +template <typename T> auto drop_begin(T &&RangeOrContainer, size_t N = 1) { return make_range(std::next(adl_begin(RangeOrContainer), N), adl_end(RangeOrContainer)); } @@ -541,7 +538,7 @@ public: early_inc_iterator_impl(WrappedIteratorT I) : BaseT(I) {} using BaseT::operator*; - typename BaseT::reference operator*() { + decltype(*std::declval<WrappedIteratorT>()) operator*() { #if LLVM_ENABLE_ABI_BREAKING_CHECKS assert(!IsEarlyIncremented && "Cannot dereference twice!"); IsEarlyIncremented = true; @@ -558,12 +555,12 @@ public: return *this; } - using BaseT::operator==; - bool operator==(const early_inc_iterator_impl &RHS) const { + friend bool operator==(const early_inc_iterator_impl &LHS, + const early_inc_iterator_impl &RHS) { #if LLVM_ENABLE_ABI_BREAKING_CHECKS - assert(!IsEarlyIncremented && "Cannot compare after dereferencing!"); + assert(!LHS.IsEarlyIncremented && "Cannot compare after dereferencing!"); #endif - return BaseT::operator==(RHS); + return (const BaseT &)LHS == (const BaseT &)RHS; } }; @@ -1246,6 +1243,15 @@ public: } }; +/// Given a container of pairs, return a range over the first elements. +template <typename ContainerTy> auto make_first_range(ContainerTy &&c) { + return llvm::map_range( + std::forward<ContainerTy>(c), + [](decltype((*std::begin(c))) elt) -> decltype((elt.first)) { + return elt.first; + }); +} + /// Given a container of pairs, return a range over the second elements. template <typename ContainerTy> auto make_second_range(ContainerTy &&c) { return llvm::map_range( @@ -1422,7 +1428,7 @@ template <typename T> // is trivially copyable. using sort_trivially_copyable = conjunction< std::is_pointer<T>, - is_trivially_copyable<typename std::iterator_traits<T>::value_type>>; + std::is_trivially_copyable<typename std::iterator_traits<T>::value_type>>; } // namespace detail // Provide wrappers to std::sort which shuffle the elements before sorting @@ -1471,18 +1477,19 @@ inline void sort(Container &&C, Compare Comp) { /// which is only enabled when the operation is O(1). 
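// ---------------------------------------------------------------------------
// Editor's illustrative sketch (not part of the imported patch): drop_begin
// now defaults to dropping a single element, and make_first_range mirrors the
// existing make_second_range. The demo function name is hypothetical.
#include "llvm/ADT/STLExtras.h"
#include <utility>
#include <vector>

static void rangeHelpersSketch() {
  std::vector<std::pair<int, const char *>> Pairs = {
      {1, "one"}, {2, "two"}, {3, "three"}};

  int KeySum = 0;
  for (int Key : llvm::make_first_range(Pairs))
    KeySum += Key;                               // 1 + 2 + 3

  int TailSum = 0;
  for (const auto &P : llvm::drop_begin(Pairs))  // skips {1, "one"}
    TailSum += P.first;                          // 2 + 3
  (void)KeySum; (void)TailSum;
}
// ---------------------------------------------------------------------------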
template <typename R> auto size(R &&Range, - std::enable_if_t<std::is_same<typename std::iterator_traits<decltype( - Range.begin())>::iterator_category, - std::random_access_iterator_tag>::value, - void> * = nullptr) { + std::enable_if_t< + std::is_base_of<std::random_access_iterator_tag, + typename std::iterator_traits<decltype( + Range.begin())>::iterator_category>::value, + void> * = nullptr) { return std::distance(Range.begin(), Range.end()); } /// Provide wrappers to std::for_each which take ranges instead of having to /// pass begin/end explicitly. -template <typename R, typename UnaryPredicate> -UnaryPredicate for_each(R &&Range, UnaryPredicate P) { - return std::for_each(adl_begin(Range), adl_end(Range), P); +template <typename R, typename UnaryFunction> +UnaryFunction for_each(R &&Range, UnaryFunction F) { + return std::for_each(adl_begin(Range), adl_end(Range), F); } /// Provide wrappers to std::all_of which take ranges instead of having to pass @@ -1543,6 +1550,13 @@ OutputIt copy(R &&Range, OutputIt Out) { return std::copy(adl_begin(Range), adl_end(Range), Out); } +/// Provide wrappers to std::move which take ranges instead of having to +/// pass begin/end explicitly. +template <typename R, typename OutputIt> +OutputIt move(R &&Range, OutputIt Out) { + return std::move(adl_begin(Range), adl_end(Range), Out); +} + /// Wrapper function around std::find to detect if an element exists /// in a container. template <typename R, typename E> @@ -1577,9 +1591,9 @@ auto count_if(R &&Range, UnaryPredicate P) { /// Wrapper function around std::transform to apply a function to a range and /// store the result elsewhere. -template <typename R, typename OutputIt, typename UnaryPredicate> -OutputIt transform(R &&Range, OutputIt d_first, UnaryPredicate P) { - return std::transform(adl_begin(Range), adl_end(Range), d_first, P); +template <typename R, typename OutputIt, typename UnaryFunction> +OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F) { + return std::transform(adl_begin(Range), adl_end(Range), d_first, F); } /// Provide wrappers to std::partition which take ranges instead of having to @@ -1654,6 +1668,22 @@ void erase_if(Container &C, UnaryPredicate P) { C.erase(remove_if(C, P), C.end()); } +/// Wrapper function to remove a value from a container: +/// +/// C.erase(remove(C.begin(), C.end(), V), C.end()); +template <typename Container, typename ValueType> +void erase_value(Container &C, ValueType V) { + C.erase(std::remove(C.begin(), C.end(), V), C.end()); +} + +/// Wrapper function to append a range to a container. +/// +/// C.insert(C.end(), R.begin(), R.end()); +template <typename Container, typename Range> +inline void append_range(Container &C, Range &&R) { + C.insert(C.end(), R.begin(), R.end()); +} + /// Given a sequence container Cont, replace the range [ContIt, ContEnd) with /// the range [ValIt, ValEnd) (which is not from the same container). template<typename Container, typename RandomAccessIterator> @@ -1911,16 +1941,16 @@ decltype(auto) apply_tuple(F &&f, Tuple &&t) { /// Return true if the sequence [Begin, End) has exactly N items. Runs in O(N) /// time. Not meant for use with random-access iterators. /// Can optionally take a predicate to filter lazily some items. 
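// ---------------------------------------------------------------------------
// Editor's illustrative sketch (not part of the imported patch): append_range
// and erase_value above wrap the usual insert(end, begin, end) and
// erase(remove(...), end) patterns. The demo function name is hypothetical.
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include <vector>

static void containerHelpersSketch() {
  llvm::SmallVector<int, 8> Dest = {1, 2, 3};
  std::vector<int> Extra = {4, 2, 5};

  llvm::append_range(Dest, Extra);  // Dest == {1, 2, 3, 4, 2, 5}
  llvm::erase_value(Dest, 2);       // Dest == {1, 3, 4, 5}
}
// ---------------------------------------------------------------------------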
-template<typename IterTy, - typename Pred = bool (*)(const decltype(*std::declval<IterTy>()) &)> +template <typename IterTy, + typename Pred = bool (*)(const decltype(*std::declval<IterTy>()) &)> bool hasNItems( IterTy &&Begin, IterTy &&End, unsigned N, Pred &&ShouldBeCounted = [](const decltype(*std::declval<IterTy>()) &) { return true; }, std::enable_if_t< - !std::is_same<typename std::iterator_traits<std::remove_reference_t< - decltype(Begin)>>::iterator_category, - std::random_access_iterator_tag>::value, + !std::is_base_of<std::random_access_iterator_tag, + typename std::iterator_traits<std::remove_reference_t< + decltype(Begin)>>::iterator_category>::value, void> * = nullptr) { for (; N; ++Begin) { if (Begin == End) @@ -1936,16 +1966,16 @@ bool hasNItems( /// Return true if the sequence [Begin, End) has N or more items. Runs in O(N) /// time. Not meant for use with random-access iterators. /// Can optionally take a predicate to lazily filter some items. -template<typename IterTy, - typename Pred = bool (*)(const decltype(*std::declval<IterTy>()) &)> +template <typename IterTy, + typename Pred = bool (*)(const decltype(*std::declval<IterTy>()) &)> bool hasNItemsOrMore( IterTy &&Begin, IterTy &&End, unsigned N, Pred &&ShouldBeCounted = [](const decltype(*std::declval<IterTy>()) &) { return true; }, std::enable_if_t< - !std::is_same<typename std::iterator_traits<std::remove_reference_t< - decltype(Begin)>>::iterator_category, - std::random_access_iterator_tag>::value, + !std::is_base_of<std::random_access_iterator_tag, + typename std::iterator_traits<std::remove_reference_t< + decltype(Begin)>>::iterator_category>::value, void> * = nullptr) { for (; N; ++Begin) { if (Begin == End) diff --git a/llvm/include/llvm/ADT/Sequence.h b/llvm/include/llvm/ADT/Sequence.h index 8c505f2010dd..8a695d75f77a 100644 --- a/llvm/include/llvm/ADT/Sequence.h +++ b/llvm/include/llvm/ADT/Sequence.h @@ -42,6 +42,10 @@ public: value_sequence_iterator(const value_sequence_iterator &) = default; value_sequence_iterator(value_sequence_iterator &&Arg) : Value(std::move(Arg.Value)) {} + value_sequence_iterator &operator=(const value_sequence_iterator &Arg) { + Value = Arg.Value; + return *this; + } template <typename U, typename Enabler = decltype(ValueT(std::declval<U>()))> value_sequence_iterator(U &&Value) : Value(std::forward<U>(Value)) {} diff --git a/llvm/include/llvm/ADT/SetVector.h b/llvm/include/llvm/ADT/SetVector.h index 91ad72143ed3..32bcd50966cc 100644 --- a/llvm/include/llvm/ADT/SetVector.h +++ b/llvm/include/llvm/ADT/SetVector.h @@ -205,6 +205,11 @@ public: return true; } + /// Check if the SetVector contains the given key. + bool contains(const key_type &key) const { + return set_.find(key) != set_.end(); + } + /// Count the number of elements of a given key in the SetVector. /// \returns 0 if the element is not in the SetVector, 1 if it is. 
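// ---------------------------------------------------------------------------
// Editor's illustrative sketch (not part of the imported patch): SetVector
// gains the same contains() convenience as the other set-like containers in
// this import. The demo function name is hypothetical.
#include "llvm/ADT/SetVector.h"

static void setVectorContainsSketch() {
  llvm::SetVector<int> Order;
  Order.insert(3);
  Order.insert(1);
  Order.insert(3);                      // duplicate; insertion order kept

  bool HasThree = Order.contains(3);    // true
  bool HasTwo   = Order.contains(2);    // false
  (void)HasThree; (void)HasTwo;
}
// ---------------------------------------------------------------------------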
size_type count(const key_type &key) const { diff --git a/llvm/include/llvm/ADT/SmallPtrSet.h b/llvm/include/llvm/ADT/SmallPtrSet.h index 0ab05cfe611a..57dd8f6b695d 100644 --- a/llvm/include/llvm/ADT/SmallPtrSet.h +++ b/llvm/include/llvm/ADT/SmallPtrSet.h @@ -378,6 +378,9 @@ public: iterator find(ConstPtrType Ptr) const { return makeIterator(find_imp(ConstPtrTraits::getAsVoidPointer(Ptr))); } + bool contains(ConstPtrType Ptr) const { + return find_imp(ConstPtrTraits::getAsVoidPointer(Ptr)) != EndPointer(); + } template <typename IterT> void insert(IterT I, IterT E) { diff --git a/llvm/include/llvm/ADT/SmallSet.h b/llvm/include/llvm/ADT/SmallSet.h index a03fa7dd8423..0600e528ee69 100644 --- a/llvm/include/llvm/ADT/SmallSet.h +++ b/llvm/include/llvm/ADT/SmallSet.h @@ -232,6 +232,13 @@ public: return {Set.end()}; } + /// Check if the SmallSet contains the given element. + bool contains(const T &V) const { + if (isSmall()) + return vfind(V) != Vector.end(); + return Set.find(V) != Set.end(); + } + private: bool isSmall() const { return Set.empty(); } diff --git a/llvm/include/llvm/ADT/SmallString.h b/llvm/include/llvm/ADT/SmallString.h index cd6f2173d04f..5a56321ae492 100644 --- a/llvm/include/llvm/ADT/SmallString.h +++ b/llvm/include/llvm/ADT/SmallString.h @@ -30,63 +30,56 @@ public: /// Initialize from a StringRef. SmallString(StringRef S) : SmallVector<char, InternalLen>(S.begin(), S.end()) {} + /// Initialize by concatenating a list of StringRefs. + SmallString(std::initializer_list<StringRef> Refs) + : SmallVector<char, InternalLen>() { + this->append(Refs); + } + /// Initialize with a range. template<typename ItTy> SmallString(ItTy S, ItTy E) : SmallVector<char, InternalLen>(S, E) {} - // Note that in order to add new overloads for append & assign, we have to - // duplicate the inherited versions so as not to inadvertently hide them. - /// @} /// @name String Assignment /// @{ - /// Assign from a repeated element. - void assign(size_t NumElts, char Elt) { - this->SmallVectorImpl<char>::assign(NumElts, Elt); - } - - /// Assign from an iterator pair. - template<typename in_iter> - void assign(in_iter S, in_iter E) { - this->clear(); - SmallVectorImpl<char>::append(S, E); - } + using SmallVector<char, InternalLen>::assign; /// Assign from a StringRef. void assign(StringRef RHS) { - this->clear(); - SmallVectorImpl<char>::append(RHS.begin(), RHS.end()); + SmallVectorImpl<char>::assign(RHS.begin(), RHS.end()); } - /// Assign from a SmallVector. - void assign(const SmallVectorImpl<char> &RHS) { + /// Assign from a list of StringRefs. + void assign(std::initializer_list<StringRef> Refs) { this->clear(); - SmallVectorImpl<char>::append(RHS.begin(), RHS.end()); + append(Refs); } /// @} /// @name String Concatenation /// @{ - /// Append from an iterator pair. - template<typename in_iter> - void append(in_iter S, in_iter E) { - SmallVectorImpl<char>::append(S, E); - } - - void append(size_t NumInputs, char Elt) { - SmallVectorImpl<char>::append(NumInputs, Elt); - } + using SmallVector<char, InternalLen>::append; /// Append from a StringRef. void append(StringRef RHS) { SmallVectorImpl<char>::append(RHS.begin(), RHS.end()); } - /// Append from a SmallVector. - void append(const SmallVectorImpl<char> &RHS) { - SmallVectorImpl<char>::append(RHS.begin(), RHS.end()); + /// Append from a list of StringRefs. 
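// ---------------------------------------------------------------------------
// Editor's illustrative sketch (not part of the imported patch): SmallString
// can now be constructed from, assigned from, and appended with a braced list
// of StringRefs in a single call. The demo function name is hypothetical.
#include "llvm/ADT/SmallString.h"

static void smallStringSketch() {
  llvm::SmallString<64> Path({"/usr", "/local", "/bin"});
  Path.append({"/", "clang"});                   // "/usr/local/bin/clang"

  llvm::SmallString<32> Msg;
  Msg.assign({"error", ": ", "file not found"});
  (void)Path; (void)Msg;
}
// ---------------------------------------------------------------------------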
+ void append(std::initializer_list<StringRef> Refs) { + size_t SizeNeeded = this->size(); + for (const StringRef &Ref : Refs) + SizeNeeded += Ref.size(); + this->reserve(SizeNeeded); + auto CurEnd = this->end(); + for (const StringRef &Ref : Refs) { + this->uninitialized_copy(Ref.begin(), Ref.end(), CurEnd); + CurEnd += Ref.size(); + } + this->set_size(SizeNeeded); } /// @} @@ -280,9 +273,9 @@ public: } // Extra operators. - const SmallString &operator=(StringRef RHS) { - this->clear(); - return *this += RHS; + SmallString &operator=(StringRef RHS) { + this->assign(RHS); + return *this; } SmallString &operator+=(StringRef RHS) { diff --git a/llvm/include/llvm/ADT/SmallVector.h b/llvm/include/llvm/ADT/SmallVector.h index 3ccee3d21d48..e960b272db04 100644 --- a/llvm/include/llvm/ADT/SmallVector.h +++ b/llvm/include/llvm/ADT/SmallVector.h @@ -14,7 +14,6 @@ #define LLVM_ADT_SMALLVECTOR_H #include "llvm/ADT/iterator_range.h" -#include "llvm/Support/AlignOf.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" @@ -57,10 +56,15 @@ protected: SmallVectorBase(void *FirstEl, size_t TotalCapacity) : BeginX(FirstEl), Capacity(TotalCapacity) {} + /// This is a helper for \a grow() that's out of line to reduce code + /// duplication. This function will report a fatal error if it can't grow at + /// least to \p MinSize. + void *mallocForGrow(size_t MinSize, size_t TSize, size_t &NewCapacity); + /// This is an implementation of the grow() method which only works /// on POD-like data types and is out of line to reduce code duplication. /// This function will report a fatal error if it cannot increase capacity. - void grow_pod(void *FirstEl, size_t MinCapacity, size_t TSize); + void grow_pod(void *FirstEl, size_t MinSize, size_t TSize); public: size_t size() const { return Size; } @@ -90,8 +94,9 @@ using SmallVectorSizeType = /// Figure out the offset of the first element. template <class T, typename = void> struct SmallVectorAlignmentAndSize { - AlignedCharArrayUnion<SmallVectorBase<SmallVectorSizeType<T>>> Base; - AlignedCharArrayUnion<T> FirstEl; + alignas(SmallVectorBase<SmallVectorSizeType<T>>) char Base[sizeof( + SmallVectorBase<SmallVectorSizeType<T>>)]; + alignas(T) char FirstEl[sizeof(T)]; }; /// This is the part of SmallVectorTemplateBase which does not depend on whether @@ -115,8 +120,8 @@ class SmallVectorTemplateCommon protected: SmallVectorTemplateCommon(size_t Size) : Base(getFirstEl(), Size) {} - void grow_pod(size_t MinCapacity, size_t TSize) { - Base::grow_pod(getFirstEl(), MinCapacity, TSize); + void grow_pod(size_t MinSize, size_t TSize) { + Base::grow_pod(getFirstEl(), MinSize, TSize); } /// Return true if this is a smallvector which has not had dynamic @@ -129,6 +134,102 @@ protected: this->Size = this->Capacity = 0; // FIXME: Setting Capacity to 0 is suspect. } + /// Return true if V is an internal reference to the given range. + bool isReferenceToRange(const void *V, const void *First, const void *Last) const { + // Use std::less to avoid UB. + std::less<> LessThan; + return !LessThan(V, First) && LessThan(V, Last); + } + + /// Return true if V is an internal reference to this vector. + bool isReferenceToStorage(const void *V) const { + return isReferenceToRange(V, this->begin(), this->end()); + } + + /// Return true if First and Last form a valid (possibly empty) range in this + /// vector's storage. + bool isRangeInStorage(const void *First, const void *Last) const { + // Use std::less to avoid UB. 
+ std::less<> LessThan; + return !LessThan(First, this->begin()) && !LessThan(Last, First) && + !LessThan(this->end(), Last); + } + + /// Return true unless Elt will be invalidated by resizing the vector to + /// NewSize. + bool isSafeToReferenceAfterResize(const void *Elt, size_t NewSize) { + // Past the end. + if (LLVM_LIKELY(!isReferenceToStorage(Elt))) + return true; + + // Return false if Elt will be destroyed by shrinking. + if (NewSize <= this->size()) + return Elt < this->begin() + NewSize; + + // Return false if we need to grow. + return NewSize <= this->capacity(); + } + + /// Check whether Elt will be invalidated by resizing the vector to NewSize. + void assertSafeToReferenceAfterResize(const void *Elt, size_t NewSize) { + assert(isSafeToReferenceAfterResize(Elt, NewSize) && + "Attempting to reference an element of the vector in an operation " + "that invalidates it"); + } + + /// Check whether Elt will be invalidated by increasing the size of the + /// vector by N. + void assertSafeToAdd(const void *Elt, size_t N = 1) { + this->assertSafeToReferenceAfterResize(Elt, this->size() + N); + } + + /// Check whether any part of the range will be invalidated by clearing. + void assertSafeToReferenceAfterClear(const T *From, const T *To) { + if (From == To) + return; + this->assertSafeToReferenceAfterResize(From, 0); + this->assertSafeToReferenceAfterResize(To - 1, 0); + } + template < + class ItTy, + std::enable_if_t<!std::is_same<std::remove_const_t<ItTy>, T *>::value, + bool> = false> + void assertSafeToReferenceAfterClear(ItTy, ItTy) {} + + /// Check whether any part of the range will be invalidated by growing. + void assertSafeToAddRange(const T *From, const T *To) { + if (From == To) + return; + this->assertSafeToAdd(From, To - From); + this->assertSafeToAdd(To - 1, To - From); + } + template < + class ItTy, + std::enable_if_t<!std::is_same<std::remove_const_t<ItTy>, T *>::value, + bool> = false> + void assertSafeToAddRange(ItTy, ItTy) {} + + /// Reserve enough space to add one element, and return the updated element + /// pointer in case it was a reference to the storage. + template <class U> + static const T *reserveForParamAndGetAddressImpl(U *This, const T &Elt, + size_t N) { + size_t NewSize = This->size() + N; + if (LLVM_LIKELY(NewSize <= This->capacity())) + return &Elt; + + bool ReferencesStorage = false; + int64_t Index = -1; + if (!U::TakesParamByValue) { + if (LLVM_UNLIKELY(This->isReferenceToStorage(&Elt))) { + ReferencesStorage = true; + Index = &Elt - This->begin(); + } + } + This->grow(NewSize); + return ReferencesStorage ? This->begin() + Index : &Elt; + } + public: using size_type = size_t; using difference_type = ptrdiff_t; @@ -212,7 +313,12 @@ template <typename T, bool = (is_trivially_copy_constructible<T>::value) && (is_trivially_move_constructible<T>::value) && std::is_trivially_destructible<T>::value> class SmallVectorTemplateBase : public SmallVectorTemplateCommon<T> { + friend class SmallVectorTemplateCommon<T>; + protected: + static constexpr bool TakesParamByValue = false; + using ValueParamT = const T &; + SmallVectorTemplateBase(size_t Size) : SmallVectorTemplateCommon<T>(Size) {} static void destroy_range(T *S, T *E) { @@ -242,18 +348,68 @@ protected: /// element, or MinSize more elements if specified. void grow(size_t MinSize = 0); + /// Create a new allocation big enough for \p MinSize and pass back its size + /// in \p NewCapacity. This is the first section of \a grow(). 
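// ---------------------------------------------------------------------------
// Editor's illustrative sketch (not part of the imported patch): the
// TakesParamByValue / reserveForParamAndGetAddress machinery above lets
// push_back accept an argument that aliases the vector's own storage; small
// trivially copyable types are passed by value, other types have their
// address re-derived after a potential regrowth. The demo function name is
// hypothetical.
#include "llvm/ADT/SmallVector.h"

static void selfReferencePushBackSketch() {
  llvm::SmallVector<int, 2> V = {1, 2};
  // V is at its inline capacity, so this push_back must grow the buffer, yet
  // passing an element of V itself is well defined here.
  V.push_back(V[0]);   // V == {1, 2, 1}
  (void)V;
}
// ---------------------------------------------------------------------------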
+ T *mallocForGrow(size_t MinSize, size_t &NewCapacity) { + return static_cast<T *>( + SmallVectorBase<SmallVectorSizeType<T>>::mallocForGrow( + MinSize, sizeof(T), NewCapacity)); + } + + /// Move existing elements over to the new allocation \p NewElts, the middle + /// section of \a grow(). + void moveElementsForGrow(T *NewElts); + + /// Transfer ownership of the allocation, finishing up \a grow(). + void takeAllocationForGrow(T *NewElts, size_t NewCapacity); + + /// Reserve enough space to add one element, and return the updated element + /// pointer in case it was a reference to the storage. + const T *reserveForParamAndGetAddress(const T &Elt, size_t N = 1) { + return this->reserveForParamAndGetAddressImpl(this, Elt, N); + } + + /// Reserve enough space to add one element, and return the updated element + /// pointer in case it was a reference to the storage. + T *reserveForParamAndGetAddress(T &Elt, size_t N = 1) { + return const_cast<T *>( + this->reserveForParamAndGetAddressImpl(this, Elt, N)); + } + + static T &&forward_value_param(T &&V) { return std::move(V); } + static const T &forward_value_param(const T &V) { return V; } + + void growAndAssign(size_t NumElts, const T &Elt) { + // Grow manually in case Elt is an internal reference. + size_t NewCapacity; + T *NewElts = mallocForGrow(NumElts, NewCapacity); + std::uninitialized_fill_n(NewElts, NumElts, Elt); + this->destroy_range(this->begin(), this->end()); + takeAllocationForGrow(NewElts, NewCapacity); + this->set_size(NumElts); + } + + template <typename... ArgTypes> T &growAndEmplaceBack(ArgTypes &&... Args) { + // Grow manually in case one of Args is an internal reference. + size_t NewCapacity; + T *NewElts = mallocForGrow(0, NewCapacity); + ::new ((void *)(NewElts + this->size())) T(std::forward<ArgTypes>(Args)...); + moveElementsForGrow(NewElts); + takeAllocationForGrow(NewElts, NewCapacity); + this->set_size(this->size() + 1); + return this->back(); + } + public: void push_back(const T &Elt) { - if (LLVM_UNLIKELY(this->size() >= this->capacity())) - this->grow(); - ::new ((void*) this->end()) T(Elt); + const T *EltPtr = reserveForParamAndGetAddress(Elt); + ::new ((void *)this->end()) T(*EltPtr); this->set_size(this->size() + 1); } void push_back(T &&Elt) { - if (LLVM_UNLIKELY(this->size() >= this->capacity())) - this->grow(); - ::new ((void*) this->end()) T(::std::move(Elt)); + T *EltPtr = reserveForParamAndGetAddress(Elt); + ::new ((void *)this->end()) T(::std::move(*EltPtr)); this->set_size(this->size() + 1); } @@ -266,29 +422,27 @@ public: // Define this out-of-line to dissuade the C++ compiler from inlining it. template <typename T, bool TriviallyCopyable> void SmallVectorTemplateBase<T, TriviallyCopyable>::grow(size_t MinSize) { - // Ensure we can fit the new capacity. - // This is only going to be applicable when the capacity is 32 bit. - if (MinSize > this->SizeTypeMax()) - report_bad_alloc_error("SmallVector capacity overflow during allocation"); - - // Ensure we can meet the guarantee of space for at least one more element. - // The above check alone will not catch the case where grow is called with a - // default MinCapacity of 0, but the current capacity cannot be increased. - // This is only going to be applicable when the capacity is 32 bit. - if (this->capacity() == this->SizeTypeMax()) - report_bad_alloc_error("SmallVector capacity unable to grow"); - - // Always grow, even from zero. 
- size_t NewCapacity = size_t(NextPowerOf2(this->capacity() + 2)); - NewCapacity = std::min(std::max(NewCapacity, MinSize), this->SizeTypeMax()); - T *NewElts = static_cast<T*>(llvm::safe_malloc(NewCapacity*sizeof(T))); + size_t NewCapacity; + T *NewElts = mallocForGrow(MinSize, NewCapacity); + moveElementsForGrow(NewElts); + takeAllocationForGrow(NewElts, NewCapacity); +} +// Define this out-of-line to dissuade the C++ compiler from inlining it. +template <typename T, bool TriviallyCopyable> +void SmallVectorTemplateBase<T, TriviallyCopyable>::moveElementsForGrow( + T *NewElts) { // Move the elements over. this->uninitialized_move(this->begin(), this->end(), NewElts); // Destroy the original elements. destroy_range(this->begin(), this->end()); +} +// Define this out-of-line to dissuade the C++ compiler from inlining it. +template <typename T, bool TriviallyCopyable> +void SmallVectorTemplateBase<T, TriviallyCopyable>::takeAllocationForGrow( + T *NewElts, size_t NewCapacity) { // If this wasn't grown from the inline copy, deallocate the old space. if (!this->isSmall()) free(this->begin()); @@ -303,7 +457,18 @@ void SmallVectorTemplateBase<T, TriviallyCopyable>::grow(size_t MinSize) { /// skipping destruction. template <typename T> class SmallVectorTemplateBase<T, true> : public SmallVectorTemplateCommon<T> { + friend class SmallVectorTemplateCommon<T>; + protected: + /// True if it's cheap enough to take parameters by value. Doing so avoids + /// overhead related to mitigations for reference invalidation. + static constexpr bool TakesParamByValue = sizeof(T) <= 2 * sizeof(void *); + + /// Either const T& or T, depending on whether it's cheap enough to take + /// parameters by value. + using ValueParamT = + typename std::conditional<TakesParamByValue, T, const T &>::type; + SmallVectorTemplateBase(size_t Size) : SmallVectorTemplateCommon<T>(Size) {} // No need to do a destroy loop for POD's. @@ -344,11 +509,43 @@ protected: /// least one more element or MinSize if specified. void grow(size_t MinSize = 0) { this->grow_pod(MinSize, sizeof(T)); } + /// Reserve enough space to add one element, and return the updated element + /// pointer in case it was a reference to the storage. + const T *reserveForParamAndGetAddress(const T &Elt, size_t N = 1) { + return this->reserveForParamAndGetAddressImpl(this, Elt, N); + } + + /// Reserve enough space to add one element, and return the updated element + /// pointer in case it was a reference to the storage. + T *reserveForParamAndGetAddress(T &Elt, size_t N = 1) { + return const_cast<T *>( + this->reserveForParamAndGetAddressImpl(this, Elt, N)); + } + + /// Copy \p V or return a reference, depending on \a ValueParamT. + static ValueParamT forward_value_param(ValueParamT V) { return V; } + + void growAndAssign(size_t NumElts, T Elt) { + // Elt has been copied in case it's an internal reference, side-stepping + // reference invalidation problems without losing the realloc optimization. + this->set_size(0); + this->grow(NumElts); + std::uninitialized_fill_n(this->begin(), NumElts, Elt); + this->set_size(NumElts); + } + + template <typename... ArgTypes> T &growAndEmplaceBack(ArgTypes &&... Args) { + // Use push_back with a copy in case Args has an internal reference, + // side-stepping reference invalidation problems without losing the realloc + // optimization. 
+ push_back(T(std::forward<ArgTypes>(Args)...)); + return this->back(); + } + public: - void push_back(const T &Elt) { - if (LLVM_UNLIKELY(this->size() >= this->capacity())) - this->grow(); - memcpy(reinterpret_cast<void *>(this->end()), &Elt, sizeof(T)); + void push_back(ValueParamT Elt) { + const T *EltPtr = reserveForParamAndGetAddress(Elt); + memcpy(reinterpret_cast<void *>(this->end()), EltPtr, sizeof(T)); this->set_size(this->size() + 1); } @@ -368,6 +565,9 @@ public: using size_type = typename SuperClass::size_type; protected: + using SmallVectorTemplateBase<T>::TakesParamByValue; + using ValueParamT = typename SuperClass::ValueParamT; + // Default ctor - Initialize to empty. explicit SmallVectorImpl(unsigned N) : SmallVectorTemplateBase<T>(N) {} @@ -387,29 +587,38 @@ public: this->Size = 0; } - void resize(size_type N) { +private: + template <bool ForOverwrite> void resizeImpl(size_type N) { if (N < this->size()) { - this->destroy_range(this->begin()+N, this->end()); - this->set_size(N); + this->pop_back_n(this->size() - N); } else if (N > this->size()) { - if (this->capacity() < N) - this->grow(N); + this->reserve(N); for (auto I = this->end(), E = this->begin() + N; I != E; ++I) - new (&*I) T(); + if (ForOverwrite) + new (&*I) T; + else + new (&*I) T(); this->set_size(N); } } - void resize(size_type N, const T &NV) { +public: + void resize(size_type N) { resizeImpl<false>(N); } + + /// Like resize, but \ref T is POD, the new values won't be initialized. + void resize_for_overwrite(size_type N) { resizeImpl<true>(N); } + + void resize(size_type N, ValueParamT NV) { + if (N == this->size()) + return; + if (N < this->size()) { - this->destroy_range(this->begin()+N, this->end()); - this->set_size(N); - } else if (N > this->size()) { - if (this->capacity() < N) - this->grow(N); - std::uninitialized_fill(this->end(), this->begin()+N, NV); - this->set_size(N); + this->pop_back_n(this->size() - N); + return; } + + // N > this->size(). Defer to append. + this->append(N - this->size(), NV); } void reserve(size_type N) { @@ -417,6 +626,12 @@ public: this->grow(N); } + void pop_back_n(size_type NumItems) { + assert(this->size() >= NumItems); + this->destroy_range(this->end() - NumItems, this->end()); + this->set_size(this->size() - NumItems); + } + LLVM_NODISCARD T pop_back_val() { T Result = ::std::move(this->back()); this->pop_back(); @@ -431,20 +646,17 @@ public: typename std::iterator_traits<in_iter>::iterator_category, std::input_iterator_tag>::value>> void append(in_iter in_start, in_iter in_end) { + this->assertSafeToAddRange(in_start, in_end); size_type NumInputs = std::distance(in_start, in_end); - if (NumInputs > this->capacity() - this->size()) - this->grow(this->size()+NumInputs); - + this->reserve(this->size() + NumInputs); this->uninitialized_copy(in_start, in_end, this->end()); this->set_size(this->size() + NumInputs); } /// Append \p NumInputs copies of \p Elt to the end. 
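A minimal sketch of the two new sizing helpers declared in this hunk, resize_for_overwrite() and pop_back_n(); fillBuffer() is a hypothetical producer standing in for any routine that may write fewer bytes than requested:

#include "llvm/ADT/SmallVector.h"
#include <cstddef>

size_t fillBuffer(char *Dst, size_t MaxLen); // hypothetical

llvm::SmallVector<char, 128> readChunk(size_t MaxLen) {
  llvm::SmallVector<char, 128> Buf;
  // Unlike resize(), the new elements are default-initialized, so for a
  // trivially constructible element type the bytes are left untouched.
  Buf.resize_for_overwrite(MaxLen);
  size_t Got = fillBuffer(Buf.data(), MaxLen);
  // Drop the unwritten tail in one step instead of a pop_back() loop.
  Buf.pop_back_n(MaxLen - Got);
  return Buf;
}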
- void append(size_type NumInputs, const T &Elt) { - if (NumInputs > this->capacity() - this->size()) - this->grow(this->size()+NumInputs); - - std::uninitialized_fill_n(this->end(), NumInputs, Elt); + void append(size_type NumInputs, ValueParamT Elt) { + const T *EltPtr = this->reserveForParamAndGetAddress(Elt, NumInputs); + std::uninitialized_fill_n(this->end(), NumInputs, *EltPtr); this->set_size(this->size() + NumInputs); } @@ -452,22 +664,33 @@ public: append(IL.begin(), IL.end()); } - // FIXME: Consider assigning over existing elements, rather than clearing & - // re-initializing them - for all assign(...) variants. + void append(const SmallVectorImpl &RHS) { append(RHS.begin(), RHS.end()); } - void assign(size_type NumElts, const T &Elt) { - clear(); - if (this->capacity() < NumElts) - this->grow(NumElts); + void assign(size_type NumElts, ValueParamT Elt) { + // Note that Elt could be an internal reference. + if (NumElts > this->capacity()) { + this->growAndAssign(NumElts, Elt); + return; + } + + // Assign over existing elements. + std::fill_n(this->begin(), std::min(NumElts, this->size()), Elt); + if (NumElts > this->size()) + std::uninitialized_fill_n(this->end(), NumElts - this->size(), Elt); + else if (NumElts < this->size()) + this->destroy_range(this->begin() + NumElts, this->end()); this->set_size(NumElts); - std::uninitialized_fill(this->begin(), this->end(), Elt); } + // FIXME: Consider assigning over existing elements, rather than clearing & + // re-initializing them - for all assign(...) variants. + template <typename in_iter, typename = std::enable_if_t<std::is_convertible< typename std::iterator_traits<in_iter>::iterator_category, std::input_iterator_tag>::value>> void assign(in_iter in_start, in_iter in_end) { + this->assertSafeToReferenceAfterClear(in_start, in_end); clear(); append(in_start, in_end); } @@ -477,12 +700,13 @@ public: append(IL); } + void assign(const SmallVectorImpl &RHS) { assign(RHS.begin(), RHS.end()); } + iterator erase(const_iterator CI) { // Just cast away constness because this is a non-const member function. iterator I = const_cast<iterator>(CI); - assert(I >= this->begin() && "Iterator to erase is out of bounds."); - assert(I < this->end() && "Erasing at past-the-end iterator."); + assert(this->isReferenceToStorage(CI) && "Iterator to erase is out of bounds."); iterator N = I; // Shift all elts down one. @@ -497,9 +721,7 @@ public: iterator S = const_cast<iterator>(CS); iterator E = const_cast<iterator>(CE); - assert(S >= this->begin() && "Range to erase is out of bounds."); - assert(S <= E && "Trying to erase invalid range."); - assert(E <= this->end() && "Trying to erase past the end."); + assert(this->isRangeInStorage(S, E) && "Range to erase is out of bounds."); iterator N = S; // Shift all elts down. @@ -510,20 +732,26 @@ public: return(N); } - iterator insert(iterator I, T &&Elt) { +private: + template <class ArgType> iterator insert_one_impl(iterator I, ArgType &&Elt) { + // Callers ensure that ArgType is derived from T. + static_assert( + std::is_same<std::remove_const_t<std::remove_reference_t<ArgType>>, + T>::value, + "ArgType must be derived from T!"); + if (I == this->end()) { // Important special case for empty vector. 
- this->push_back(::std::move(Elt)); + this->push_back(::std::forward<ArgType>(Elt)); return this->end()-1; } - assert(I >= this->begin() && "Insertion iterator is out of bounds."); - assert(I <= this->end() && "Inserting past the end of the vector."); + assert(this->isReferenceToStorage(I) && "Insertion iterator is out of bounds."); - if (this->size() >= this->capacity()) { - size_t EltNo = I-this->begin(); - this->grow(); - I = this->begin()+EltNo; - } + // Grow if necessary. + size_t Index = I - this->begin(); + std::remove_reference_t<ArgType> *EltPtr = + this->reserveForParamAndGetAddress(Elt); + I = this->begin() + Index; ::new ((void*) this->end()) T(::std::move(this->back())); // Push everything else over. @@ -531,45 +759,26 @@ public: this->set_size(this->size() + 1); // If we just moved the element we're inserting, be sure to update - // the reference. - T *EltPtr = &Elt; - if (I <= EltPtr && EltPtr < this->end()) + // the reference (never happens if TakesParamByValue). + static_assert(!TakesParamByValue || std::is_same<ArgType, T>::value, + "ArgType must be 'T' when taking by value!"); + if (!TakesParamByValue && this->isReferenceToRange(EltPtr, I, this->end())) ++EltPtr; - *I = ::std::move(*EltPtr); + *I = ::std::forward<ArgType>(*EltPtr); return I; } - iterator insert(iterator I, const T &Elt) { - if (I == this->end()) { // Important special case for empty vector. - this->push_back(Elt); - return this->end()-1; - } - - assert(I >= this->begin() && "Insertion iterator is out of bounds."); - assert(I <= this->end() && "Inserting past the end of the vector."); - - if (this->size() >= this->capacity()) { - size_t EltNo = I-this->begin(); - this->grow(); - I = this->begin()+EltNo; - } - ::new ((void*) this->end()) T(std::move(this->back())); - // Push everything else over. - std::move_backward(I, this->end()-1, this->end()); - this->set_size(this->size() + 1); - - // If we just moved the element we're inserting, be sure to update - // the reference. - const T *EltPtr = &Elt; - if (I <= EltPtr && EltPtr < this->end()) - ++EltPtr; +public: + iterator insert(iterator I, T &&Elt) { + return insert_one_impl(I, this->forward_value_param(std::move(Elt))); + } - *I = *EltPtr; - return I; + iterator insert(iterator I, const T &Elt) { + return insert_one_impl(I, this->forward_value_param(Elt)); } - iterator insert(iterator I, size_type NumToInsert, const T &Elt) { + iterator insert(iterator I, size_type NumToInsert, ValueParamT Elt) { // Convert iterator to elt# to avoid invalidating iterator when we reserve() size_t InsertElt = I - this->begin(); @@ -578,11 +787,11 @@ public: return this->begin()+InsertElt; } - assert(I >= this->begin() && "Insertion iterator is out of bounds."); - assert(I <= this->end() && "Inserting past the end of the vector."); + assert(this->isReferenceToStorage(I) && "Insertion iterator is out of bounds."); - // Ensure there is enough space. - reserve(this->size() + NumToInsert); + // Ensure there is enough space, and get the (maybe updated) address of + // Elt. + const T *EltPtr = this->reserveForParamAndGetAddress(Elt, NumToInsert); // Uninvalidate the iterator. I = this->begin()+InsertElt; @@ -599,7 +808,12 @@ public: // Copy the existing elements that get replaced. std::move_backward(I, OldEnd-NumToInsert, OldEnd); - std::fill_n(I, NumToInsert, Elt); + // If we just moved the element we're inserting, be sure to update + // the reference (never happens if TakesParamByValue). 
+ if (!TakesParamByValue && I <= EltPtr && EltPtr < this->end()) + EltPtr += NumToInsert; + + std::fill_n(I, NumToInsert, *EltPtr); return I; } @@ -612,11 +826,16 @@ public: size_t NumOverwritten = OldEnd-I; this->uninitialized_move(I, OldEnd, this->end()-NumOverwritten); + // If we just moved the element we're inserting, be sure to update + // the reference (never happens if TakesParamByValue). + if (!TakesParamByValue && I <= EltPtr && EltPtr < this->end()) + EltPtr += NumToInsert; + // Replace the overwritten part. - std::fill_n(I, NumOverwritten, Elt); + std::fill_n(I, NumOverwritten, *EltPtr); // Insert the non-overwritten middle part. - std::uninitialized_fill_n(OldEnd, NumToInsert-NumOverwritten, Elt); + std::uninitialized_fill_n(OldEnd, NumToInsert - NumOverwritten, *EltPtr); return I; } @@ -633,8 +852,10 @@ public: return this->begin()+InsertElt; } - assert(I >= this->begin() && "Insertion iterator is out of bounds."); - assert(I <= this->end() && "Inserting past the end of the vector."); + assert(this->isReferenceToStorage(I) && "Insertion iterator is out of bounds."); + + // Check that the reserve that follows doesn't invalidate the iterators. + this->assertSafeToAddRange(From, To); size_t NumToInsert = std::distance(From, To); @@ -686,7 +907,8 @@ public: template <typename... ArgTypes> reference emplace_back(ArgTypes &&... Args) { if (LLVM_UNLIKELY(this->size() >= this->capacity())) - this->grow(); + return this->growAndEmplaceBack(std::forward<ArgTypes>(Args)...); + ::new ((void *)this->end()) T(std::forward<ArgTypes>(Args)...); this->set_size(this->size() + 1); return this->back(); @@ -721,10 +943,8 @@ void SmallVectorImpl<T>::swap(SmallVectorImpl<T> &RHS) { std::swap(this->Capacity, RHS.Capacity); return; } - if (RHS.size() > this->capacity()) - this->grow(RHS.size()); - if (this->size() > RHS.capacity()) - RHS.grow(this->size()); + this->reserve(RHS.size()); + RHS.reserve(this->size()); // Swap the shared elements. size_t NumShared = this->size(); @@ -779,8 +999,7 @@ SmallVectorImpl<T> &SmallVectorImpl<T>:: // FIXME: don't do this if they're efficiently moveable. if (this->capacity() < RHSSize) { // Destroy current elements. - this->destroy_range(this->begin(), this->end()); - this->set_size(0); + this->clear(); CurSize = 0; this->grow(RHSSize); } else if (CurSize) { @@ -839,8 +1058,7 @@ SmallVectorImpl<T> &SmallVectorImpl<T>::operator=(SmallVectorImpl<T> &&RHS) { // elements. if (this->capacity() < RHSSize) { // Destroy current elements. - this->destroy_range(this->begin(), this->end()); - this->set_size(0); + this->clear(); CurSize = 0; this->grow(RHSSize); } else if (CurSize) { @@ -863,13 +1081,71 @@ SmallVectorImpl<T> &SmallVectorImpl<T>::operator=(SmallVectorImpl<T> &&RHS) { /// to avoid allocating unnecessary storage. template <typename T, unsigned N> struct SmallVectorStorage { - AlignedCharArrayUnion<T> InlineElts[N]; + alignas(T) char InlineElts[N * sizeof(T)]; }; /// We need the storage to be properly aligned even for small-size of 0 so that /// the pointer math in \a SmallVectorTemplateCommon::getFirstEl() is /// well-defined. -template <typename T> struct alignas(alignof(T)) SmallVectorStorage<T, 0> {}; +template <typename T> struct alignas(T) SmallVectorStorage<T, 0> {}; + +/// Forward declaration of SmallVector so that +/// calculateSmallVectorDefaultInlinedElements can reference +/// `sizeof(SmallVector<T, 0>)`. 
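Taken together, the reserveForParamAndGetAddress()/growAndEmplaceBack() plumbing above is what keeps self-referential calls well defined even when they force a reallocation. A minimal sketch with illustrative values:

#include "llvm/ADT/SmallVector.h"
#include <cassert>
#include <string>

void selfReferenceExample() {
  llvm::SmallVector<std::string, 2> V = {"a", "b"}; // inline storage is full
  // Grows, but the argument's address is recomputed against the new buffer.
  V.push_back(V[0]);
  // The reference into the shifted tail is adjusted before the assignment.
  V.insert(V.begin(), V.back());
  assert(V.size() == 4 && V.front() == "a" && V.back() == "a");
}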
+template <typename T, unsigned N> class LLVM_GSL_OWNER SmallVector; + +/// Helper class for calculating the default number of inline elements for +/// `SmallVector<T>`. +/// +/// This should be migrated to a constexpr function when our minimum +/// compiler support is enough for multi-statement constexpr functions. +template <typename T> struct CalculateSmallVectorDefaultInlinedElements { + // Parameter controlling the default number of inlined elements + // for `SmallVector<T>`. + // + // The default number of inlined elements ensures that + // 1. There is at least one inlined element. + // 2. `sizeof(SmallVector<T>) <= kPreferredSmallVectorSizeof` unless + // it contradicts 1. + static constexpr size_t kPreferredSmallVectorSizeof = 64; + + // static_assert that sizeof(T) is not "too big". + // + // Because our policy guarantees at least one inlined element, it is possible + // for an arbitrarily large inlined element to allocate an arbitrarily large + // amount of inline storage. We generally consider it an antipattern for a + // SmallVector to allocate an excessive amount of inline storage, so we want + // to call attention to these cases and make sure that users are making an + // intentional decision if they request a lot of inline storage. + // + // We want this assertion to trigger in pathological cases, but otherwise + // not be too easy to hit. To accomplish that, the cutoff is actually somewhat + // larger than kPreferredSmallVectorSizeof (otherwise, + // `SmallVector<SmallVector<T>>` would be one easy way to trip it, and that + // pattern seems useful in practice). + // + // One wrinkle is that this assertion is in theory non-portable, since + // sizeof(T) is in general platform-dependent. However, we don't expect this + // to be much of an issue, because most LLVM development happens on 64-bit + // hosts, and therefore sizeof(T) is expected to *decrease* when compiled for + // 32-bit hosts, dodging the issue. The reverse situation, where development + // happens on a 32-bit host and then fails due to sizeof(T) *increasing* on a + // 64-bit host, is expected to be very rare. + static_assert( + sizeof(T) <= 256, + "You are trying to use a default number of inlined elements for " + "`SmallVector<T>` but `sizeof(T)` is really big! Please use an " + "explicit number of inlined elements with `SmallVector<T, N>` to make " + "sure you really want that much inline storage."); + + // Discount the size of the header itself when calculating the maximum inline + // bytes. + static constexpr size_t PreferredInlineBytes = + kPreferredSmallVectorSizeof - sizeof(SmallVector<T, 0>); + static constexpr size_t NumElementsThatFit = PreferredInlineBytes / sizeof(T); + static constexpr size_t value = + NumElementsThatFit == 0 ? 1 : NumElementsThatFit; +}; /// This is a 'vector' (really, a variable-sized array), optimized /// for the case when the array is small. It contains some number of elements @@ -877,9 +1153,18 @@ template <typename T> struct alignas(alignof(T)) SmallVectorStorage<T, 0> {}; /// elements is below that threshold. This allows normal "small" cases to be /// fast without losing generality for large inputs. /// -/// Note that this does not attempt to be exception safe. +/// \note +/// In the absence of a well-motivated choice for the number of inlined +/// elements \p N, it is recommended to use \c SmallVector<T> (that is, +/// omitting the \p N). 
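The resulting default is easy to work out by hand. For example, assuming a typical 64-bit host where the header (one pointer plus 32-bit size and capacity) makes sizeof(SmallVector<int, 0>) equal to 16 bytes:

#include "llvm/ADT/SmallVector.h"

// kPreferredSmallVectorSizeof = 64
// PreferredInlineBytes        = 64 - sizeof(SmallVector<int, 0>) = 48
// NumElementsThatFit          = 48 / sizeof(int)                 = 12
static_assert(llvm::CalculateSmallVectorDefaultInlinedElements<int>::value == 12,
              "holds under the sizeof assumptions stated above");
static_assert(sizeof(llvm::SmallVector<int>) <= 64,
              "default-N SmallVector<int> stays within the 64-byte budget");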
This will choose a default number of inlined elements +/// reasonable for allocation on the stack (for example, trying to keep \c +/// sizeof(SmallVector<T>) around 64 bytes). /// -template <typename T, unsigned N> +/// \warning This does not attempt to be exception safe. +/// +/// \see https://llvm.org/docs/ProgrammersManual.html#llvm-adt-smallvector-h +template <typename T, + unsigned N = CalculateSmallVectorDefaultInlinedElements<T>::value> class LLVM_GSL_OWNER SmallVector : public SmallVectorImpl<T>, SmallVectorStorage<T, N> { public: @@ -918,7 +1203,7 @@ public: SmallVectorImpl<T>::operator=(RHS); } - const SmallVector &operator=(const SmallVector &RHS) { + SmallVector &operator=(const SmallVector &RHS) { SmallVectorImpl<T>::operator=(RHS); return *this; } @@ -933,17 +1218,17 @@ public: SmallVectorImpl<T>::operator=(::std::move(RHS)); } - const SmallVector &operator=(SmallVector &&RHS) { + SmallVector &operator=(SmallVector &&RHS) { SmallVectorImpl<T>::operator=(::std::move(RHS)); return *this; } - const SmallVector &operator=(SmallVectorImpl<T> &&RHS) { + SmallVector &operator=(SmallVectorImpl<T> &&RHS) { SmallVectorImpl<T>::operator=(::std::move(RHS)); return *this; } - const SmallVector &operator=(std::initializer_list<T> IL) { + SmallVector &operator=(std::initializer_list<T> IL) { this->assign(IL); return *this; } diff --git a/llvm/include/llvm/ADT/SparseSet.h b/llvm/include/llvm/ADT/SparseSet.h index 74457d5fd679..d8acf1ee2f3a 100644 --- a/llvm/include/llvm/ADT/SparseSet.h +++ b/llvm/include/llvm/ADT/SparseSet.h @@ -229,12 +229,15 @@ public: return const_cast<SparseSet*>(this)->findIndex(KeyIndexOf(Key)); } + /// Check if the set contains the given \c Key. + /// + /// @param Key A valid key to find. + bool contains(const KeyT &Key) const { return find(Key) == end() ? 0 : 1; } + /// count - Returns 1 if this set contains an element identified by Key, /// 0 otherwise. /// - size_type count(const KeyT &Key) const { - return find(Key) == end() ? 0 : 1; - } + size_type count(const KeyT &Key) const { return contains(Key) ? 1 : 0; } /// insert - Attempts to insert a new element. /// diff --git a/llvm/include/llvm/ADT/Statistic.h b/llvm/include/llvm/ADT/Statistic.h index d7aff6c5939a..aa338ccff19a 100644 --- a/llvm/include/llvm/ADT/Statistic.h +++ b/llvm/include/llvm/ADT/Statistic.h @@ -36,6 +36,8 @@ // configure time. #if !defined(NDEBUG) || LLVM_FORCE_ENABLE_STATS #define LLVM_ENABLE_STATS 1 +#else +#define LLVM_ENABLE_STATS 0 #endif namespace llvm { diff --git a/llvm/include/llvm/ADT/StringExtras.h b/llvm/include/llvm/ADT/StringExtras.h index 990a3054a9d2..68e89508cba9 100644 --- a/llvm/include/llvm/ADT/StringExtras.h +++ b/llvm/include/llvm/ADT/StringExtras.h @@ -66,17 +66,29 @@ inline ArrayRef<uint8_t> arrayRefFromStringRef(StringRef Input) { /// /// If \p C is not a valid hex digit, -1U is returned. inline unsigned hexDigitValue(char C) { - if (C >= '0' && C <= '9') return C-'0'; - if (C >= 'a' && C <= 'f') return C-'a'+10U; - if (C >= 'A' && C <= 'F') return C-'A'+10U; - return -1U; + struct HexTable { + unsigned LUT[255] = {}; + constexpr HexTable() { + // Default initialize everything to invalid. + for (int i = 0; i < 255; ++i) + LUT[i] = ~0U; + // Initialize `0`-`9`. + for (int i = 0; i < 10; ++i) + LUT['0' + i] = i; + // Initialize `A`-`F` and `a`-`f`. + for (int i = 0; i < 6; ++i) + LUT['A' + i] = LUT['a' + i] = 10 + i; + } + }; + constexpr HexTable Table; + return Table.LUT[static_cast<unsigned char>(C)]; } /// Checks if character \p C is one of the 10 decimal digits. 
inline bool isDigit(char C) { return C >= '0' && C <= '9'; } /// Checks if character \p C is a hexadecimal numeric character. -inline bool isHexDigit(char C) { return hexDigitValue(C) != -1U; } +inline bool isHexDigit(char C) { return hexDigitValue(C) != ~0U; } /// Checks if character \p C is a valid letter as classified by "C" locale. inline bool isAlpha(char C) { @@ -165,34 +177,68 @@ inline std::string toHex(ArrayRef<uint8_t> Input, bool LowerCase = false) { return toHex(toStringRef(Input), LowerCase); } -inline uint8_t hexFromNibbles(char MSB, char LSB) { +/// Store the binary representation of the two provided values, \p MSB and +/// \p LSB, that make up the nibbles of a hexadecimal digit. If \p MSB or \p LSB +/// do not correspond to proper nibbles of a hexadecimal digit, this method +/// returns false. Otherwise, returns true. +inline bool tryGetHexFromNibbles(char MSB, char LSB, uint8_t &Hex) { unsigned U1 = hexDigitValue(MSB); unsigned U2 = hexDigitValue(LSB); - assert(U1 != -1U && U2 != -1U); + if (U1 == ~0U || U2 == ~0U) + return false; - return static_cast<uint8_t>((U1 << 4) | U2); + Hex = static_cast<uint8_t>((U1 << 4) | U2); + return true; } -/// Convert hexadecimal string \p Input to its binary representation. -/// The return string is half the size of \p Input. -inline std::string fromHex(StringRef Input) { +/// Return the binary representation of the two provided values, \p MSB and +/// \p LSB, that make up the nibbles of a hexadecimal digit. +inline uint8_t hexFromNibbles(char MSB, char LSB) { + uint8_t Hex = 0; + bool GotHex = tryGetHexFromNibbles(MSB, LSB, Hex); + (void)GotHex; + assert(GotHex && "MSB and/or LSB do not correspond to hex digits"); + return Hex; +} + +/// Convert hexadecimal string \p Input to its binary representation and store +/// the result in \p Output. Returns true if the binary representation could be +/// converted from the hexadecimal string. Returns false if \p Input contains +/// non-hexadecimal digits. The output string is half the size of \p Input. +inline bool tryGetFromHex(StringRef Input, std::string &Output) { if (Input.empty()) - return std::string(); + return true; - std::string Output; Output.reserve((Input.size() + 1) / 2); if (Input.size() % 2 == 1) { - Output.push_back(hexFromNibbles('0', Input.front())); + uint8_t Hex = 0; + if (!tryGetHexFromNibbles('0', Input.front(), Hex)) + return false; + + Output.push_back(Hex); Input = Input.drop_front(); } assert(Input.size() % 2 == 0); while (!Input.empty()) { - uint8_t Hex = hexFromNibbles(Input[0], Input[1]); + uint8_t Hex = 0; + if (!tryGetHexFromNibbles(Input[0], Input[1], Hex)) + return false; + Output.push_back(Hex); Input = Input.drop_front(2); } - return Output; + return true; +} + +/// Convert hexadecimal string \p Input to its binary representation. +/// The return string is half the size of \p Input. 
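A short sketch of the non-asserting conversion path introduced above; the function name and literal inputs are only illustrative:

#include "llvm/ADT/StringExtras.h"
#include <cassert>
#include <cstdint>
#include <string>

void hexExample() {
  std::string Bytes;
  // Well-formed input: the call succeeds and appends the decoded bytes.
  if (llvm::tryGetFromHex("deadbeef", Bytes))
    assert(Bytes.size() == 4 && static_cast<uint8_t>(Bytes[0]) == 0xde);

  // Malformed input is reported instead of tripping the assertion that
  // fromHex()/hexFromNibbles() would hit.
  uint8_t Nibble = 0;
  assert(!llvm::tryGetHexFromNibbles('z', '1', Nibble));
  assert(!llvm::tryGetFromHex("xyz", Bytes));
}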
+inline std::string fromHex(StringRef Input) { + std::string Hex; + bool GotHex = tryGetFromHex(Input, Hex); + (void)GotHex; + assert(GotHex && "Input contains non hex digits"); + return Hex; } /// Convert the string \p S to an integer of the specified type using @@ -245,7 +291,7 @@ inline std::string utostr(uint64_t X, bool isNeg = false) { inline std::string itostr(int64_t X) { if (X < 0) - return utostr(-static_cast<uint64_t>(X), true); + return utostr(static_cast<uint64_t>(1) + ~static_cast<uint64_t>(X), true); else return utostr(static_cast<uint64_t>(X)); } @@ -338,13 +384,16 @@ inline std::string join_impl(IteratorT Begin, IteratorT End, size_t Len = (std::distance(Begin, End) - 1) * Separator.size(); for (IteratorT I = Begin; I != End; ++I) - Len += (*Begin).size(); + Len += (*I).size(); S.reserve(Len); + size_t PrevCapacity = S.capacity(); + (void)PrevCapacity; S += (*Begin); while (++Begin != End) { S += Separator; S += (*Begin); } + assert(PrevCapacity == S.capacity() && "String grew during building"); return S; } @@ -416,6 +465,30 @@ inline std::string join_items(Sep Separator, Args &&... Items) { return Result; } +/// A helper class to return the specified delimiter string after the first +/// invocation of operator StringRef(). Used to generate a comma-separated +/// list from a loop like so: +/// +/// \code +/// ListSeparator LS; +/// for (auto &I : C) +/// OS << LS << I.getName(); +/// \end +class ListSeparator { + bool First = true; + StringRef Separator; + +public: + ListSeparator(StringRef Separator = ", ") : Separator(Separator) {} + operator StringRef() { + if (First) { + First = false; + return {}; + } + return Separator; + } +}; + } // end namespace llvm #endif // LLVM_ADT_STRINGEXTRAS_H diff --git a/llvm/include/llvm/ADT/StringMap.h b/llvm/include/llvm/ADT/StringMap.h index 840f328db796..a82afc9a817c 100644 --- a/llvm/include/llvm/ADT/StringMap.h +++ b/llvm/include/llvm/ADT/StringMap.h @@ -78,10 +78,12 @@ protected: void init(unsigned Size); public: + static constexpr uintptr_t TombstoneIntVal = + static_cast<uintptr_t>(-1) + << PointerLikeTypeTraits<StringMapEntryBase *>::NumLowBitsAvailable; + static StringMapEntryBase *getTombstoneVal() { - uintptr_t Val = static_cast<uintptr_t>(-1); - Val <<= PointerLikeTypeTraits<StringMapEntryBase *>::NumLowBitsAvailable; - return reinterpret_cast<StringMapEntryBase *>(Val); + return reinterpret_cast<StringMapEntryBase *>(TombstoneIntVal); } unsigned getNumBuckets() const { return NumBuckets; } @@ -387,7 +389,9 @@ public: return static_cast<DerivedTy &>(*this); } - bool operator==(const DerivedTy &RHS) const { return Ptr == RHS.Ptr; } + friend bool operator==(const DerivedTy &LHS, const DerivedTy &RHS) { + return LHS.Ptr == RHS.Ptr; + } DerivedTy &operator++() { // Preincrement ++Ptr; diff --git a/llvm/include/llvm/ADT/StringSet.h b/llvm/include/llvm/ADT/StringSet.h index 63d929399a4e..c4245175544b 100644 --- a/llvm/include/llvm/ADT/StringSet.h +++ b/llvm/include/llvm/ADT/StringSet.h @@ -45,6 +45,9 @@ public: insert(const StringMapEntry<ValueTy> &mapEntry) { return insert(mapEntry.getKey()); } + + /// Check if the set contains the given \c key. 
+ bool contains(StringRef key) const { return Base::FindKey(key) != -1; } }; } // end namespace llvm diff --git a/llvm/include/llvm/ADT/Triple.h b/llvm/include/llvm/ADT/Triple.h index 6bad18f19244..eed315c929ad 100644 --- a/llvm/include/llvm/ADT/Triple.h +++ b/llvm/include/llvm/ADT/Triple.h @@ -56,6 +56,7 @@ public: avr, // AVR: Atmel AVR microcontroller bpfel, // eBPF or extended BPF or 64-bit BPF (little endian) bpfeb, // eBPF or extended BPF or 64-bit BPF (big endian) + csky, // CSKY: csky hexagon, // Hexagon: hexagon mips, // MIPS: mips, mipsallegrex, mipsr6 mipsel, // MIPSEL: mipsel, mipsallegrexe, mipsr6el @@ -63,6 +64,7 @@ public: mips64el, // MIPS64EL: mips64el, mips64r6el, mipsn32el, mipsn32r6el msp430, // MSP430: msp430 ppc, // PPC: powerpc + ppcle, // PPCLE: powerpc (little endian) ppc64, // PPC64: powerpc64, ppu ppc64le, // PPC64LE: powerpc64le r600, // R600: AMD GPUs HD2XXX - HD6XXX @@ -103,6 +105,7 @@ public: enum SubArchType { NoSubArch, + ARMSubArch_v8_7a, ARMSubArch_v8_6a, ARMSubArch_v8_5a, ARMSubArch_v8_4a, @@ -128,6 +131,8 @@ public: ARMSubArch_v5te, ARMSubArch_v4t, + AArch64SubArch_arm64e, + KalimbaSubArch_v3, KalimbaSubArch_v4, KalimbaSubArch_v5, @@ -142,8 +147,6 @@ public: Apple, PC, SCEI, - BGP, - BGQ, Freescale, IBM, ImaginationTechnologies, @@ -175,11 +178,11 @@ public: OpenBSD, Solaris, Win32, + ZOS, Haiku, Minix, RTEMS, NaCl, // Native Client - CNK, // BG/P Compute-Node Kernel AIX, CUDA, // NVIDIA CUDA NVCL, // NVIDIA OpenCL @@ -206,6 +209,7 @@ public: GNUEABI, GNUEABIHF, GNUX32, + GNUILP32, CODE16, EABI, EABIHF, @@ -227,6 +231,7 @@ public: COFF, ELF, + GOFF, MachO, Wasm, XCOFF, @@ -471,6 +476,8 @@ public: return getSubArch() == Triple::ARMSubArch_v7k; } + bool isOSzOS() const { return getOS() == Triple::ZOS; } + /// isOSDarwin - Is this a "Darwin" OS (macOS, iOS, tvOS or watchOS). bool isOSDarwin() const { return isMacOSX() || isiOS() || isWatchOS(); @@ -484,6 +491,12 @@ public: return getEnvironment() == Triple::MacABI; } + /// Returns true for targets that run on a macOS machine. + bool isTargetMachineMac() const { + return isMacOSX() || (isOSDarwin() && (isSimulatorEnvironment() || + isMacCatalystEnvironment())); + } + bool isOSNetBSD() const { return getOS() == Triple::NetBSD; } @@ -623,6 +636,9 @@ public: return getObjectFormat() == Triple::COFF; } + /// Tests whether the OS uses the GOFF binary format. + bool isOSBinFormatGOFF() const { return getObjectFormat() == Triple::GOFF; } + /// Tests whether the environment is MachO. bool isOSBinFormatMachO() const { return getObjectFormat() == Triple::MachO; @@ -703,7 +719,20 @@ public: /// Tests whether the target is AArch64 (little and big endian). bool isAArch64() const { - return getArch() == Triple::aarch64 || getArch() == Triple::aarch64_be; + return getArch() == Triple::aarch64 || getArch() == Triple::aarch64_be || + getArch() == Triple::aarch64_32; + } + + /// Tests whether the target is AArch64 and pointers are the size specified by + /// \p PointerWidth. + bool isAArch64(int PointerWidth) const { + assert(PointerWidth == 64 || PointerWidth == 32); + if (!isAArch64()) + return false; + return getArch() == Triple::aarch64_32 || + getEnvironment() == Triple::GNUILP32 + ? PointerWidth == 32 + : PointerWidth == 64; } /// Tests whether the target is MIPS 32-bit (little and big endian). @@ -721,6 +750,17 @@ public: return isMIPS32() || isMIPS64(); } + /// Tests whether the target is PowerPC (32- or 64-bit LE or BE). 
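A few of the new and extended Triple predicates in action; the triples themselves are ordinary target strings chosen only for illustration:

#include "llvm/ADT/Triple.h"
#include <cassert>

void tripleExample() {
  llvm::Triple PPC("powerpc64le-unknown-linux-gnu");
  assert(PPC.isPPC() && PPC.isPPC64() && !PPC.isPPC32());

  llvm::Triple Watch("arm64_32-apple-watchos");
  assert(Watch.isAArch64() && Watch.isAArch64(32) && !Watch.isAArch64(64));
}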
+ bool isPPC() const { + return getArch() == Triple::ppc || getArch() == Triple::ppc64 || + getArch() == Triple::ppcle || getArch() == Triple::ppc64le; + } + + /// Tests whether the target is 32-bit PowerPC (little and big endian). + bool isPPC32() const { + return getArch() == Triple::ppc || getArch() == Triple::ppcle; + } + /// Tests whether the target is 64-bit PowerPC (little and big endian). bool isPPC64() const { return getArch() == Triple::ppc64 || getArch() == Triple::ppc64le; @@ -751,6 +791,17 @@ public: return getArch() == Triple::wasm32 || getArch() == Triple::wasm64; } + // Tests whether the target is CSKY + bool isCSKY() const { + return getArch() == Triple::csky; + } + + /// Tests whether the target is the Apple "arm64e" AArch64 subarch. + bool isArm64e() const { + return getArch() == Triple::aarch64 && + getSubArch() == Triple::AArch64SubArch_arm64e; + } + /// Tests whether the target supports comdat bool supportsCOMDAT() const { return !(isOSBinFormatMachO() || isOSBinFormatXCOFF()); @@ -761,6 +812,14 @@ public: return isAndroid() || isOSOpenBSD() || isWindowsCygwinEnvironment(); } + /// Tests whether the target uses -data-sections as default. + bool hasDefaultDataSections() const { + return isOSBinFormatXCOFF() || isWasm(); + } + + /// Tests if the environment supports dllimport/export annotations. + bool hasDLLImportExport() const { return isOSWindows() || isPS4CPU(); } + /// @} /// @name Mutators /// @{ diff --git a/llvm/include/llvm/ADT/iterator.h b/llvm/include/llvm/ADT/iterator.h index 9a1f6e1511e7..6625a3f6179e 100644 --- a/llvm/include/llvm/ADT/iterator.h +++ b/llvm/include/llvm/ADT/iterator.h @@ -142,28 +142,30 @@ public: return tmp; } +#ifndef __cpp_impl_three_way_comparison bool operator!=(const DerivedT &RHS) const { - return !static_cast<const DerivedT *>(this)->operator==(RHS); + return !(static_cast<const DerivedT &>(*this) == RHS); } +#endif bool operator>(const DerivedT &RHS) const { static_assert( IsRandomAccess, "Relational operators are only defined for random access iterators."); - return !static_cast<const DerivedT *>(this)->operator<(RHS) && - !static_cast<const DerivedT *>(this)->operator==(RHS); + return !(static_cast<const DerivedT &>(*this) < RHS) && + !(static_cast<const DerivedT &>(*this) == RHS); } bool operator<=(const DerivedT &RHS) const { static_assert( IsRandomAccess, "Relational operators are only defined for random access iterators."); - return !static_cast<const DerivedT *>(this)->operator>(RHS); + return !(static_cast<const DerivedT &>(*this) > RHS); } bool operator>=(const DerivedT &RHS) const { static_assert( IsRandomAccess, "Relational operators are only defined for random access iterators."); - return !static_cast<const DerivedT *>(this)->operator<(RHS); + return !(static_cast<const DerivedT &>(*this) < RHS); } PointerT operator->() { return &static_cast<DerivedT *>(this)->operator*(); } @@ -260,12 +262,16 @@ public: return *static_cast<DerivedT *>(this); } - bool operator==(const DerivedT &RHS) const { return I == RHS.I; } - bool operator<(const DerivedT &RHS) const { + friend bool operator==(const iterator_adaptor_base &LHS, + const iterator_adaptor_base &RHS) { + return LHS.I == RHS.I; + } + friend bool operator<(const iterator_adaptor_base &LHS, + const iterator_adaptor_base &RHS) { static_assert( BaseT::IsRandomAccess, "Relational operators are only defined for random access iterators."); - return I < RHS.I; + return LHS.I < RHS.I; } ReferenceT operator*() const { return *I; } diff --git 
a/llvm/include/llvm/ADT/iterator_range.h b/llvm/include/llvm/ADT/iterator_range.h index f038f6bf2128..a9b46a3aa45b 100644 --- a/llvm/include/llvm/ADT/iterator_range.h +++ b/llvm/include/llvm/ADT/iterator_range.h @@ -18,7 +18,6 @@ #ifndef LLVM_ADT_ITERATOR_RANGE_H #define LLVM_ADT_ITERATOR_RANGE_H -#include <iterator> #include <utility> namespace llvm { diff --git a/llvm/include/llvm/ADT/simple_ilist.h b/llvm/include/llvm/ADT/simple_ilist.h index 9257b47b9cf8..d4b6be347219 100644 --- a/llvm/include/llvm/ADT/simple_ilist.h +++ b/llvm/include/llvm/ADT/simple_ilist.h @@ -28,8 +28,8 @@ namespace llvm { /// This is a simple intrusive list for a \c T that inherits from \c /// ilist_node<T>. The list never takes ownership of anything inserted in it. /// -/// Unlike \a iplist<T> and \a ilist<T>, \a simple_ilist<T> never allocates or -/// deletes values, and has no callback traits. +/// Unlike \a iplist<T> and \a ilist<T>, \a simple_ilist<T> never deletes +/// values, and has no callback traits. /// /// The API for adding nodes include \a push_front(), \a push_back(), and \a /// insert(). These all take values by reference (not by pointer), except for @@ -52,7 +52,7 @@ namespace llvm { /// to calling \a std::for_each() on the range to be discarded. /// /// The currently available \p Options customize the nodes in the list. The -/// same options must be specified in the \a ilist_node instantation for +/// same options must be specified in the \a ilist_node instantiation for /// compatibility (although the order is irrelevant). /// \li Use \a ilist_tag to designate which ilist_node for a given \p T this /// list should use. This is useful if a type \p T is part of multiple, diff --git a/llvm/include/llvm/Analysis/AliasAnalysis.h b/llvm/include/llvm/Analysis/AliasAnalysis.h index c35ee2f499de..9f7461243f35 100644 --- a/llvm/include/llvm/Analysis/AliasAnalysis.h +++ b/llvm/include/llvm/Analysis/AliasAnalysis.h @@ -42,10 +42,6 @@ #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/MemoryLocation.h" -#include "llvm/Analysis/TargetLibraryInfo.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/Instruction.h" -#include "llvm/IR/Instructions.h" #include "llvm/IR/PassManager.h" #include "llvm/Pass.h" #include <cstdint> @@ -56,9 +52,17 @@ namespace llvm { class AnalysisUsage; +class AtomicCmpXchgInst; class BasicAAResult; class BasicBlock; +class CatchPadInst; +class CatchReturnInst; class DominatorTree; +class FenceInst; +class Function; +class InvokeInst; +class PreservedAnalyses; +class TargetLibraryInfo; class Value; /// The possible results of an alias query. @@ -342,12 +346,28 @@ createModRefInfo(const FunctionModRefBehavior FMRB) { class AAQueryInfo { public: using LocPair = std::pair<MemoryLocation, MemoryLocation>; - using AliasCacheT = SmallDenseMap<LocPair, AliasResult, 8>; + struct CacheEntry { + AliasResult Result; + /// Number of times a NoAlias assumption has been used. + /// 0 for assumptions that have not been used, -1 for definitive results. + int NumAssumptionUses; + /// Whether this is a definitive (non-assumption) result. + bool isDefinitive() const { return NumAssumptionUses < 0; } + }; + using AliasCacheT = SmallDenseMap<LocPair, CacheEntry, 8>; AliasCacheT AliasCache; using IsCapturedCacheT = SmallDenseMap<const Value *, bool, 8>; IsCapturedCacheT IsCapturedCache; + /// How many active NoAlias assumption uses there are. + int NumAssumptionUses = 0; + + /// Location pairs for which an assumption based result is currently stored. 
+ /// Used to remove all potentially incorrect results from the cache if an + /// assumption is disproven. + SmallVector<AAQueryInfo::LocPair, 4> AssumptionBasedResults; + AAQueryInfo() : AliasCache(), IsCapturedCache() {} }; @@ -401,7 +421,8 @@ public: /// A convenience wrapper around the primary \c alias interface. AliasResult alias(const Value *V1, const Value *V2) { - return alias(V1, LocationSize::unknown(), V2, LocationSize::unknown()); + return alias(MemoryLocation::getBeforeOrAfter(V1), + MemoryLocation::getBeforeOrAfter(V2)); } /// A trivial helper function to check to see if the specified pointers are @@ -418,7 +439,8 @@ public: /// A convenience wrapper around the \c isNoAlias helper interface. bool isNoAlias(const Value *V1, const Value *V2) { - return isNoAlias(MemoryLocation(V1), MemoryLocation(V2)); + return isNoAlias(MemoryLocation::getBeforeOrAfter(V1), + MemoryLocation::getBeforeOrAfter(V2)); } /// A trivial helper function to check to see if the specified pointers are @@ -440,7 +462,7 @@ public: /// A convenience wrapper around the primary \c pointsToConstantMemory /// interface. bool pointsToConstantMemory(const Value *P, bool OrLocal = false) { - return pointsToConstantMemory(MemoryLocation(P), OrLocal); + return pointsToConstantMemory(MemoryLocation::getBeforeOrAfter(P), OrLocal); } /// @} @@ -533,7 +555,7 @@ public: /// write at most from objects pointed to by their pointer-typed arguments /// (with arbitrary offsets). static bool onlyAccessesArgPointees(FunctionModRefBehavior MRB) { - return !(MRB & FMRL_Anywhere & ~FMRL_ArgumentPointees); + return !((unsigned)MRB & FMRL_Anywhere & ~FMRL_ArgumentPointees); } /// Checks if functions with the specified behavior are known to potentially @@ -541,26 +563,27 @@ public: /// (with arbitrary offsets). static bool doesAccessArgPointees(FunctionModRefBehavior MRB) { return isModOrRefSet(createModRefInfo(MRB)) && - (MRB & FMRL_ArgumentPointees); + ((unsigned)MRB & FMRL_ArgumentPointees); } /// Checks if functions with the specified behavior are known to read and /// write at most from memory that is inaccessible from LLVM IR. static bool onlyAccessesInaccessibleMem(FunctionModRefBehavior MRB) { - return !(MRB & FMRL_Anywhere & ~FMRL_InaccessibleMem); + return !((unsigned)MRB & FMRL_Anywhere & ~FMRL_InaccessibleMem); } /// Checks if functions with the specified behavior are known to potentially /// read or write from memory that is inaccessible from LLVM IR. static bool doesAccessInaccessibleMem(FunctionModRefBehavior MRB) { - return isModOrRefSet(createModRefInfo(MRB)) && (MRB & FMRL_InaccessibleMem); + return isModOrRefSet(createModRefInfo(MRB)) && + ((unsigned)MRB & FMRL_InaccessibleMem); } /// Checks if functions with the specified behavior are known to read and /// write at most from memory that is inaccessible from LLVM IR or objects /// pointed to by their pointer-typed arguments (with arbitrary offsets). 
static bool onlyAccessesInaccessibleOrArgMem(FunctionModRefBehavior MRB) { - return !(MRB & FMRL_Anywhere & + return !((unsigned)MRB & FMRL_Anywhere & ~(FMRL_InaccessibleMem | FMRL_ArgumentPointees)); } @@ -760,40 +783,7 @@ private: AAQueryInfo &AAQI); ModRefInfo getModRefInfo(const Instruction *I, const Optional<MemoryLocation> &OptLoc, - AAQueryInfo &AAQIP) { - if (OptLoc == None) { - if (const auto *Call = dyn_cast<CallBase>(I)) { - return createModRefInfo(getModRefBehavior(Call)); - } - } - - const MemoryLocation &Loc = OptLoc.getValueOr(MemoryLocation()); - - switch (I->getOpcode()) { - case Instruction::VAArg: - return getModRefInfo((const VAArgInst *)I, Loc, AAQIP); - case Instruction::Load: - return getModRefInfo((const LoadInst *)I, Loc, AAQIP); - case Instruction::Store: - return getModRefInfo((const StoreInst *)I, Loc, AAQIP); - case Instruction::Fence: - return getModRefInfo((const FenceInst *)I, Loc, AAQIP); - case Instruction::AtomicCmpXchg: - return getModRefInfo((const AtomicCmpXchgInst *)I, Loc, AAQIP); - case Instruction::AtomicRMW: - return getModRefInfo((const AtomicRMWInst *)I, Loc, AAQIP); - case Instruction::Call: - return getModRefInfo((const CallInst *)I, Loc, AAQIP); - case Instruction::Invoke: - return getModRefInfo((const InvokeInst *)I, Loc, AAQIP); - case Instruction::CatchPad: - return getModRefInfo((const CatchPadInst *)I, Loc, AAQIP); - case Instruction::CatchRet: - return getModRefInfo((const CatchReturnInst *)I, Loc, AAQIP); - default: - return ModRefInfo::NoModRef; - } - } + AAQueryInfo &AAQIP); class Concept; @@ -807,6 +797,9 @@ private: std::vector<AnalysisKey *> AADeps; + /// Query depth used to distinguish recursive queries. + unsigned Depth = 0; + friend class BatchAAResults; }; @@ -847,6 +840,13 @@ public: FunctionModRefBehavior getModRefBehavior(const CallBase *Call) { return AA.getModRefBehavior(Call); } + bool isMustAlias(const MemoryLocation &LocA, const MemoryLocation &LocB) { + return alias(LocA, LocB) == MustAlias; + } + bool isMustAlias(const Value *V1, const Value *V2) { + return alias(MemoryLocation(V1, LocationSize::precise(1)), + MemoryLocation(V2, LocationSize::precise(1))) == MustAlias; + } }; /// Temporary typedef for legacy code that uses a generic \c AliasAnalysis @@ -1107,9 +1107,6 @@ public: /// Return true if this pointer is returned by a noalias function. bool isNoAliasCall(const Value *V); -/// Return true if this is an argument with the noalias attribute. -bool isNoAliasArgument(const Value *V); - /// Return true if this pointer refers to a distinct and identifiable object. 
/// This returns true for: /// Global Variables and Functions (but not Global Aliases) @@ -1157,12 +1154,7 @@ public: ResultGetters.push_back(&getModuleAAResultImpl<AnalysisT>); } - Result run(Function &F, FunctionAnalysisManager &AM) { - Result R(AM.getResult<TargetLibraryAnalysis>(F)); - for (auto &Getter : ResultGetters) - (*Getter)(F, AM, R); - return R; - } + Result run(Function &F, FunctionAnalysisManager &AM); private: friend AnalysisInfoMixin<AAManager>; diff --git a/llvm/include/llvm/Analysis/AliasSetTracker.h b/llvm/include/llvm/Analysis/AliasSetTracker.h index 690a94d9cf2c..b27fd5aa92a7 100644 --- a/llvm/include/llvm/Analysis/AliasSetTracker.h +++ b/llvm/include/llvm/Analysis/AliasSetTracker.h @@ -20,9 +20,10 @@ #include "llvm/ADT/DenseMapInfo.h" #include "llvm/ADT/ilist.h" #include "llvm/ADT/ilist_node.h" -#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/MemoryLocation.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Metadata.h" +#include "llvm/IR/PassManager.h" #include "llvm/IR/ValueHandle.h" #include "llvm/Support/Casting.h" #include <cassert> @@ -33,6 +34,7 @@ namespace llvm { +class AAResults; class AliasSetTracker; class BasicBlock; class LoadInst; @@ -45,6 +47,8 @@ class StoreInst; class VAArgInst; class Value; +enum AliasResult : uint8_t; + class AliasSet : public ilist_node<AliasSet> { friend class AliasSetTracker; @@ -293,7 +297,7 @@ private: void addPointer(AliasSetTracker &AST, PointerRec &Entry, LocationSize Size, const AAMDNodes &AAInfo, bool KnownMustAlias = false, bool SkipSizeUpdate = false); - void addUnknownInst(Instruction *I, AliasAnalysis &AA); + void addUnknownInst(Instruction *I, AAResults &AA); void removeUnknownInst(AliasSetTracker &AST, Instruction *I) { bool WasEmpty = UnknownInsts.empty(); @@ -311,8 +315,8 @@ public: /// If the specified pointer "may" (or must) alias one of the members in the /// set return the appropriate AliasResult. Otherwise return NoAlias. AliasResult aliasesPointer(const Value *Ptr, LocationSize Size, - const AAMDNodes &AAInfo, AliasAnalysis &AA) const; - bool aliasesUnknownInst(const Instruction *Inst, AliasAnalysis &AA) const; + const AAMDNodes &AAInfo, AAResults &AA) const; + bool aliasesUnknownInst(const Instruction *Inst, AAResults &AA) const; }; inline raw_ostream& operator<<(raw_ostream &OS, const AliasSet &AS) { @@ -338,7 +342,7 @@ class AliasSetTracker { /// handle. struct ASTCallbackVHDenseMapInfo : public DenseMapInfo<Value *> {}; - AliasAnalysis &AA; + AAResults &AA; MemorySSA *MSSA = nullptr; Loop *L = nullptr; ilist<AliasSet> AliasSets; @@ -352,9 +356,9 @@ class AliasSetTracker { public: /// Create an empty collection of AliasSets, and use the specified alias /// analysis object to disambiguate load and store addresses. - explicit AliasSetTracker(AliasAnalysis &aa) : AA(aa) {} - explicit AliasSetTracker(AliasAnalysis &aa, MemorySSA *mssa, Loop *l) - : AA(aa), MSSA(mssa), L(l) {} + explicit AliasSetTracker(AAResults &AA) : AA(AA) {} + explicit AliasSetTracker(AAResults &AA, MemorySSA *MSSA, Loop *L) + : AA(AA), MSSA(MSSA), L(L) {} ~AliasSetTracker() { clear(); } /// These methods are used to add different types of instructions to the alias @@ -393,7 +397,7 @@ public: AliasSet &getAliasSetFor(const MemoryLocation &MemLoc); /// Return the underlying alias analysis object used by this tracker. - AliasAnalysis &getAliasAnalysis() const { return AA; } + AAResults &getAliasAnalysis() const { return AA; } /// This method is used to remove a pointer value from the AliasSetTracker /// entirely. 
It should be used when an instruction is deleted from the @@ -457,6 +461,14 @@ inline raw_ostream& operator<<(raw_ostream &OS, const AliasSetTracker &AST) { return OS; } +class AliasSetsPrinterPass : public PassInfoMixin<AliasSetsPrinterPass> { + raw_ostream &OS; + +public: + explicit AliasSetsPrinterPass(raw_ostream &OS); + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); +}; + } // end namespace llvm #endif // LLVM_ANALYSIS_ALIASSETTRACKER_H diff --git a/llvm/include/llvm/Analysis/BasicAliasAnalysis.h b/llvm/include/llvm/Analysis/BasicAliasAnalysis.h index 9214bfcd7a24..46b8cd1f3a88 100644 --- a/llvm/include/llvm/Analysis/BasicAliasAnalysis.h +++ b/llvm/include/llvm/Analysis/BasicAliasAnalysis.h @@ -18,9 +18,6 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/AssumptionCache.h" -#include "llvm/Analysis/MemoryLocation.h" -#include "llvm/IR/InstrTypes.h" #include "llvm/IR/PassManager.h" #include "llvm/Pass.h" #include <algorithm> @@ -120,6 +117,9 @@ private: APInt Scale; + // Context instruction to use when querying information about this index. + const Instruction *CxtI; + bool operator==(const VariableGEPIndex &Other) const { return V == Other.V && ZExtBits == Other.ZExtBits && SExtBits == Other.SExtBits && Scale == Other.Scale; @@ -128,6 +128,17 @@ private: bool operator!=(const VariableGEPIndex &Other) const { return !operator==(Other); } + + void dump() const { + print(dbgs()); + dbgs() << "\n"; + } + void print(raw_ostream &OS) const { + OS << "(V=" << V->getName() + << ", zextbits=" << ZExtBits + << ", sextbits=" << SExtBits + << ", scale=" << Scale << ")"; + } }; // Represents the internal structure of a GEP, decomposed into a base pointer, @@ -135,15 +146,29 @@ private: struct DecomposedGEP { // Base pointer of the GEP const Value *Base; - // Total constant offset w.r.t the base from indexing into structs - APInt StructOffset; - // Total constant offset w.r.t the base from indexing through - // pointers/arrays/vectors - APInt OtherOffset; + // Total constant offset from base. + APInt Offset; // Scaled variable (non-constant) indices. SmallVector<VariableGEPIndex, 4> VarIndices; // Is GEP index scale compile-time constant. bool HasCompileTimeConstantScale; + + void dump() const { + print(dbgs()); + dbgs() << "\n"; + } + void print(raw_ostream &OS) const { + OS << "(DecomposedGEP Base=" << Base->getName() + << ", Offset=" << Offset + << ", VarIndices=["; + for (size_t i = 0; i < VarIndices.size(); i++) { + if (i != 0) + OS << ", "; + VarIndices[i].print(OS); + } + OS << "], HasCompileTimeConstantScale=" << HasCompileTimeConstantScale + << ")"; + } }; /// Tracks phi nodes we have visited. 
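For orientation, the decomposition held by this structure for a simple GEP such as %p = getelementptr i32, i32* %base, i64 %i (on a target with 64-bit pointers, so no index extension is needed) would be printed by the dump()/print() helpers added above roughly as:

(DecomposedGEP Base=base, Offset=0, VarIndices=[(V=i, zextbits=0, sextbits=0, scale=4)], HasCompileTimeConstantScale=1)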
@@ -171,8 +196,9 @@ private: const DataLayout &DL, unsigned Depth, AssumptionCache *AC, DominatorTree *DT, bool &NSW, bool &NUW); - static bool DecomposeGEPExpression(const Value *V, DecomposedGEP &Decomposed, - const DataLayout &DL, AssumptionCache *AC, DominatorTree *DT); + static DecomposedGEP + DecomposeGEPExpression(const Value *V, const DataLayout &DL, + AssumptionCache *AC, DominatorTree *DT); static bool isGEPBaseAtNegativeOffset(const GEPOperator *GEPOp, const DecomposedGEP &DecompGEP, const DecomposedGEP &DecompObject, @@ -206,18 +232,23 @@ private: AliasResult aliasPHI(const PHINode *PN, LocationSize PNSize, const AAMDNodes &PNAAInfo, const Value *V2, LocationSize V2Size, const AAMDNodes &V2AAInfo, - const Value *UnderV2, AAQueryInfo &AAQI); + AAQueryInfo &AAQI); AliasResult aliasSelect(const SelectInst *SI, LocationSize SISize, const AAMDNodes &SIAAInfo, const Value *V2, LocationSize V2Size, const AAMDNodes &V2AAInfo, - const Value *UnderV2, AAQueryInfo &AAQI); + AAQueryInfo &AAQI); AliasResult aliasCheck(const Value *V1, LocationSize V1Size, - AAMDNodes V1AATag, const Value *V2, - LocationSize V2Size, AAMDNodes V2AATag, - AAQueryInfo &AAQI, const Value *O1 = nullptr, - const Value *O2 = nullptr); + const AAMDNodes &V1AATag, const Value *V2, + LocationSize V2Size, const AAMDNodes &V2AATag, + AAQueryInfo &AAQI); + + AliasResult aliasCheckRecursive(const Value *V1, LocationSize V1Size, + const AAMDNodes &V1AATag, const Value *V2, + LocationSize V2Size, const AAMDNodes &V2AATag, + AAQueryInfo &AAQI, const Value *O1, + const Value *O2); }; /// Analysis pass providing a never-invalidated alias analysis result. diff --git a/llvm/include/llvm/Analysis/BlockFrequencyInfoImpl.h b/llvm/include/llvm/Analysis/BlockFrequencyInfoImpl.h index 868da7a64f68..c22787531117 100644 --- a/llvm/include/llvm/Analysis/BlockFrequencyInfoImpl.h +++ b/llvm/include/llvm/Analysis/BlockFrequencyInfoImpl.h @@ -169,7 +169,7 @@ inline raw_ostream &operator<<(raw_ostream &OS, BlockMass X) { /// algorithms for BlockFrequencyInfoImplBase. Only algorithms that depend on /// the block type (or that call such algorithms) are skipped here. /// -/// Nevertheless, the majority of the overall algorithm documention lives with +/// Nevertheless, the majority of the overall algorithm documentation lives with /// BlockFrequencyInfoImpl. See there for details. class BlockFrequencyInfoImplBase { public: @@ -458,7 +458,7 @@ public: /// Analyze irreducible SCCs. /// - /// Separate irreducible SCCs from \c G, which is an explict graph of \c + /// Separate irreducible SCCs from \c G, which is an explicit graph of \c /// OuterLoop (or the top-level function, if \c OuterLoop is \c nullptr). /// Insert them into \a Loops before \c Insert. /// @@ -706,7 +706,7 @@ void IrreducibleGraph::addEdges(const BlockNode &Node, /// /// In addition to loops, this algorithm has limited support for irreducible /// SCCs, which are SCCs with multiple entry blocks. Irreducible SCCs are -/// discovered on they fly, and modelled as loops with multiple headers. +/// discovered on the fly, and modelled as loops with multiple headers. 
/// /// The headers of irreducible sub-SCCs consist of its entry blocks and all /// nodes that are targets of a backedge within it (excluding backedges within @@ -1246,7 +1246,7 @@ bool BlockFrequencyInfoImpl<BT>::computeMassInLoop(LoopData &Loop) { } } // As a heuristic, if some headers don't have a weight, give them the - // minimium weight seen (not to disrupt the existing trends too much by + // minimum weight seen (not to disrupt the existing trends too much by // using a weight that's in the general range of the other headers' weights, // and the minimum seems to perform better than the average.) // FIXME: better update in the passes that drop the header weight. @@ -1449,8 +1449,8 @@ void BlockFrequencyInfoImpl<BT>::verifyMatch( BlockNode Node = Entry.second; if (OtherValidNodes.count(BB)) { BlockNode OtherNode = OtherValidNodes[BB]; - auto Freq = Freqs[Node.Index]; - auto OtherFreq = Other.Freqs[OtherNode.Index]; + const auto &Freq = Freqs[Node.Index]; + const auto &OtherFreq = Other.Freqs[OtherNode.Index]; if (Freq.Integer != OtherFreq.Integer) { Match = false; dbgs() << "Freq mismatch: " << bfi_detail::getBlockName(BB) << " " diff --git a/llvm/include/llvm/Analysis/BranchProbabilityInfo.h b/llvm/include/llvm/Analysis/BranchProbabilityInfo.h index 3e72afba36c3..6a286236a80e 100644 --- a/llvm/include/llvm/Analysis/BranchProbabilityInfo.h +++ b/llvm/include/llvm/Analysis/BranchProbabilityInfo.h @@ -27,13 +27,16 @@ #include <algorithm> #include <cassert> #include <cstdint> +#include <memory> #include <utility> namespace llvm { class Function; +class Loop; class LoopInfo; class raw_ostream; +class DominatorTree; class PostDominatorTree; class TargetLibraryInfo; class Value; @@ -50,20 +53,79 @@ class Value; /// identify an edge, since we can have multiple edges from Src to Dst. /// As an example, we can have a switch which jumps to Dst with value 0 and /// value 10. +/// +/// Process of computing branch probabilities can be logically viewed as three +/// step process: +/// +/// First, if there is a profile information associated with the branch then +/// it is trivially translated to branch probabilities. There is one exception +/// from this rule though. Probabilities for edges leading to "unreachable" +/// blocks (blocks with the estimated weight not greater than +/// UNREACHABLE_WEIGHT) are evaluated according to static estimation and +/// override profile information. If no branch probabilities were calculated +/// on this step then take the next one. +/// +/// Second, estimate absolute execution weights for each block based on +/// statically known information. Roots of such information are "cold", +/// "unreachable", "noreturn" and "unwind" blocks. Those blocks get their +/// weights set to BlockExecWeight::COLD, BlockExecWeight::UNREACHABLE, +/// BlockExecWeight::NORETURN and BlockExecWeight::UNWIND respectively. Then the +/// weights are propagated to the other blocks up the domination line. In +/// addition, if all successors have estimated weights set then maximum of these +/// weights assigned to the block itself (while this is not ideal heuristic in +/// theory it's simple and works reasonably well in most cases) and the process +/// repeats. Once the process of weights propagation converges branch +/// probabilities are set for all such branches that have at least one successor +/// with the weight set. Default execution weight (BlockExecWeight::DEFAULT) is +/// used for any successors which doesn't have its weight set. 
For loop back +/// branches we use their weights scaled by loop trip count equal to +/// 'LBH_TAKEN_WEIGHT/LBH_NOTTAKEN_WEIGHT'. +/// +/// Here is a simple example demonstrating how the described algorithm works. +/// +/// BB1 +/// / \ +/// v v +/// BB2 BB3 +/// / \ +/// v v +/// ColdBB UnreachBB +/// +/// Initially, ColdBB is associated with COLD_WEIGHT and UnreachBB with +/// UNREACHABLE_WEIGHT. COLD_WEIGHT is set to BB2 as maximum between its +/// successors. BB1 and BB3 has no explicit estimated weights and assumed to +/// have DEFAULT_WEIGHT. Based on assigned weights branches will have the +/// following probabilities: +/// P(BB1->BB2) = COLD_WEIGHT/(COLD_WEIGHT + DEFAULT_WEIGHT) = +/// 0xffff / (0xffff + 0xfffff) = 0.0588(5.9%) +/// P(BB1->BB3) = DEFAULT_WEIGHT_WEIGHT/(COLD_WEIGHT + DEFAULT_WEIGHT) = +/// 0xfffff / (0xffff + 0xfffff) = 0.941(94.1%) +/// P(BB2->ColdBB) = COLD_WEIGHT/(COLD_WEIGHT + UNREACHABLE_WEIGHT) = 1(100%) +/// P(BB2->UnreachBB) = +/// UNREACHABLE_WEIGHT/(COLD_WEIGHT+UNREACHABLE_WEIGHT) = 0(0%) +/// +/// If no branch probabilities were calculated on this step then take the next +/// one. +/// +/// Third, apply different kinds of local heuristics for each individual +/// branch until first match. For example probability of a pointer to be null is +/// estimated as PH_TAKEN_WEIGHT/(PH_TAKEN_WEIGHT + PH_NONTAKEN_WEIGHT). If +/// no local heuristic has been matched then branch is left with no explicit +/// probability set and assumed to have default probability. class BranchProbabilityInfo { public: BranchProbabilityInfo() = default; BranchProbabilityInfo(const Function &F, const LoopInfo &LI, const TargetLibraryInfo *TLI = nullptr, + DominatorTree *DT = nullptr, PostDominatorTree *PDT = nullptr) { - calculate(F, LI, TLI, PDT); + calculate(F, LI, TLI, DT, PDT); } BranchProbabilityInfo(BranchProbabilityInfo &&Arg) : Probs(std::move(Arg.Probs)), LastF(Arg.LastF), - PostDominatedByUnreachable(std::move(Arg.PostDominatedByUnreachable)), - PostDominatedByColdCall(std::move(Arg.PostDominatedByColdCall)) {} + EstimatedBlockWeight(std::move(Arg.EstimatedBlockWeight)) {} BranchProbabilityInfo(const BranchProbabilityInfo &) = delete; BranchProbabilityInfo &operator=(const BranchProbabilityInfo &) = delete; @@ -71,8 +133,7 @@ public: BranchProbabilityInfo &operator=(BranchProbabilityInfo &&RHS) { releaseMemory(); Probs = std::move(RHS.Probs); - PostDominatedByColdCall = std::move(RHS.PostDominatedByColdCall); - PostDominatedByUnreachable = std::move(RHS.PostDominatedByUnreachable); + EstimatedBlockWeight = std::move(RHS.EstimatedBlockWeight); return *this; } @@ -121,16 +182,6 @@ public: raw_ostream &printEdgeProbability(raw_ostream &OS, const BasicBlock *Src, const BasicBlock *Dst) const; -protected: - /// Set the raw edge probability for the given edge. - /// - /// This allows a pass to explicitly set the edge probability for an edge. It - /// can be used when updating the CFG to update and preserve the branch - /// probability information. Read the implementation of how these edge - /// probabilities are calculated carefully before using! - void setEdgeProbability(const BasicBlock *Src, unsigned IndexInSuccessors, - BranchProbability Prob); - public: /// Set the raw probabilities for all edges from the given block. /// @@ -140,24 +191,85 @@ public: void setEdgeProbability(const BasicBlock *Src, const SmallVectorImpl<BranchProbability> &Probs); + /// Copy outgoing edge probabilities from \p Src to \p Dst. 
+ /// + /// This allows to keep probabilities unset for the destination if they were + /// unset for source. + void copyEdgeProbabilities(BasicBlock *Src, BasicBlock *Dst); + static BranchProbability getBranchProbStackProtector(bool IsLikely) { static const BranchProbability LikelyProb((1u << 20) - 1, 1u << 20); return IsLikely ? LikelyProb : LikelyProb.getCompl(); } void calculate(const Function &F, const LoopInfo &LI, - const TargetLibraryInfo *TLI, PostDominatorTree *PDT); + const TargetLibraryInfo *TLI, DominatorTree *DT, + PostDominatorTree *PDT); /// Forget analysis results for the given basic block. void eraseBlock(const BasicBlock *BB); - // Use to track SCCs for handling irreducible loops. - using SccMap = DenseMap<const BasicBlock *, int>; - using SccHeaderMap = DenseMap<const BasicBlock *, bool>; - using SccHeaderMaps = std::vector<SccHeaderMap>; - struct SccInfo { + // Data structure to track SCCs for handling irreducible loops. + class SccInfo { + // Enum of types to classify basic blocks in SCC. Basic block belonging to + // SCC is 'Inner' until it is either 'Header' or 'Exiting'. Note that a + // basic block can be 'Header' and 'Exiting' at the same time. + enum SccBlockType { + Inner = 0x0, + Header = 0x1, + Exiting = 0x2, + }; + // Map of basic blocks to SCC IDs they belong to. If basic block doesn't + // belong to any SCC it is not in the map. + using SccMap = DenseMap<const BasicBlock *, int>; + // Each basic block in SCC is attributed with one or several types from + // SccBlockType. Map value has uint32_t type (instead of SccBlockType) + // since basic block may be for example "Header" and "Exiting" at the same + // time and we need to be able to keep more than one value from + // SccBlockType. + using SccBlockTypeMap = DenseMap<const BasicBlock *, uint32_t>; + // Vector containing classification of basic blocks for all SCCs where i'th + // vector element corresponds to SCC with ID equal to i. + using SccBlockTypeMaps = std::vector<SccBlockTypeMap>; + SccMap SccNums; - SccHeaderMaps SccHeaders; + SccBlockTypeMaps SccBlocks; + + public: + explicit SccInfo(const Function &F); + + /// If \p BB belongs to some SCC then ID of that SCC is returned, otherwise + /// -1 is returned. If \p BB belongs to more than one SCC at the same time + /// result is undefined. + int getSCCNum(const BasicBlock *BB) const; + /// Returns true if \p BB is a 'header' block in SCC with \p SccNum ID, + /// false otherwise. + bool isSCCHeader(const BasicBlock *BB, int SccNum) const { + return getSccBlockType(BB, SccNum) & Header; + } + /// Returns true if \p BB is an 'exiting' block in SCC with \p SccNum ID, + /// false otherwise. + bool isSCCExitingBlock(const BasicBlock *BB, int SccNum) const { + return getSccBlockType(BB, SccNum) & Exiting; + } + /// Fills in \p Enters vector with all such blocks that don't belong to + /// SCC with \p SccNum ID but there is an edge to a block belonging to the + /// SCC. + void getSccEnterBlocks(int SccNum, + SmallVectorImpl<BasicBlock *> &Enters) const; + /// Fills in \p Exits vector with all such blocks that don't belong to + /// SCC with \p SccNum ID but there is an edge from a block belonging to the + /// SCC. + void getSccExitBlocks(int SccNum, + SmallVectorImpl<BasicBlock *> &Exits) const; + + private: + /// Returns \p BB's type according to classification given by SccBlockType + /// enum. Please note that \p BB must belong to SSC with \p SccNum ID. 
+ uint32_t getSccBlockType(const BasicBlock *BB, int SccNum) const; + /// Calculates \p BB's type and stores it in internal data structures for + /// future use. Please note that \p BB must belong to SSC with \p SccNum ID. + void calculateSccBlockType(const BasicBlock *BB, int SccNum); }; private: @@ -169,7 +281,6 @@ private: void deleted() override { assert(BPI != nullptr); BPI->eraseBlock(cast<BasicBlock>(getValPtr())); - BPI->Handles.erase(*this); } public: @@ -177,44 +288,132 @@ private: : CallbackVH(const_cast<Value *>(V)), BPI(BPI) {} }; + /// Pair of Loop and SCC ID number. Used to unify handling of normal and + /// SCC based loop representations. + using LoopData = std::pair<Loop *, int>; + /// Helper class to keep basic block along with its loop data information. + class LoopBlock { + public: + explicit LoopBlock(const BasicBlock *BB, const LoopInfo &LI, + const SccInfo &SccI); + + const BasicBlock *getBlock() const { return BB; } + BasicBlock *getBlock() { return const_cast<BasicBlock *>(BB); } + LoopData getLoopData() const { return LD; } + Loop *getLoop() const { return LD.first; } + int getSccNum() const { return LD.second; } + + bool belongsToLoop() const { return getLoop() || getSccNum() != -1; } + bool belongsToSameLoop(const LoopBlock &LB) const { + return (LB.getLoop() && getLoop() == LB.getLoop()) || + (LB.getSccNum() != -1 && getSccNum() == LB.getSccNum()); + } + + private: + const BasicBlock *const BB = nullptr; + LoopData LD = {nullptr, -1}; + }; + + // Pair of LoopBlocks representing an edge from first to second block. + using LoopEdge = std::pair<const LoopBlock &, const LoopBlock &>; + DenseSet<BasicBlockCallbackVH, DenseMapInfo<Value*>> Handles; // Since we allow duplicate edges from one basic block to another, we use // a pair (PredBlock and an index in the successors) to specify an edge. using Edge = std::pair<const BasicBlock *, unsigned>; - // Default weight value. Used when we don't have information about the edge. - // TODO: DEFAULT_WEIGHT makes sense during static predication, when none of - // the successors have a weight yet. But it doesn't make sense when providing - // weight to an edge that may have siblings with non-zero weights. This can - // be handled various ways, but it's probably fine for an edge with unknown - // weight to just "inherit" the non-zero weight of an adjacent successor. - static const uint32_t DEFAULT_WEIGHT = 16; - DenseMap<Edge, BranchProbability> Probs; /// Track the last function we run over for printing. const Function *LastF = nullptr; - /// Track the set of blocks directly succeeded by a returning block. - SmallPtrSet<const BasicBlock *, 16> PostDominatedByUnreachable; + const LoopInfo *LI = nullptr; + + /// Keeps information about all SCCs in a function. + std::unique_ptr<const SccInfo> SccI; - /// Track the set of blocks that always lead to a cold call. - SmallPtrSet<const BasicBlock *, 16> PostDominatedByColdCall; + /// Keeps mapping of a basic block to its estimated weight. + SmallDenseMap<const BasicBlock *, uint32_t> EstimatedBlockWeight; + + /// Keeps mapping of a loop to estimated weight to enter the loop. + SmallDenseMap<LoopData, uint32_t> EstimatedLoopWeight; + + /// Helper to construct LoopBlock for \p BB. 
+ LoopBlock getLoopBlock(const BasicBlock *BB) const { + return LoopBlock(BB, *LI, *SccI.get()); + } - void computePostDominatedByUnreachable(const Function &F, - PostDominatorTree *PDT); - void computePostDominatedByColdCall(const Function &F, - PostDominatorTree *PDT); - bool calcUnreachableHeuristics(const BasicBlock *BB); + /// Returns true if destination block belongs to some loop and source block is + /// either doesn't belong to any loop or belongs to a loop which is not inner + /// relative to the destination block. + bool isLoopEnteringEdge(const LoopEdge &Edge) const; + /// Returns true if source block belongs to some loop and destination block is + /// either doesn't belong to any loop or belongs to a loop which is not inner + /// relative to the source block. + bool isLoopExitingEdge(const LoopEdge &Edge) const; + /// Returns true if \p Edge is either enters to or exits from some loop, false + /// in all other cases. + bool isLoopEnteringExitingEdge(const LoopEdge &Edge) const; + /// Returns true if source and destination blocks belongs to the same loop and + /// destination block is loop header. + bool isLoopBackEdge(const LoopEdge &Edge) const; + // Fills in \p Enters vector with all "enter" blocks to a loop \LB belongs to. + void getLoopEnterBlocks(const LoopBlock &LB, + SmallVectorImpl<BasicBlock *> &Enters) const; + // Fills in \p Exits vector with all "exit" blocks from a loop \LB belongs to. + void getLoopExitBlocks(const LoopBlock &LB, + SmallVectorImpl<BasicBlock *> &Exits) const; + + /// Returns estimated weight for \p BB. None if \p BB has no estimated weight. + Optional<uint32_t> getEstimatedBlockWeight(const BasicBlock *BB) const; + + /// Returns estimated weight to enter \p L. In other words it is weight of + /// loop's header block not scaled by trip count. Returns None if \p L has no + /// no estimated weight. + Optional<uint32_t> getEstimatedLoopWeight(const LoopData &L) const; + + /// Return estimated weight for \p Edge. Returns None if estimated weight is + /// unknown. + Optional<uint32_t> getEstimatedEdgeWeight(const LoopEdge &Edge) const; + + /// Iterates over all edges leading from \p SrcBB to \p Successors and + /// returns maximum of all estimated weights. If at least one edge has unknown + /// estimated weight None is returned. + template <class IterT> + Optional<uint32_t> + getMaxEstimatedEdgeWeight(const LoopBlock &SrcBB, + iterator_range<IterT> Successors) const; + + /// If \p LoopBB has no estimated weight then set it to \p BBWeight and + /// return true. Otherwise \p BB's weight remains unchanged and false is + /// returned. In addition all blocks/loops that might need their weight to be + /// re-estimated are put into BlockWorkList/LoopWorkList. + bool updateEstimatedBlockWeight(LoopBlock &LoopBB, uint32_t BBWeight, + SmallVectorImpl<BasicBlock *> &BlockWorkList, + SmallVectorImpl<LoopBlock> &LoopWorkList); + + /// Starting from \p LoopBB (including \p LoopBB itself) propagate \p BBWeight + /// up the domination tree. + void propagateEstimatedBlockWeight(const LoopBlock &LoopBB, DominatorTree *DT, + PostDominatorTree *PDT, uint32_t BBWeight, + SmallVectorImpl<BasicBlock *> &WorkList, + SmallVectorImpl<LoopBlock> &LoopWorkList); + + /// Returns block's weight encoded in the IR. + Optional<uint32_t> getInitialEstimatedBlockWeight(const BasicBlock *BB); + + // Computes estimated weights for all blocks in \p F. 
+ void computeEestimateBlockWeight(const Function &F, DominatorTree *DT, + PostDominatorTree *PDT); + + /// Based on computed weights by \p computeEstimatedBlockWeight set + /// probabilities on branches. + bool calcEstimatedHeuristics(const BasicBlock *BB); bool calcMetadataWeights(const BasicBlock *BB); - bool calcColdCallHeuristics(const BasicBlock *BB); bool calcPointerHeuristics(const BasicBlock *BB); - bool calcLoopBranchHeuristics(const BasicBlock *BB, const LoopInfo &LI, - SccInfo &SccI); bool calcZeroHeuristics(const BasicBlock *BB, const TargetLibraryInfo *TLI); bool calcFloatingPointHeuristics(const BasicBlock *BB); - bool calcInvokeHeuristics(const BasicBlock *BB); }; /// Analysis pass which computes \c BranchProbabilityInfo. diff --git a/llvm/include/llvm/Analysis/CFGPrinter.h b/llvm/include/llvm/Analysis/CFGPrinter.h index c4e49ce493ea..53700798b6b3 100644 --- a/llvm/include/llvm/Analysis/CFGPrinter.h +++ b/llvm/include/llvm/Analysis/CFGPrinter.h @@ -18,6 +18,7 @@ #ifndef LLVM_ANALYSIS_CFGPRINTER_H #define LLVM_ANALYSIS_CFGPRINTER_H +#include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/HeatUtils.h" @@ -141,8 +142,18 @@ struct DOTGraphTraits<DOTFuncInfo *> : public DefaultDOTGraphTraits { return OS.str(); } - static std::string getCompleteNodeLabel(const BasicBlock *Node, - DOTFuncInfo *) { + static void eraseComment(std::string &OutStr, unsigned &I, unsigned Idx) { + OutStr.erase(OutStr.begin() + I, OutStr.begin() + Idx); + --I; + } + + static std::string getCompleteNodeLabel( + const BasicBlock *Node, DOTFuncInfo *, + llvm::function_ref<void(raw_string_ostream &, const BasicBlock &)> + HandleBasicBlock = [](raw_string_ostream &OS, + const BasicBlock &Node) -> void { OS << Node; }, + llvm::function_ref<void(std::string &, unsigned &, unsigned)> + HandleComment = eraseComment) { enum { MaxColumns = 80 }; std::string Str; raw_string_ostream OS(Str); @@ -152,7 +163,7 @@ struct DOTGraphTraits<DOTFuncInfo *> : public DefaultDOTGraphTraits { OS << ":"; } - OS << *Node; + HandleBasicBlock(OS, *Node); std::string OutStr = OS.str(); if (OutStr[0] == '\n') OutStr.erase(OutStr.begin()); @@ -168,8 +179,7 @@ struct DOTGraphTraits<DOTFuncInfo *> : public DefaultDOTGraphTraits { LastSpace = 0; } else if (OutStr[i] == ';') { // Delete comments! unsigned Idx = OutStr.find('\n', i + 1); // Find end of line - OutStr.erase(OutStr.begin() + i, OutStr.begin() + Idx); - --i; + HandleComment(OutStr, i, Idx); } else if (ColNum == MaxColumns) { // Wrap lines. // Wrap very long names even though we can't find a space. if (!LastSpace) @@ -285,7 +295,7 @@ struct DOTGraphTraits<DOTFuncInfo *> : public DefaultDOTGraphTraits { " fillcolor=\"" + Color + "70\""; return Attrs; } - bool isNodeHidden(const BasicBlock *Node); + bool isNodeHidden(const BasicBlock *Node, const DOTFuncInfo *CFGInfo); void computeHiddenNodes(const Function *F); }; } // End llvm namespace diff --git a/llvm/include/llvm/Analysis/CGSCCPassManager.h b/llvm/include/llvm/Analysis/CGSCCPassManager.h index eb0d3ae8fedf..985424a74054 100644 --- a/llvm/include/llvm/Analysis/CGSCCPassManager.h +++ b/llvm/include/llvm/Analysis/CGSCCPassManager.h @@ -90,6 +90,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/MapVector.h" #include "llvm/ADT/PriorityWorklist.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" @@ -314,6 +315,16 @@ struct CGSCCUpdateResult { /// for a better technique. 
SmallDenseSet<std::pair<LazyCallGraph::Node *, LazyCallGraph::SCC *>, 4> &InlinedInternalEdges; + + /// Weak VHs to keep track of indirect calls for the purposes of detecting + /// devirtualization. + /// + /// This is a map to avoid having duplicate entries. If a Value is + /// deallocated, its corresponding WeakTrackingVH will be nulled out. When + /// checking if a Value is in the map or not, also check if the corresponding + /// WeakTrackingVH is null to avoid issues with a new Value sharing the same + /// address as a deallocated one. + SmallMapVector<Value *, WeakTrackingVH, 16> IndirectVHs; }; /// The core module pass which does a post-order walk of the SCCs and @@ -325,18 +336,15 @@ struct CGSCCUpdateResult { /// \c CGSCCAnalysisManagerModuleProxy analysis prior to running the CGSCC /// pass over the module to enable a \c FunctionAnalysisManager to be used /// within this run safely. -template <typename CGSCCPassT> class ModuleToPostOrderCGSCCPassAdaptor - : public PassInfoMixin<ModuleToPostOrderCGSCCPassAdaptor<CGSCCPassT>> { + : public PassInfoMixin<ModuleToPostOrderCGSCCPassAdaptor> { public: - explicit ModuleToPostOrderCGSCCPassAdaptor(CGSCCPassT Pass) - : Pass(std::move(Pass)) {} + using PassConceptT = + detail::PassConcept<LazyCallGraph::SCC, CGSCCAnalysisManager, + LazyCallGraph &, CGSCCUpdateResult &>; - // We have to explicitly define all the special member functions because MSVC - // refuses to generate them. - ModuleToPostOrderCGSCCPassAdaptor( - const ModuleToPostOrderCGSCCPassAdaptor &Arg) - : Pass(Arg.Pass) {} + explicit ModuleToPostOrderCGSCCPassAdaptor(std::unique_ptr<PassConceptT> Pass) + : Pass(std::move(Pass)) {} ModuleToPostOrderCGSCCPassAdaptor(ModuleToPostOrderCGSCCPassAdaptor &&Arg) : Pass(std::move(Arg.Pass)) {} @@ -355,16 +363,22 @@ public: /// Runs the CGSCC pass across every SCC in the module. PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); + static bool isRequired() { return true; } + private: - CGSCCPassT Pass; + std::unique_ptr<PassConceptT> Pass; }; /// A function to deduce a function pass type and wrap it in the /// templated adaptor. template <typename CGSCCPassT> -ModuleToPostOrderCGSCCPassAdaptor<CGSCCPassT> +ModuleToPostOrderCGSCCPassAdaptor createModuleToPostOrderCGSCCPassAdaptor(CGSCCPassT Pass) { - return ModuleToPostOrderCGSCCPassAdaptor<CGSCCPassT>(std::move(Pass)); + using PassModelT = detail::PassModel<LazyCallGraph::SCC, CGSCCPassT, + PreservedAnalyses, CGSCCAnalysisManager, + LazyCallGraph &, CGSCCUpdateResult &>; + return ModuleToPostOrderCGSCCPassAdaptor( + std::make_unique<PassModelT>(std::move(Pass))); } /// A proxy from a \c FunctionAnalysisManager to an \c SCC. @@ -442,17 +456,13 @@ LazyCallGraph::SCC &updateCGAndAnalysisManagerForCGSCCPass( /// \c FunctionAnalysisManagerCGSCCProxy analysis prior to running the function /// pass over the SCC to enable a \c FunctionAnalysisManager to be used /// within this run safely. -template <typename FunctionPassT> class CGSCCToFunctionPassAdaptor - : public PassInfoMixin<CGSCCToFunctionPassAdaptor<FunctionPassT>> { + : public PassInfoMixin<CGSCCToFunctionPassAdaptor> { public: - explicit CGSCCToFunctionPassAdaptor(FunctionPassT Pass) - : Pass(std::move(Pass)) {} + using PassConceptT = detail::PassConcept<Function, FunctionAnalysisManager>; - // We have to explicitly define all the special member functions because MSVC - // refuses to generate them. 
- CGSCCToFunctionPassAdaptor(const CGSCCToFunctionPassAdaptor &Arg) - : Pass(Arg.Pass) {} + explicit CGSCCToFunctionPassAdaptor(std::unique_ptr<PassConceptT> Pass) + : Pass(std::move(Pass)) {} CGSCCToFunctionPassAdaptor(CGSCCToFunctionPassAdaptor &&Arg) : Pass(std::move(Arg.Pass)) {} @@ -469,90 +479,24 @@ public: /// Runs the function pass across every function in the module. PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM, - LazyCallGraph &CG, CGSCCUpdateResult &UR) { - // Setup the function analysis manager from its proxy. - FunctionAnalysisManager &FAM = - AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager(); - - SmallVector<LazyCallGraph::Node *, 4> Nodes; - for (LazyCallGraph::Node &N : C) - Nodes.push_back(&N); - - // The SCC may get split while we are optimizing functions due to deleting - // edges. If this happens, the current SCC can shift, so keep track of - // a pointer we can overwrite. - LazyCallGraph::SCC *CurrentC = &C; - - LLVM_DEBUG(dbgs() << "Running function passes across an SCC: " << C - << "\n"); - - PreservedAnalyses PA = PreservedAnalyses::all(); - for (LazyCallGraph::Node *N : Nodes) { - // Skip nodes from other SCCs. These may have been split out during - // processing. We'll eventually visit those SCCs and pick up the nodes - // there. - if (CG.lookupSCC(*N) != CurrentC) - continue; - - Function &F = N->getFunction(); - - PassInstrumentation PI = FAM.getResult<PassInstrumentationAnalysis>(F); - if (!PI.runBeforePass<Function>(Pass, F)) - continue; - - PreservedAnalyses PassPA; - { - TimeTraceScope TimeScope(Pass.name()); - PassPA = Pass.run(F, FAM); - } - - PI.runAfterPass<Function>(Pass, F); - - // We know that the function pass couldn't have invalidated any other - // function's analyses (that's the contract of a function pass), so - // directly handle the function analysis manager's invalidation here. - FAM.invalidate(F, PassPA); - - // Then intersect the preserved set so that invalidation of module - // analyses will eventually occur when the module pass completes. - PA.intersect(std::move(PassPA)); - - // If the call graph hasn't been preserved, update it based on this - // function pass. This may also update the current SCC to point to - // a smaller, more refined SCC. - auto PAC = PA.getChecker<LazyCallGraphAnalysis>(); - if (!PAC.preserved() && !PAC.preservedSet<AllAnalysesOn<Module>>()) { - CurrentC = &updateCGAndAnalysisManagerForFunctionPass(CG, *CurrentC, *N, - AM, UR, FAM); - assert( - CG.lookupSCC(*N) == CurrentC && - "Current SCC not updated to the SCC containing the current node!"); - } - } + LazyCallGraph &CG, CGSCCUpdateResult &UR); - // By definition we preserve the proxy. And we preserve all analyses on - // Functions. This precludes *any* invalidation of function analyses by the - // proxy, but that's OK because we've taken care to invalidate analyses in - // the function analysis manager incrementally above. - PA.preserveSet<AllAnalysesOn<Function>>(); - PA.preserve<FunctionAnalysisManagerCGSCCProxy>(); - - // We've also ensured that we updated the call graph along the way. - PA.preserve<LazyCallGraphAnalysis>(); - - return PA; - } + static bool isRequired() { return true; } private: - FunctionPassT Pass; + std::unique_ptr<PassConceptT> Pass; }; /// A function to deduce a function pass type and wrap it in the /// templated adaptor. 
template <typename FunctionPassT> -CGSCCToFunctionPassAdaptor<FunctionPassT> +CGSCCToFunctionPassAdaptor createCGSCCToFunctionPassAdaptor(FunctionPassT Pass) { - return CGSCCToFunctionPassAdaptor<FunctionPassT>(std::move(Pass)); + using PassModelT = + detail::PassModel<Function, FunctionPassT, PreservedAnalyses, + FunctionAnalysisManager>; + return CGSCCToFunctionPassAdaptor( + std::make_unique<PassModelT>(std::move(Pass))); } /// A helper that repeats an SCC pass each time an indirect call is refined to @@ -569,410 +513,36 @@ createCGSCCToFunctionPassAdaptor(FunctionPassT Pass) { /// This repetition has the potential to be very large however, as each one /// might refine a single call site. As a consequence, in practice we use an /// upper bound on the number of repetitions to limit things. -template <typename PassT> -class DevirtSCCRepeatedPass - : public PassInfoMixin<DevirtSCCRepeatedPass<PassT>> { +class DevirtSCCRepeatedPass : public PassInfoMixin<DevirtSCCRepeatedPass> { public: - explicit DevirtSCCRepeatedPass(PassT Pass, int MaxIterations) + using PassConceptT = + detail::PassConcept<LazyCallGraph::SCC, CGSCCAnalysisManager, + LazyCallGraph &, CGSCCUpdateResult &>; + + explicit DevirtSCCRepeatedPass(std::unique_ptr<PassConceptT> Pass, + int MaxIterations) : Pass(std::move(Pass)), MaxIterations(MaxIterations) {} /// Runs the wrapped pass up to \c MaxIterations on the SCC, iterating /// whenever an indirect call is refined. PreservedAnalyses run(LazyCallGraph::SCC &InitialC, CGSCCAnalysisManager &AM, - LazyCallGraph &CG, CGSCCUpdateResult &UR) { - PreservedAnalyses PA = PreservedAnalyses::all(); - PassInstrumentation PI = - AM.getResult<PassInstrumentationAnalysis>(InitialC, CG); - - // The SCC may be refined while we are running passes over it, so set up - // a pointer that we can update. - LazyCallGraph::SCC *C = &InitialC; - - // Collect value handles for all of the indirect call sites. - SmallVector<WeakTrackingVH, 8> CallHandles; - - // Struct to track the counts of direct and indirect calls in each function - // of the SCC. - struct CallCount { - int Direct; - int Indirect; - }; - - // Put value handles on all of the indirect calls and return the number of - // direct calls for each function in the SCC. - auto ScanSCC = [](LazyCallGraph::SCC &C, - SmallVectorImpl<WeakTrackingVH> &CallHandles) { - assert(CallHandles.empty() && "Must start with a clear set of handles."); - - SmallDenseMap<Function *, CallCount> CallCounts; - CallCount CountLocal = {0, 0}; - for (LazyCallGraph::Node &N : C) { - CallCount &Count = - CallCounts.insert(std::make_pair(&N.getFunction(), CountLocal)) - .first->second; - for (Instruction &I : instructions(N.getFunction())) - if (auto *CB = dyn_cast<CallBase>(&I)) { - if (CB->getCalledFunction()) { - ++Count.Direct; - } else { - ++Count.Indirect; - CallHandles.push_back(WeakTrackingVH(&I)); - } - } - } - - return CallCounts; - }; - - // Populate the initial call handles and get the initial call counts. - auto CallCounts = ScanSCC(*C, CallHandles); - - for (int Iteration = 0;; ++Iteration) { - - if (!PI.runBeforePass<LazyCallGraph::SCC>(Pass, *C)) - continue; - - PreservedAnalyses PassPA = Pass.run(*C, AM, CG, UR); - - if (UR.InvalidatedSCCs.count(C)) - PI.runAfterPassInvalidated<LazyCallGraph::SCC>(Pass); - else - PI.runAfterPass<LazyCallGraph::SCC>(Pass, *C); - - // If the SCC structure has changed, bail immediately and let the outer - // CGSCC layer handle any iteration to reflect the refined structure. 
- if (UR.UpdatedC && UR.UpdatedC != C) { - PA.intersect(std::move(PassPA)); - break; - } - - // Check that we didn't miss any update scenario. - assert(!UR.InvalidatedSCCs.count(C) && "Processing an invalid SCC!"); - assert(C->begin() != C->end() && "Cannot have an empty SCC!"); - - // Check whether any of the handles were devirtualized. - auto IsDevirtualizedHandle = [&](WeakTrackingVH &CallH) { - if (!CallH) - return false; - auto *CB = dyn_cast<CallBase>(CallH); - if (!CB) - return false; - - // If the call is still indirect, leave it alone. - Function *F = CB->getCalledFunction(); - if (!F) - return false; - - LLVM_DEBUG(dbgs() << "Found devirtualized call from " - << CB->getParent()->getParent()->getName() << " to " - << F->getName() << "\n"); - - // We now have a direct call where previously we had an indirect call, - // so iterate to process this devirtualization site. - return true; - }; - bool Devirt = llvm::any_of(CallHandles, IsDevirtualizedHandle); - - // Rescan to build up a new set of handles and count how many direct - // calls remain. If we decide to iterate, this also sets up the input to - // the next iteration. - CallHandles.clear(); - auto NewCallCounts = ScanSCC(*C, CallHandles); - - // If we haven't found an explicit devirtualization already see if we - // have decreased the number of indirect calls and increased the number - // of direct calls for any function in the SCC. This can be fooled by all - // manner of transformations such as DCE and other things, but seems to - // work well in practice. - if (!Devirt) - // Iterate over the keys in NewCallCounts, if Function also exists in - // CallCounts, make the check below. - for (auto &Pair : NewCallCounts) { - auto &CallCountNew = Pair.second; - auto CountIt = CallCounts.find(Pair.first); - if (CountIt != CallCounts.end()) { - const auto &CallCountOld = CountIt->second; - if (CallCountOld.Indirect > CallCountNew.Indirect && - CallCountOld.Direct < CallCountNew.Direct) { - Devirt = true; - break; - } - } - } - - if (!Devirt) { - PA.intersect(std::move(PassPA)); - break; - } - - // Otherwise, if we've already hit our max, we're done. - if (Iteration >= MaxIterations) { - LLVM_DEBUG( - dbgs() << "Found another devirtualization after hitting the max " - "number of repetitions (" - << MaxIterations << ") on SCC: " << *C << "\n"); - PA.intersect(std::move(PassPA)); - break; - } - - LLVM_DEBUG( - dbgs() - << "Repeating an SCC pass after finding a devirtualization in: " << *C - << "\n"); - - // Move over the new call counts in preparation for iterating. - CallCounts = std::move(NewCallCounts); - - // Update the analysis manager with each run and intersect the total set - // of preserved analyses so we're ready to iterate. - AM.invalidate(*C, PassPA); - - PA.intersect(std::move(PassPA)); - } - - // Note that we don't add any preserved entries here unlike a more normal - // "pass manager" because we only handle invalidation *between* iterations, - // not after the last iteration. - return PA; - } + LazyCallGraph &CG, CGSCCUpdateResult &UR); private: - PassT Pass; + std::unique_ptr<PassConceptT> Pass; int MaxIterations; }; /// A function to deduce a function pass type and wrap it in the /// templated adaptor. -template <typename PassT> -DevirtSCCRepeatedPass<PassT> createDevirtSCCRepeatedPass(PassT Pass, - int MaxIterations) { - return DevirtSCCRepeatedPass<PassT>(std::move(Pass), MaxIterations); -} - -// Out-of-line implementation details for templates below this point. 
- template <typename CGSCCPassT> -PreservedAnalyses -ModuleToPostOrderCGSCCPassAdaptor<CGSCCPassT>::run(Module &M, - ModuleAnalysisManager &AM) { - // Setup the CGSCC analysis manager from its proxy. - CGSCCAnalysisManager &CGAM = - AM.getResult<CGSCCAnalysisManagerModuleProxy>(M).getManager(); - - // Get the call graph for this module. - LazyCallGraph &CG = AM.getResult<LazyCallGraphAnalysis>(M); - - // Get Function analysis manager from its proxy. - FunctionAnalysisManager &FAM = - AM.getCachedResult<FunctionAnalysisManagerModuleProxy>(M)->getManager(); - - // We keep worklists to allow us to push more work onto the pass manager as - // the passes are run. - SmallPriorityWorklist<LazyCallGraph::RefSCC *, 1> RCWorklist; - SmallPriorityWorklist<LazyCallGraph::SCC *, 1> CWorklist; - - // Keep sets for invalidated SCCs and RefSCCs that should be skipped when - // iterating off the worklists. - SmallPtrSet<LazyCallGraph::RefSCC *, 4> InvalidRefSCCSet; - SmallPtrSet<LazyCallGraph::SCC *, 4> InvalidSCCSet; - - SmallDenseSet<std::pair<LazyCallGraph::Node *, LazyCallGraph::SCC *>, 4> - InlinedInternalEdges; - - CGSCCUpdateResult UR = { - RCWorklist, CWorklist, InvalidRefSCCSet, InvalidSCCSet, - nullptr, nullptr, PreservedAnalyses::all(), InlinedInternalEdges}; - - // Request PassInstrumentation from analysis manager, will use it to run - // instrumenting callbacks for the passes later. - PassInstrumentation PI = AM.getResult<PassInstrumentationAnalysis>(M); - - PreservedAnalyses PA = PreservedAnalyses::all(); - CG.buildRefSCCs(); - for (auto RCI = CG.postorder_ref_scc_begin(), - RCE = CG.postorder_ref_scc_end(); - RCI != RCE;) { - assert(RCWorklist.empty() && - "Should always start with an empty RefSCC worklist"); - // The postorder_ref_sccs range we are walking is lazily constructed, so - // we only push the first one onto the worklist. The worklist allows us - // to capture *new* RefSCCs created during transformations. - // - // We really want to form RefSCCs lazily because that makes them cheaper - // to update as the program is simplified and allows us to have greater - // cache locality as forming a RefSCC touches all the parts of all the - // functions within that RefSCC. - // - // We also eagerly increment the iterator to the next position because - // the CGSCC passes below may delete the current RefSCC. - RCWorklist.insert(&*RCI++); - - do { - LazyCallGraph::RefSCC *RC = RCWorklist.pop_back_val(); - if (InvalidRefSCCSet.count(RC)) { - LLVM_DEBUG(dbgs() << "Skipping an invalid RefSCC...\n"); - continue; - } - - assert(CWorklist.empty() && - "Should always start with an empty SCC worklist"); - - LLVM_DEBUG(dbgs() << "Running an SCC pass across the RefSCC: " << *RC - << "\n"); - - // The top of the worklist may *also* be the same SCC we just ran over - // (and invalidated for). Keep track of that last SCC we processed due - // to SCC update to avoid redundant processing when an SCC is both just - // updated itself and at the top of the worklist. - LazyCallGraph::SCC *LastUpdatedC = nullptr; - - // Push the initial SCCs in reverse post-order as we'll pop off the - // back and so see this in post-order. - for (LazyCallGraph::SCC &C : llvm::reverse(*RC)) - CWorklist.insert(&C); - - do { - LazyCallGraph::SCC *C = CWorklist.pop_back_val(); - // Due to call graph mutations, we may have invalid SCCs or SCCs from - // other RefSCCs in the worklist. The invalid ones are dead and the - // other RefSCCs should be queued above, so we just need to skip both - // scenarios here. 
- if (InvalidSCCSet.count(C)) { - LLVM_DEBUG(dbgs() << "Skipping an invalid SCC...\n"); - continue; - } - if (LastUpdatedC == C) { - LLVM_DEBUG(dbgs() << "Skipping redundant run on SCC: " << *C << "\n"); - continue; - } - if (&C->getOuterRefSCC() != RC) { - LLVM_DEBUG(dbgs() << "Skipping an SCC that is now part of some other " - "RefSCC...\n"); - continue; - } - - // Ensure we can proxy analysis updates from the CGSCC analysis manager - // into the the Function analysis manager by getting a proxy here. - // This also needs to update the FunctionAnalysisManager, as this may be - // the first time we see this SCC. - CGAM.getResult<FunctionAnalysisManagerCGSCCProxy>(*C, CG).updateFAM( - FAM); - - // Each time we visit a new SCC pulled off the worklist, - // a transformation of a child SCC may have also modified this parent - // and invalidated analyses. So we invalidate using the update record's - // cross-SCC preserved set. This preserved set is intersected by any - // CGSCC pass that handles invalidation (primarily pass managers) prior - // to marking its SCC as preserved. That lets us track everything that - // might need invalidation across SCCs without excessive invalidations - // on a single SCC. - // - // This essentially allows SCC passes to freely invalidate analyses - // of any ancestor SCC. If this becomes detrimental to successfully - // caching analyses, we could force each SCC pass to manually - // invalidate the analyses for any SCCs other than themselves which - // are mutated. However, that seems to lose the robustness of the - // pass-manager driven invalidation scheme. - CGAM.invalidate(*C, UR.CrossSCCPA); - - do { - // Check that we didn't miss any update scenario. - assert(!InvalidSCCSet.count(C) && "Processing an invalid SCC!"); - assert(C->begin() != C->end() && "Cannot have an empty SCC!"); - assert(&C->getOuterRefSCC() == RC && - "Processing an SCC in a different RefSCC!"); - - LastUpdatedC = UR.UpdatedC; - UR.UpdatedRC = nullptr; - UR.UpdatedC = nullptr; - - // Check the PassInstrumentation's BeforePass callbacks before - // running the pass, skip its execution completely if asked to - // (callback returns false). - if (!PI.runBeforePass<LazyCallGraph::SCC>(Pass, *C)) - continue; - - PreservedAnalyses PassPA; - { - TimeTraceScope TimeScope(Pass.name()); - PassPA = Pass.run(*C, CGAM, CG, UR); - } - - if (UR.InvalidatedSCCs.count(C)) - PI.runAfterPassInvalidated<LazyCallGraph::SCC>(Pass); - else - PI.runAfterPass<LazyCallGraph::SCC>(Pass, *C); - - // Update the SCC and RefSCC if necessary. - C = UR.UpdatedC ? UR.UpdatedC : C; - RC = UR.UpdatedRC ? UR.UpdatedRC : RC; - - if (UR.UpdatedC) { - // If we're updating the SCC, also update the FAM inside the proxy's - // result. - CGAM.getResult<FunctionAnalysisManagerCGSCCProxy>(*C, CG).updateFAM( - FAM); - } - - // If the CGSCC pass wasn't able to provide a valid updated SCC, - // the current SCC may simply need to be skipped if invalid. - if (UR.InvalidatedSCCs.count(C)) { - LLVM_DEBUG(dbgs() << "Skipping invalidated root or island SCC!\n"); - break; - } - // Check that we didn't miss any update scenario. - assert(C->begin() != C->end() && "Cannot have an empty SCC!"); - - // We handle invalidating the CGSCC analysis manager's information - // for the (potentially updated) SCC here. Note that any other SCCs - // whose structure has changed should have been invalidated by - // whatever was updating the call graph. This SCC gets invalidated - // late as it contains the nodes that were actively being - // processed. 
- CGAM.invalidate(*C, PassPA); - - // Then intersect the preserved set so that invalidation of module - // analyses will eventually occur when the module pass completes. - // Also intersect with the cross-SCC preserved set to capture any - // cross-SCC invalidation. - UR.CrossSCCPA.intersect(PassPA); - PA.intersect(std::move(PassPA)); - - // The pass may have restructured the call graph and refined the - // current SCC and/or RefSCC. We need to update our current SCC and - // RefSCC pointers to follow these. Also, when the current SCC is - // refined, re-run the SCC pass over the newly refined SCC in order - // to observe the most precise SCC model available. This inherently - // cannot cycle excessively as it only happens when we split SCCs - // apart, at most converging on a DAG of single nodes. - // FIXME: If we ever start having RefSCC passes, we'll want to - // iterate there too. - if (UR.UpdatedC) - LLVM_DEBUG(dbgs() - << "Re-running SCC passes after a refinement of the " - "current SCC: " - << *UR.UpdatedC << "\n"); - - // Note that both `C` and `RC` may at this point refer to deleted, - // invalid SCC and RefSCCs respectively. But we will short circuit - // the processing when we check them in the loop above. - } while (UR.UpdatedC); - } while (!CWorklist.empty()); - - // We only need to keep internal inlined edge information within - // a RefSCC, clear it to save on space and let the next time we visit - // any of these functions have a fresh start. - InlinedInternalEdges.clear(); - } while (!RCWorklist.empty()); - } - - // By definition we preserve the call garph, all SCC analyses, and the - // analysis proxies by handling them above and in any nested pass managers. - PA.preserveSet<AllAnalysesOn<LazyCallGraph::SCC>>(); - PA.preserve<LazyCallGraphAnalysis>(); - PA.preserve<CGSCCAnalysisManagerModuleProxy>(); - PA.preserve<FunctionAnalysisManagerModuleProxy>(); - return PA; +DevirtSCCRepeatedPass createDevirtSCCRepeatedPass(CGSCCPassT Pass, + int MaxIterations) { + using PassModelT = detail::PassModel<LazyCallGraph::SCC, CGSCCPassT, + PreservedAnalyses, CGSCCAnalysisManager, + LazyCallGraph &, CGSCCUpdateResult &>; + return DevirtSCCRepeatedPass(std::make_unique<PassModelT>(std::move(Pass)), + MaxIterations); } // Clear out the debug logging macro. diff --git a/llvm/include/llvm/Analysis/CallGraph.h b/llvm/include/llvm/Analysis/CallGraph.h index 98f9b0683fd4..4da448c9900b 100644 --- a/llvm/include/llvm/Analysis/CallGraph.h +++ b/llvm/include/llvm/Analysis/CallGraph.h @@ -87,13 +87,6 @@ class CallGraph { /// or calling an external function. std::unique_ptr<CallGraphNode> CallsExternalNode; - /// Replace the function represented by this node by another. - /// - /// This does not rescan the body of the function, so it is suitable when - /// splicing the body of one function to another while also updating all - /// callers from the old function to the new. - void spliceFunction(const Function *From, const Function *To); - public: explicit CallGraph(Module &M); CallGraph(CallGraph &&Arg); diff --git a/llvm/include/llvm/Analysis/CaptureTracking.h b/llvm/include/llvm/Analysis/CaptureTracking.h index e68675b278f1..9da5f18e944b 100644 --- a/llvm/include/llvm/Analysis/CaptureTracking.h +++ b/llvm/include/llvm/Analysis/CaptureTracking.h @@ -13,6 +13,8 @@ #ifndef LLVM_ANALYSIS_CAPTURETRACKING_H #define LLVM_ANALYSIS_CAPTURETRACKING_H +#include "llvm/ADT/DenseMap.h" + namespace llvm { class Value; @@ -94,6 +96,12 @@ namespace llvm { /// is zero, a default value is assumed. 
void PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker, unsigned MaxUsesToExplore = 0); + + /// Returns true if the pointer is to a function-local object that never + /// escapes from the function. + bool isNonEscapingLocalObject( + const Value *V, + SmallDenseMap<const Value *, bool, 8> *IsCapturedCache = nullptr); } // end namespace llvm #endif diff --git a/llvm/include/llvm/Analysis/CodeMetrics.h b/llvm/include/llvm/Analysis/CodeMetrics.h index eab24c8ab179..615591aa83ad 100644 --- a/llvm/include/llvm/Analysis/CodeMetrics.h +++ b/llvm/include/llvm/Analysis/CodeMetrics.h @@ -75,7 +75,8 @@ struct CodeMetrics { /// Add information about a block to the current state. void analyzeBasicBlock(const BasicBlock *BB, const TargetTransformInfo &TTI, - const SmallPtrSetImpl<const Value*> &EphValues); + const SmallPtrSetImpl<const Value *> &EphValues, + bool PrepareForLTO = false); /// Collect a loop's ephemeral values (those used only by an assume /// or similar intrinsics in the loop). diff --git a/llvm/include/llvm/Analysis/ConstantFolding.h b/llvm/include/llvm/Analysis/ConstantFolding.h index 0ccc782ad6f5..ef6e66b2b88e 100644 --- a/llvm/include/llvm/Analysis/ConstantFolding.h +++ b/llvm/include/llvm/Analysis/ConstantFolding.h @@ -25,6 +25,7 @@ template <typename T> class ArrayRef; class CallBase; class Constant; class ConstantExpr; +class DSOLocalEquivalent; class DataLayout; class Function; class GlobalValue; @@ -34,8 +35,11 @@ class Type; /// If this constant is a constant offset from a global, return the global and /// the constant. Because of constantexprs, this function is recursive. +/// If the global is part of a dso_local_equivalent constant, return it through +/// `Equiv` if it is provided. bool IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV, APInt &Offset, - const DataLayout &DL); + const DataLayout &DL, + DSOLocalEquivalent **DSOEquiv = nullptr); /// ConstantFoldInstruction - Try to constant fold the specified instruction. /// If successful, the constant result is returned, if not, null is returned. diff --git a/llvm/include/llvm/Analysis/ConstraintSystem.h b/llvm/include/llvm/Analysis/ConstraintSystem.h new file mode 100644 index 000000000000..83c1fb4485fd --- /dev/null +++ b/llvm/include/llvm/Analysis/ConstraintSystem.h @@ -0,0 +1,88 @@ +//===- ConstraintSystem.h - A system of linear constraints. --------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_CONSTRAINTSYSTEM_H +#define LLVM_ANALYSIS_CONSTRAINTSYSTEM_H + +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" + +#include <string> + +namespace llvm { + +class ConstraintSystem { + /// Current linear constraints in the system. + /// An entry of the form c0, c1, ... cn represents the following constraint: + /// c0 >= v0 * c1 + .... + v{n-1} * cn + SmallVector<SmallVector<int64_t, 8>, 4> Constraints; + + /// Current greatest common divisor for all coefficients in the system. + uint32_t GCD = 1; + + // Eliminate constraints from the system using Fourier–Motzkin elimination. + bool eliminateUsingFM(); + + /// Print the constraints in the system, using \p Names as variable names. 
+ void dump(ArrayRef<std::string> Names) const; + + /// Print the constraints in the system, using x0...xn as variable names. + void dump() const; + + /// Returns true if there may be a solution for the constraints in the system. + bool mayHaveSolutionImpl(); + +public: + bool addVariableRow(const SmallVector<int64_t, 8> &R) { + assert(Constraints.empty() || R.size() == Constraints.back().size()); + // If all variable coefficients are 0, the constraint does not provide any + // usable information. + if (all_of(makeArrayRef(R).drop_front(1), [](int64_t C) { return C == 0; })) + return false; + + for (const auto &C : R) { + auto A = std::abs(C); + GCD = APIntOps::GreatestCommonDivisor({32, (uint32_t)A}, {32, GCD}) + .getZExtValue(); + } + Constraints.push_back(R); + return true; + } + + bool addVariableRowFill(const SmallVector<int64_t, 8> &R) { + for (auto &CR : Constraints) { + while (CR.size() != R.size()) + CR.push_back(0); + } + return addVariableRow(R); + } + + /// Returns true if there may be a solution for the constraints in the system. + bool mayHaveSolution(); + + static SmallVector<int64_t, 8> negate(SmallVector<int64_t, 8> R) { + // The negated constraint R is obtained by multiplying by -1 and adding 1 to + // the constant. + R[0] += 1; + for (auto &C : R) + C *= -1; + return R; + } + + bool isConditionImplied(SmallVector<int64_t, 8> R); + + void popLastConstraint() { Constraints.pop_back(); } + + /// Returns the number of rows in the constraint system. + unsigned size() const { return Constraints.size(); } +}; +} // namespace llvm + +#endif // LLVM_ANALYSIS_CONSTRAINTSYSTEM_H diff --git a/llvm/include/llvm/Analysis/DDG.h b/llvm/include/llvm/Analysis/DDG.h index 9e2b7907eaec..e3bef33e55c3 100644 --- a/llvm/include/llvm/Analysis/DDG.h +++ b/llvm/include/llvm/Analysis/DDG.h @@ -152,7 +152,7 @@ private: setKind((InstList.size() == 0 && Input.size() == 1) ? NodeKind::SingleInstruction : NodeKind::MultiInstruction); - InstList.insert(InstList.end(), Input.begin(), Input.end()); + llvm::append_range(InstList, Input); } void appendInstructions(const SimpleDDGNode &Input) { appendInstructions(Input.getInstructions()); @@ -290,6 +290,12 @@ public: bool getDependencies(const NodeType &Src, const NodeType &Dst, DependenceList &Deps) const; + /// Return a string representing the type of dependence that the dependence + /// analysis identified between the two given nodes. This function assumes + /// that there is a memory dependence between the given two nodes. + const std::string getDependenceString(const NodeType &Src, + const NodeType &Dst) const; + protected: // Name of the graph. 
std::string Name; @@ -463,6 +469,26 @@ bool DependenceGraphInfo<NodeType>::getDependencies( return !Deps.empty(); } +template <typename NodeType> +const std::string +DependenceGraphInfo<NodeType>::getDependenceString(const NodeType &Src, + const NodeType &Dst) const { + std::string Str; + raw_string_ostream OS(Str); + DependenceList Deps; + if (!getDependencies(Src, Dst, Deps)) + return OS.str(); + interleaveComma(Deps, OS, [&](const std::unique_ptr<Dependence> &D) { + D->dump(OS); + // Remove the extra new-line character printed by the dump + // method + if (OS.str().back() == '\n') + OS.str().pop_back(); + }); + + return OS.str(); +} + //===--------------------------------------------------------------------===// // GraphTraits specializations for the DDG //===--------------------------------------------------------------------===// diff --git a/llvm/include/llvm/Analysis/DDGPrinter.h b/llvm/include/llvm/Analysis/DDGPrinter.h new file mode 100644 index 000000000000..4477b387fe50 --- /dev/null +++ b/llvm/include/llvm/Analysis/DDGPrinter.h @@ -0,0 +1,91 @@ +//===- llvm/Analysis/DDGPrinter.h -------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// +// This file defines the DOT printer for the Data-Dependence Graph (DDG). +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_DDGPRINTER_H +#define LLVM_ANALYSIS_DDGPRINTER_H + +#include "llvm/Analysis/DDG.h" +#include "llvm/Pass.h" +#include "llvm/Support/DOTGraphTraits.h" + +namespace llvm { + +//===--------------------------------------------------------------------===// +// Implementation of DDG DOT Printer for a loop. +//===--------------------------------------------------------------------===// +class DDGDotPrinterPass : public PassInfoMixin<DDGDotPrinterPass> { +public: + PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR, LPMUpdater &U); +}; + +//===--------------------------------------------------------------------===// +// Specialization of DOTGraphTraits. +//===--------------------------------------------------------------------===// +template <> +struct DOTGraphTraits<const DataDependenceGraph *> + : public DefaultDOTGraphTraits { + + DOTGraphTraits(bool IsSimple = false) : DefaultDOTGraphTraits(IsSimple) {} + + /// Generate a title for the graph in DOT format + std::string getGraphName(const DataDependenceGraph *G) { + assert(G && "expected a valid pointer to the graph."); + return "DDG for '" + std::string(G->getName()) + "'"; + } + + /// Print a DDG node either in concise form (-ddg-dot-only) or + /// verbose mode (-ddg-dot). + std::string getNodeLabel(const DDGNode *Node, + const DataDependenceGraph *Graph); + + /// Print attributes of an edge in the DDG graph. If the edge + /// is a MemoryDependence edge, then detailed dependence info + /// available from DependenceAnalysis is displayed. + std::string + getEdgeAttributes(const DDGNode *Node, + GraphTraits<const DDGNode *>::ChildIteratorType I, + const DataDependenceGraph *G); + + /// Do not print nodes that are part of a pi-block separately. They + /// will be printed when their containing pi-block is being printed. 
+ bool isNodeHidden(const DDGNode *Node, const DataDependenceGraph *G); + +private: + /// Print a DDG node in concise form. + static std::string getSimpleNodeLabel(const DDGNode *Node, + const DataDependenceGraph *G); + + /// Print a DDG node with more information including containing instructions + /// and detailed information about the dependence edges. + static std::string getVerboseNodeLabel(const DDGNode *Node, + const DataDependenceGraph *G); + + /// Print a DDG edge in concise form. + static std::string getSimpleEdgeAttributes(const DDGNode *Src, + const DDGEdge *Edge, + const DataDependenceGraph *G); + + /// Print a DDG edge with more information including detailed information + /// about the dependence edges. + static std::string getVerboseEdgeAttributes(const DDGNode *Src, + const DDGEdge *Edge, + const DataDependenceGraph *G); +}; + +using DDGDotGraphTraits = DOTGraphTraits<const DataDependenceGraph *>; + +} // namespace llvm + +#endif // LLVM_ANALYSIS_DDGPRINTER_H diff --git a/llvm/include/llvm/Analysis/Delinearization.h b/llvm/include/llvm/Analysis/Delinearization.h new file mode 100644 index 000000000000..2658b6bbc80c --- /dev/null +++ b/llvm/include/llvm/Analysis/Delinearization.h @@ -0,0 +1,33 @@ +//===---- Delinearization.h - MultiDimensional Index Delinearization ------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This implements an analysis pass that tries to delinearize all GEP +// instructions in all loops using the SCEV analysis functionality. This pass is +// only used for testing purposes: if your pass needs delinearization, please +// use the on-demand SCEVAddRecExpr::delinearize() function. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_DELINEARIZATION_H +#define LLVM_ANALYSIS_DELINEARIZATION_H + +#include "llvm/IR/PassManager.h" +#include "llvm/Support/raw_ostream.h" + +namespace llvm { +struct DelinearizationPrinterPass + : public PassInfoMixin<DelinearizationPrinterPass> { + explicit DelinearizationPrinterPass(raw_ostream &OS); + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + +private: + raw_ostream &OS; +}; +} // namespace llvm + +#endif // LLVM_ANALYSIS_DELINEARIZATION_H diff --git a/llvm/include/llvm/Analysis/DemandedBits.h b/llvm/include/llvm/Analysis/DemandedBits.h index 04db3eb57c18..7a8618a27ce7 100644 --- a/llvm/include/llvm/Analysis/DemandedBits.h +++ b/llvm/include/llvm/Analysis/DemandedBits.h @@ -61,6 +61,20 @@ public: void print(raw_ostream &OS); + /// Compute alive bits of one addition operand from alive output and known + /// operand bits + static APInt determineLiveOperandBitsAdd(unsigned OperandNo, + const APInt &AOut, + const KnownBits &LHS, + const KnownBits &RHS); + + /// Compute alive bits of one subtraction operand from alive output and known + /// operand bits + static APInt determineLiveOperandBitsSub(unsigned OperandNo, + const APInt &AOut, + const KnownBits &LHS, + const KnownBits &RHS); + private: void performAnalysis(); void determineLiveOperandBits(const Instruction *UserI, diff --git a/llvm/include/llvm/Analysis/DivergenceAnalysis.h b/llvm/include/llvm/Analysis/DivergenceAnalysis.h index a2da97bb9059..2e4ae65d0981 100644 --- a/llvm/include/llvm/Analysis/DivergenceAnalysis.h +++ b/llvm/include/llvm/Analysis/DivergenceAnalysis.h @@ -59,8 +59,10 @@ public: /// \brief Mark \p UniVal as a value that is always uniform. void addUniformOverride(const Value &UniVal); - /// \brief Mark \p DivVal as a value that is always divergent. - void markDivergent(const Value &DivVal); + /// \brief Mark \p DivVal as a value that is always divergent. Will not do so + /// if `isAlwaysUniform(DivVal)`. + /// \returns Whether the tracked divergence state of \p DivVal changed. + bool markDivergent(const Value &DivVal); /// \brief Propagate divergence to all instructions in the region. /// Divergence is seeded by calls to \p markDivergent. @@ -76,45 +78,38 @@ public: /// \brief Whether \p Val is divergent at its definition. bool isDivergent(const Value &Val) const; - /// \brief Whether \p U is divergent. Uses of a uniform value can be divergent. + /// \brief Whether \p U is divergent. Uses of a uniform value can be + /// divergent. bool isDivergentUse(const Use &U) const; void print(raw_ostream &OS, const Module *) const; private: - bool updateTerminator(const Instruction &Term) const; - bool updatePHINode(const PHINode &Phi) const; - - /// \brief Computes whether \p Inst is divergent based on the - /// divergence of its operands. - /// - /// \returns Whether \p Inst is divergent. - /// - /// This should only be called for non-phi, non-terminator instructions. - bool updateNormalInstruction(const Instruction &Inst) const; - - /// \brief Mark users of live-out users as divergent. - /// - /// \param LoopHeader the header of the divergent loop. - /// - /// Marks all users of live-out values of the loop headed by \p LoopHeader - /// as divergent and puts them on the worklist. 
- void taintLoopLiveOuts(const BasicBlock &LoopHeader); - - /// \brief Push all users of \p Val (in the region) to the worklist + /// \brief Mark \p Term as divergent and push all Instructions that become + /// divergent as a result on the worklist. + void analyzeControlDivergence(const Instruction &Term); + /// \brief Mark all phi nodes in \p JoinBlock as divergent and push them on + /// the worklist. + void taintAndPushPhiNodes(const BasicBlock &JoinBlock); + + /// \brief Identify all Instructions that become divergent because \p DivExit + /// is a divergent loop exit of \p DivLoop. Mark those instructions as + /// divergent and push them on the worklist. + void propagateLoopExitDivergence(const BasicBlock &DivExit, + const Loop &DivLoop); + + /// \brief Internal implementation function for propagateLoopExitDivergence. + void analyzeLoopExitDivergence(const BasicBlock &DivExit, + const Loop &OuterDivLoop); + + /// \brief Mark all instruction as divergent that use a value defined in \p + /// OuterDivLoop. Push their users on the worklist. + void analyzeTemporalDivergence(const Instruction &I, + const Loop &OuterDivLoop); + + /// \brief Push all users of \p Val (in the region) to the worklist. void pushUsers(const Value &I); - /// \brief Push all phi nodes in @block to the worklist - void pushPHINodes(const BasicBlock &Block); - - /// \brief Mark \p Block as join divergent - /// - /// A block is join divergent if two threads may reach it from different - /// incoming blocks at the same time. - void markBlockJoinDivergent(const BasicBlock &Block) { - DivergentJoinBlocks.insert(&Block); - } - /// \brief Whether \p Val is divergent when read in \p ObservingBlock. bool isTemporalDivergent(const BasicBlock &ObservingBlock, const Value &Val) const; @@ -123,27 +118,9 @@ private: /// /// (see markBlockJoinDivergent). bool isJoinDivergent(const BasicBlock &Block) const { - return DivergentJoinBlocks.find(&Block) != DivergentJoinBlocks.end(); + return DivergentJoinBlocks.contains(&Block); } - /// \brief Propagate control-induced divergence to users (phi nodes and - /// instructions). - // - // \param JoinBlock is a divergent loop exit or join point of two disjoint - // paths. - // \returns Whether \p JoinBlock is a divergent loop exit of \p TermLoop. - bool propagateJoinDivergence(const BasicBlock &JoinBlock, - const Loop *TermLoop); - - /// \brief Propagate induced value divergence due to control divergence in \p - /// Term. - void propagateBranchDivergence(const Instruction &Term); - - /// \brief Propagate divergent caused by a divergent loop exit. - /// - /// \param ExitingLoop is a divergent loop. - void propagateLoopDivergence(const Loop &ExitingLoop); - private: const Function &F; // If regionLoop != nullptr, analysis is only performed within \p RegionLoop. @@ -166,7 +143,7 @@ private: DenseSet<const Value *> UniformOverrides; // Blocks with joining divergent control from different predecessors. - DenseSet<const BasicBlock *> DivergentJoinBlocks; + DenseSet<const BasicBlock *> DivergentJoinBlocks; // FIXME Deprecated // Detected/marked divergent values. 
DenseSet<const Value *> DivergentValues; diff --git a/llvm/include/llvm/Analysis/DominanceFrontier.h b/llvm/include/llvm/Analysis/DominanceFrontier.h index f67929c997f9..cef5e03b3b7a 100644 --- a/llvm/include/llvm/Analysis/DominanceFrontier.h +++ b/llvm/include/llvm/Analysis/DominanceFrontier.h @@ -26,7 +26,6 @@ #include <map> #include <set> #include <utility> -#include <vector> namespace llvm { diff --git a/llvm/include/llvm/Analysis/EHPersonalities.h b/llvm/include/llvm/Analysis/EHPersonalities.h index c17b0b4a90d3..eaada6627494 100644 --- a/llvm/include/llvm/Analysis/EHPersonalities.h +++ b/llvm/include/llvm/Analysis/EHPersonalities.h @@ -28,11 +28,12 @@ enum class EHPersonality { GNU_CXX_SjLj, GNU_ObjC, MSVC_X86SEH, - MSVC_Win64SEH, + MSVC_TableSEH, MSVC_CXX, CoreCLR, Rust, - Wasm_CXX + Wasm_CXX, + XL_CXX }; /// See if the given exception handling personality function is one @@ -51,7 +52,7 @@ inline bool isAsynchronousEHPersonality(EHPersonality Pers) { // unknown personalities don't catch asynch exceptions. switch (Pers) { case EHPersonality::MSVC_X86SEH: - case EHPersonality::MSVC_Win64SEH: + case EHPersonality::MSVC_TableSEH: return true; default: return false; @@ -65,7 +66,7 @@ inline bool isFuncletEHPersonality(EHPersonality Pers) { switch (Pers) { case EHPersonality::MSVC_CXX: case EHPersonality::MSVC_X86SEH: - case EHPersonality::MSVC_Win64SEH: + case EHPersonality::MSVC_TableSEH: case EHPersonality::CoreCLR: return true; default: @@ -80,7 +81,7 @@ inline bool isScopedEHPersonality(EHPersonality Pers) { switch (Pers) { case EHPersonality::MSVC_CXX: case EHPersonality::MSVC_X86SEH: - case EHPersonality::MSVC_Win64SEH: + case EHPersonality::MSVC_TableSEH: case EHPersonality::CoreCLR: case EHPersonality::Wasm_CXX: return true; diff --git a/llvm/include/llvm/Analysis/FunctionPropertiesAnalysis.h b/llvm/include/llvm/Analysis/FunctionPropertiesAnalysis.h new file mode 100644 index 000000000000..a5f96e72ce97 --- /dev/null +++ b/llvm/include/llvm/Analysis/FunctionPropertiesAnalysis.h @@ -0,0 +1,86 @@ +//=- FunctionPropertiesAnalysis.h - Function Properties Analysis --*- C++ -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the FunctionPropertiesInfo and FunctionPropertiesAnalysis +// classes used to extract function properties. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_FUNCTIONPROPERTIESANALYSIS_H_ +#define LLVM_FUNCTIONPROPERTIESANALYSIS_H_ + +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/IR/PassManager.h" + +namespace llvm { +class Function; + +class FunctionPropertiesInfo { +public: + static FunctionPropertiesInfo getFunctionPropertiesInfo(const Function &F, + const LoopInfo &LI); + + void print(raw_ostream &OS) const; + + /// Number of basic blocks + int64_t BasicBlockCount = 0; + + /// Number of blocks reached from a conditional instruction, or that are + /// 'cases' of a SwitchInstr. + // FIXME: We may want to replace this with a more meaningful metric, like + // number of conditionally executed blocks: + // 'if (a) s();' would be counted here as 2 blocks, just like + // 'if (a) s(); else s2(); s3();' would. 
+ int64_t BlocksReachedFromConditionalInstruction = 0; + + /// Number of uses of this function, plus 1 if the function is callable + /// outside the module. + int64_t Uses = 0; + + /// Number of direct calls made from this function to other functions + /// defined in this module. + int64_t DirectCallsToDefinedFunctions = 0; + + // Load Instruction Count + int64_t LoadInstCount = 0; + + // Store Instruction Count + int64_t StoreInstCount = 0; + + // Maximum Loop Depth in the Function + int64_t MaxLoopDepth = 0; + + // Number of Top Level Loops in the Function + int64_t TopLevelLoopCount = 0; +}; + +// Analysis pass +class FunctionPropertiesAnalysis + : public AnalysisInfoMixin<FunctionPropertiesAnalysis> { + +public: + static AnalysisKey Key; + + using Result = FunctionPropertiesInfo; + + Result run(Function &F, FunctionAnalysisManager &FAM); +}; + +/// Printer pass for the FunctionPropertiesAnalysis results. +class FunctionPropertiesPrinterPass + : public PassInfoMixin<FunctionPropertiesPrinterPass> { + raw_ostream &OS; + +public: + explicit FunctionPropertiesPrinterPass(raw_ostream &OS) : OS(OS) {} + + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); +}; + +} // namespace llvm +#endif // LLVM_FUNCTIONPROPERTIESANALYSIS_H_ diff --git a/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h b/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h new file mode 100644 index 000000000000..9e97541e542b --- /dev/null +++ b/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h @@ -0,0 +1,789 @@ +//===- IRSimilarityIdentifier.h - Find similarity in a module --------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// \file +// Interface file for the IRSimilarityIdentifier for identifying similarities in +// IR including the IRInstructionMapper, which maps an Instruction to unsigned +// integers. +// +// Two sequences of instructions are called "similar" if they perform the same +// series of operations for all inputs. +// +// \code +// %1 = add i32 %a, 10 +// %2 = add i32 %a, %1 +// %3 = icmp slt icmp %1, %2 +// \endcode +// +// and +// +// \code +// %1 = add i32 11, %a +// %2 = sub i32 %a, %1 +// %3 = icmp sgt icmp %2, %1 +// \endcode +// +// ultimately have the same result, even if the inputs, and structure are +// slightly different. +// +// For instructions, we do not worry about operands that do not have fixed +// semantic meaning to the program. We consider the opcode that the instruction +// has, the types, parameters, and extra information such as the function name, +// or comparison predicate. These are used to create a hash to map instructions +// to integers to be used in similarity matching in sequences of instructions +// +// Terminology: +// An IRSimilarityCandidate is a region of IRInstructionData (wrapped +// Instructions), usually used to denote a region of similarity has been found. +// +// A SimilarityGroup is a set of IRSimilarityCandidates that are structurally +// similar to one another. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_IRSIMILARITYIDENTIFIER_H +#define LLVM_ANALYSIS_IRSIMILARITYIDENTIFIER_H + +#include "llvm/IR/InstVisitor.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/PassManager.h" +#include "llvm/Pass.h" +#include "llvm/Support/Allocator.h" + +namespace llvm { +namespace IRSimilarity { + +struct IRInstructionDataList; + +/// This represents what is and is not supported when finding similarity in +/// Instructions. +/// +/// Legal Instructions are considered when looking at similarity between +/// Instructions. +/// +/// Illegal Instructions cannot be considered when looking for similarity +/// between Instructions. They act as boundaries between similarity regions. +/// +/// Invisible Instructions are skipped over during analysis. +// TODO: Shared with MachineOutliner +enum InstrType { Legal, Illegal, Invisible }; + +/// This provides the utilities for hashing an Instruction to an unsigned +/// integer. Two IRInstructionDatas produce the same hash value when their +/// underlying Instructions perform the same operation (even if they don't have +/// the same input operands.) +/// As a more concrete example, consider the following: +/// +/// \code +/// %add1 = add i32 %a, %b +/// %add2 = add i32 %c, %d +/// %add3 = add i64 %e, %f +/// \endcode +/// +// Then the IRInstructionData wrappers for these Instructions may be hashed like +/// so: +/// +/// \code +/// ; These two adds have the same types and operand types, so they hash to the +/// ; same number. +/// %add1 = add i32 %a, %b ; Hash: 1 +/// %add2 = add i32 %c, %d ; Hash: 1 +/// ; This add produces an i64. This differentiates it from %add1 and %add2. So, +/// ; it hashes to a different number. +/// %add3 = add i64 %e, %f; Hash: 2 +/// \endcode +/// +/// +/// This hashing scheme will be used to represent the program as a very long +/// string. This string can then be placed in a data structure which can be used +/// for similarity queries. +/// +/// TODO: Handle types of Instructions which can be equal even with different +/// operands. (E.g. comparisons with swapped predicates.) +/// TODO: Handle CallInsts, which are only checked for function type +/// by \ref isSameOperationAs. +/// TODO: Handle GetElementPtrInsts, as some of the operands have to be the +/// exact same, and some do not. +struct IRInstructionData : ilist_node<IRInstructionData> { + + /// The source Instruction that is being wrapped. + Instruction *Inst = nullptr; + /// The values of the operands in the Instruction. + SmallVector<Value *, 4> OperVals; + /// The legality of the wrapped instruction. This is informed by InstrType, + /// and is used when checking when two instructions are considered similar. + /// If either instruction is not legal, the instructions are automatically not + /// considered similar. + bool Legal; + + /// This is only relevant if we are wrapping a CmpInst where we needed to + /// change the predicate of a compare instruction from a greater than form + /// to a less than form. It is None otherwise. + Optional<CmpInst::Predicate> RevisedPredicate; + + /// Gather the information that is difficult to gather for an Instruction, or + /// is changed. i.e. the operands of an Instruction and the Types of those + /// operands. This extra information allows for similarity matching to make + /// assertions that allow for more flexibility when checking for whether an + /// Instruction performs the same operation. 
+ IRInstructionData(Instruction &I, bool Legality, IRInstructionDataList &IDL); + + /// Get the predicate that the compare instruction is using for hashing the + /// instruction. the IRInstructionData must be wrapping a CmpInst. + CmpInst::Predicate getPredicate() const; + + /// A function that swaps the predicates to their less than form if they are + /// in a greater than form. Otherwise, the predicate is unchanged. + /// + /// \param CI - The comparison operation to find a consistent preidcate for. + /// \return the consistent comparison predicate. + static CmpInst::Predicate predicateForConsistency(CmpInst *CI); + + /// Hashes \p Value based on its opcode, types, and operand types. + /// Two IRInstructionData instances produce the same hash when they perform + /// the same operation. + /// + /// As a simple example, consider the following instructions. + /// + /// \code + /// %add1 = add i32 %x1, %y1 + /// %add2 = add i32 %x2, %y2 + /// + /// %sub = sub i32 %x1, %y1 + /// + /// %add_i64 = add i64 %x2, %y2 + /// \endcode + /// + /// Because the first two adds operate the same types, and are performing the + /// same action, they will be hashed to the same value. + /// + /// However, the subtraction instruction is not the same as an addition, and + /// will be hashed to a different value. + /// + /// Finally, the last add has a different type compared to the first two add + /// instructions, so it will also be hashed to a different value that any of + /// the previous instructions. + /// + /// \param [in] ID - The IRInstructionData instance to be hashed. + /// \returns A hash_value of the IRInstructionData. + friend hash_code hash_value(const IRInstructionData &ID) { + SmallVector<Type *, 4> OperTypes; + for (Value *V : ID.OperVals) + OperTypes.push_back(V->getType()); + + if (isa<CmpInst>(ID.Inst)) + return llvm::hash_combine( + llvm::hash_value(ID.Inst->getOpcode()), + llvm::hash_value(ID.Inst->getType()), + llvm::hash_value(ID.getPredicate()), + llvm::hash_combine_range(OperTypes.begin(), OperTypes.end())); + else if (CallInst *CI = dyn_cast<CallInst>(ID.Inst)) + return llvm::hash_combine( + llvm::hash_value(ID.Inst->getOpcode()), + llvm::hash_value(ID.Inst->getType()), + llvm::hash_value(CI->getCalledFunction()->getName().str()), + llvm::hash_combine_range(OperTypes.begin(), OperTypes.end())); + return llvm::hash_combine( + llvm::hash_value(ID.Inst->getOpcode()), + llvm::hash_value(ID.Inst->getType()), + llvm::hash_combine_range(OperTypes.begin(), OperTypes.end())); + } + + IRInstructionDataList *IDL = nullptr; +}; + +struct IRInstructionDataList : simple_ilist<IRInstructionData> {}; + +/// Compare one IRInstructionData class to another IRInstructionData class for +/// whether they are performing a the same operation, and can mapped to the +/// same value. For regular instructions if the hash value is the same, then +/// they will also be close. +/// +/// \param A - The first IRInstructionData class to compare +/// \param B - The second IRInstructionData class to compare +/// \returns true if \p A and \p B are similar enough to be mapped to the same +/// value. 
+bool isClose(const IRInstructionData &A, const IRInstructionData &B); + +struct IRInstructionDataTraits : DenseMapInfo<IRInstructionData *> { + static inline IRInstructionData *getEmptyKey() { return nullptr; } + static inline IRInstructionData *getTombstoneKey() { + return reinterpret_cast<IRInstructionData *>(-1); + } + + static unsigned getHashValue(const IRInstructionData *E) { + using llvm::hash_value; + assert(E && "IRInstructionData is a nullptr?"); + return hash_value(*E); + } + + static bool isEqual(const IRInstructionData *LHS, + const IRInstructionData *RHS) { + if (RHS == getEmptyKey() || RHS == getTombstoneKey() || + LHS == getEmptyKey() || LHS == getTombstoneKey()) + return LHS == RHS; + + assert(LHS && RHS && "nullptr should have been caught by getEmptyKey?"); + return isClose(*LHS, *RHS); + } +}; + +/// Helper struct for converting the Instructions in a Module into a vector of +/// unsigned integers. This vector of unsigned integers can be thought of as a +/// "numeric string". This numeric string can then be queried by, for example, +/// data structures that find repeated substrings. +/// +/// This hashing is done per BasicBlock in the module. To hash Instructions +/// based off of their operations, each Instruction is wrapped in an +/// IRInstructionData struct. The unsigned integer for an IRInstructionData +/// depends on: +/// - The hash provided by the IRInstructionData. +/// - Which member of InstrType the IRInstructionData is classified as. +// See InstrType for more details on the possible classifications, and how they +// manifest in the numeric string. +/// +/// The numeric string for an individual BasicBlock is terminated by an unique +/// unsigned integer. This prevents data structures which rely on repetition +/// from matching across BasicBlocks. (For example, the SuffixTree.) +/// As a concrete example, if we have the following two BasicBlocks: +/// \code +/// bb0: +/// %add1 = add i32 %a, %b +/// %add2 = add i32 %c, %d +/// %add3 = add i64 %e, %f +/// bb1: +/// %sub = sub i32 %c, %d +/// \endcode +/// We may hash the Instructions like this (via IRInstructionData): +/// \code +/// bb0: +/// %add1 = add i32 %a, %b ; Hash: 1 +/// %add2 = add i32 %c, %d; Hash: 1 +/// %add3 = add i64 %e, %f; Hash: 2 +/// bb1: +/// %sub = sub i32 %c, %d; Hash: 3 +/// %add4 = add i32 %c, %d ; Hash: 1 +/// \endcode +/// And produce a "numeric string representation" like so: +/// 1, 1, 2, unique_integer_1, 3, 1, unique_integer_2 +/// +/// TODO: This is very similar to the MachineOutliner, and should be +/// consolidated into the same interface. +struct IRInstructionMapper { + /// The starting illegal instruction number to map to. + /// + /// Set to -3 for compatibility with DenseMapInfo<unsigned>. + unsigned IllegalInstrNumber = static_cast<unsigned>(-3); + + /// The next available integer to assign to a legal Instruction to. + unsigned LegalInstrNumber = 0; + + /// Correspondence from IRInstructionData to unsigned integers. + DenseMap<IRInstructionData *, unsigned, IRInstructionDataTraits> + InstructionIntegerMap; + + /// Set if we added an illegal number in the previous step. + /// Since each illegal number is unique, we only need one of them between + /// each range of legal numbers. This lets us make sure we don't add more + /// than one illegal number per range. + bool AddedIllegalLastTime = false; + + /// Marks whether we found a illegal instruction in the previous step. 
+ bool CanCombineWithPrevInstr = false; + + /// Marks whether we have found a set of instructions that is long enough + /// to be considered for similarity. + bool HaveLegalRange = false; + + /// This allocator pointer is in charge of holding on to the IRInstructionData + /// so it is not deallocated until whatever external tool is using it is done + /// with the information. + SpecificBumpPtrAllocator<IRInstructionData> *InstDataAllocator = nullptr; + + /// This allocator pointer is in charge of creating the IRInstructionDataList + /// so it is not deallocated until whatever external tool is using it is done + /// with the information. + SpecificBumpPtrAllocator<IRInstructionDataList> *IDLAllocator = nullptr; + + /// Get an allocated IRInstructionData struct using the InstDataAllocator. + /// + /// \param I - The Instruction to wrap with IRInstructionData. + /// \param Legality - A boolean value that is true if the instruction is to + /// be considered for similarity, and false if not. + /// \param IDL - The InstructionDataList that the IRInstructionData is + /// inserted into. + /// \returns An allocated IRInstructionData struct. + IRInstructionData *allocateIRInstructionData(Instruction &I, bool Legality, + IRInstructionDataList &IDL); + + /// Get an allocated IRInstructionDataList object using the IDLAllocator. + /// + /// \returns An allocated IRInstructionDataList object. + IRInstructionDataList *allocateIRInstructionDataList(); + + IRInstructionDataList *IDL = nullptr; + + /// Maps the Instructions in a BasicBlock \p BB to legal or illegal integers + /// determined by \p InstrType. Two Instructions are mapped to the same value + /// if they are close as defined by the InstructionData class above. + /// + /// \param [in] BB - The BasicBlock to be mapped to integers. + /// \param [in,out] InstrList - Vector of IRInstructionData to append to. + /// \param [in,out] IntegerMapping - Vector of unsigned integers to append to. + void convertToUnsignedVec(BasicBlock &BB, + std::vector<IRInstructionData *> &InstrList, + std::vector<unsigned> &IntegerMapping); + + /// Maps an Instruction to a legal integer. + /// + /// \param [in] It - The Instruction to be mapped to an integer. + /// \param [in,out] IntegerMappingForBB - Vector of unsigned integers to + /// append to. + /// \param [in,out] InstrListForBB - Vector of InstructionData to append to. + /// \returns The integer \p It was mapped to. + unsigned mapToLegalUnsigned(BasicBlock::iterator &It, + std::vector<unsigned> &IntegerMappingForBB, + std::vector<IRInstructionData *> &InstrListForBB); + + /// Maps an Instruction to an illegal integer. + /// + /// \param [in] It - The \p Instruction to be mapped to an integer. + /// \param [in,out] IntegerMappingForBB - Vector of unsigned integers to + /// append to. + /// \param [in,out] InstrListForBB - Vector of IRInstructionData to append to. + /// \param End - true if creating a dummy IRInstructionData at the end of a + /// basic block. + /// \returns The integer \p It was mapped to. + unsigned mapToIllegalUnsigned( + BasicBlock::iterator &It, std::vector<unsigned> &IntegerMappingForBB, + std::vector<IRInstructionData *> &InstrListForBB, bool End = false); + + IRInstructionMapper(SpecificBumpPtrAllocator<IRInstructionData> *IDA, + SpecificBumpPtrAllocator<IRInstructionDataList> *IDLA) + : InstDataAllocator(IDA), IDLAllocator(IDLA) { + // Make sure that the implementation of DenseMapInfo<unsigned> hasn't + // changed. 
+ assert(DenseMapInfo<unsigned>::getEmptyKey() == static_cast<unsigned>(-1) && + "DenseMapInfo<unsigned>'s empty key isn't -1!"); + assert(DenseMapInfo<unsigned>::getTombstoneKey() == + static_cast<unsigned>(-2) && + "DenseMapInfo<unsigned>'s tombstone key isn't -2!"); + + IDL = new (IDLAllocator->Allocate()) + IRInstructionDataList(); + } + + /// Custom InstVisitor to classify different instructions for whether it can + /// be analyzed for similarity. + struct InstructionClassification + : public InstVisitor<InstructionClassification, InstrType> { + InstructionClassification() {} + + // TODO: Determine a scheme to resolve when the label is similar enough. + InstrType visitBranchInst(BranchInst &BI) { return Illegal; } + // TODO: Determine a scheme to resolve when the labels are similar enough. + InstrType visitPHINode(PHINode &PN) { return Illegal; } + // TODO: Handle allocas. + InstrType visitAllocaInst(AllocaInst &AI) { return Illegal; } + // We exclude variable argument instructions since variable arguments + // requires extra checking of the argument list. + InstrType visitVAArgInst(VAArgInst &VI) { return Illegal; } + // We exclude all exception handling cases since they are so context + // dependent. + InstrType visitLandingPadInst(LandingPadInst &LPI) { return Illegal; } + InstrType visitFuncletPadInst(FuncletPadInst &FPI) { return Illegal; } + // DebugInfo should be included in the regions, but should not be + // analyzed for similarity as it has no bearing on the outcome of the + // program. + InstrType visitDbgInfoIntrinsic(DbgInfoIntrinsic &DII) { return Invisible; } + // TODO: Handle specific intrinsics. + InstrType visitIntrinsicInst(IntrinsicInst &II) { return Illegal; } + // We only allow call instructions where the function has a name and + // is not an indirect call. + InstrType visitCallInst(CallInst &CI) { + Function *F = CI.getCalledFunction(); + if (!F || CI.isIndirectCall() || !F->hasName()) + return Illegal; + return Legal; + } + // TODO: We do not current handle similarity that changes the control flow. + InstrType visitInvokeInst(InvokeInst &II) { return Illegal; } + // TODO: We do not current handle similarity that changes the control flow. + InstrType visitCallBrInst(CallBrInst &CBI) { return Illegal; } + // TODO: Handle interblock similarity. + InstrType visitTerminator(Instruction &I) { return Illegal; } + InstrType visitInstruction(Instruction &I) { return Legal; } + }; + + /// Maps an Instruction to a member of InstrType. + InstructionClassification InstClassifier; +}; + +/// This is a class that wraps a range of IRInstructionData from one point to +/// another in the vector of IRInstructionData, which is a region of the +/// program. It is also responsible for defining the structure within this +/// region of instructions. +/// +/// The structure of a region is defined through a value numbering system +/// assigned to each unique value in a region at the creation of the +/// IRSimilarityCandidate. +/// +/// For example, for each Instruction we add a mapping for each new +/// value seen in that Instruction. +/// IR: Mapping Added: +/// %add1 = add i32 %a, c1 %add1 -> 3, %a -> 1, c1 -> 2 +/// %add2 = add i32 %a, %1 %add2 -> 4 +/// %add3 = add i32 c2, c1 %add3 -> 6, c2 -> 5 +/// +/// We can compare IRSimilarityCandidates against one another. +/// The \ref isSimilar function compares each IRInstructionData against one +/// another and if we have the same sequences of IRInstructionData that would +/// create the same hash, we have similar IRSimilarityCandidates. 
+/// +/// We can also compare the structure of IRSimilarityCandidates. If we can +/// create a mapping of registers in the region contained by one +/// IRSimilarityCandidate to the region contained by different +/// IRSimilarityCandidate, they can be considered structurally similar. +/// +/// IRSimilarityCandidate1: IRSimilarityCandidate2: +/// %add1 = add i32 %a, %b %add1 = add i32 %d, %e +/// %add2 = add i32 %a, %c %add2 = add i32 %d, %f +/// %add3 = add i32 c1, c2 %add3 = add i32 c3, c4 +/// +/// Can have the following mapping from candidate to candidate of: +/// %a -> %d, %b -> %e, %c -> %f, c1 -> c3, c2 -> c4 +/// and can be considered similar. +/// +/// IRSimilarityCandidate1: IRSimilarityCandidate2: +/// %add1 = add i32 %a, %b %add1 = add i32 %d, c4 +/// %add2 = add i32 %a, %c %add2 = add i32 %d, %f +/// %add3 = add i32 c1, c2 %add3 = add i32 c3, c4 +/// +/// We cannot create the same mapping since the use of c4 is not used in the +/// same way as %b or c2. +class IRSimilarityCandidate { +private: + /// The start index of this IRSimilarityCandidate in the instruction list. + unsigned StartIdx = 0; + + /// The number of instructions in this IRSimilarityCandidate. + unsigned Len = 0; + + /// The first instruction in this IRSimilarityCandidate. + IRInstructionData *FirstInst = nullptr; + + /// The last instruction in this IRSimilarityCandidate. + IRInstructionData *LastInst = nullptr; + + /// Global Value Numbering structures + /// @{ + /// Stores the mapping of the value to the number assigned to it in the + /// IRSimilarityCandidate. + DenseMap<Value *, unsigned> ValueToNumber; + /// Stores the mapping of the number to the value assigned this number. + DenseMap<unsigned, Value *> NumberToValue; + /// @} + +public: + /// \param StartIdx - The starting location of the region. + /// \param Len - The length of the region. + /// \param FirstInstIt - The starting IRInstructionData of the region. + /// \param LastInstIt - The ending IRInstructionData of the region. + IRSimilarityCandidate(unsigned StartIdx, unsigned Len, + IRInstructionData *FirstInstIt, + IRInstructionData *LastInstIt); + + /// \param A - The first IRInstructionCandidate to compare. + /// \param B - The second IRInstructionCandidate to compare. + /// \returns True when every IRInstructionData in \p A is similar to every + /// IRInstructionData in \p B. + static bool isSimilar(const IRSimilarityCandidate &A, + const IRSimilarityCandidate &B); + + /// \param A - The first IRInstructionCandidate to compare. + /// \param B - The second IRInstructionCandidate to compare. + /// \returns True when every IRInstructionData in \p A is structurally similar + /// to \p B. + static bool compareStructure(const IRSimilarityCandidate &A, + const IRSimilarityCandidate &B); + + struct OperandMapping { + /// The IRSimilarityCandidate that holds the instruction the OperVals were + /// pulled from. + const IRSimilarityCandidate &IRSC; + + /// The operand values to be analyzed. + ArrayRef<Value *> &OperVals; + + /// The current mapping of global value numbers from one IRSimilarityCandidate + /// to another IRSimilarityCandidate. + DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMapping; + }; + + /// Compare the operands in \p A and \p B and check that the current mapping + /// of global value numbers from \p A to \p B and \p B to \A is consistent. + /// + /// \param A - The first IRInstructionCandidate, operand values, and current + /// operand mappings to compare. 
+ /// \param B - The second IRInstructionCandidate, operand values, and current + /// operand mappings to compare. + /// \returns true if the IRSimilarityCandidates operands are compatible. + static bool compareNonCommutativeOperandMapping(OperandMapping A, + OperandMapping B); + + /// Compare the operands in \p A and \p B and check that the current mapping + /// of global value numbers from \p A to \p B and \p B to \A is consistent + /// given that the operands are commutative. + /// + /// \param A - The first IRInstructionCandidate, operand values, and current + /// operand mappings to compare. + /// \param B - The second IRInstructionCandidate, operand values, and current + /// operand mappings to compare. + /// \returns true if the IRSimilarityCandidates operands are compatible. + static bool compareCommutativeOperandMapping(OperandMapping A, + OperandMapping B); + + /// Compare the start and end indices of the two IRSimilarityCandidates for + /// whether they overlap. If the start instruction of one + /// IRSimilarityCandidate is less than the end instruction of the other, and + /// the start instruction of one is greater than the start instruction of the + /// other, they overlap. + /// + /// \returns true if the IRSimilarityCandidates do not have overlapping + /// instructions. + static bool overlap(const IRSimilarityCandidate &A, + const IRSimilarityCandidate &B); + + /// \returns the number of instructions in this Candidate. + unsigned getLength() const { return Len; } + + /// \returns the start index of this IRSimilarityCandidate. + unsigned getStartIdx() const { return StartIdx; } + + /// \returns the end index of this IRSimilarityCandidate. + unsigned getEndIdx() const { return StartIdx + Len - 1; } + + /// \returns The first IRInstructionData. + IRInstructionData *front() const { return FirstInst; } + /// \returns The last IRInstructionData. + IRInstructionData *back() const { return LastInst; } + + /// \returns The first Instruction. + Instruction *frontInstruction() { return FirstInst->Inst; } + /// \returns The last Instruction + Instruction *backInstruction() { return LastInst->Inst; } + + /// \returns The BasicBlock the IRSimilarityCandidate starts in. + BasicBlock *getStartBB() { return FirstInst->Inst->getParent(); } + /// \returns The BasicBlock the IRSimilarityCandidate ends in. + BasicBlock *getEndBB() { return LastInst->Inst->getParent(); } + + /// \returns The Function that the IRSimilarityCandidate is located in. + Function *getFunction() { return getStartBB()->getParent(); } + + /// Finds the positive number associated with \p V if it has been mapped. + /// \param [in] V - the Value to find. + /// \returns The positive number corresponding to the value. + /// \returns None if not present. + Optional<unsigned> getGVN(Value *V) { + assert(V != nullptr && "Value is a nullptr?"); + DenseMap<Value *, unsigned>::iterator VNIt = ValueToNumber.find(V); + if (VNIt == ValueToNumber.end()) + return None; + return VNIt->second; + } + + /// Finds the Value associate with \p Num if it exists. + /// \param [in] Num - the number to find. + /// \returns The Value associated with the number. + /// \returns None if not present. 
+ Optional<Value *> fromGVN(unsigned Num) { + DenseMap<unsigned, Value *>::iterator VNIt = NumberToValue.find(Num); + if (VNIt == NumberToValue.end()) + return None; + assert(VNIt->second != nullptr && "Found value is a nullptr!"); + return VNIt->second; + } + + /// \param RHS -The IRSimilarityCandidate to compare against + /// \returns true if the IRSimilarityCandidate is occurs after the + /// IRSimilarityCandidate in the program. + bool operator<(const IRSimilarityCandidate &RHS) const { + return getStartIdx() > RHS.getStartIdx(); + } + + using iterator = IRInstructionDataList::iterator; + iterator begin() const { return iterator(front()); } + iterator end() const { return std::next(iterator(back())); } +}; + +typedef std::vector<IRSimilarityCandidate> SimilarityGroup; +typedef std::vector<SimilarityGroup> SimilarityGroupList; + +/// This class puts all the pieces of the IRInstructionData, +/// IRInstructionMapper, IRSimilarityCandidate together. +/// +/// It first feeds the Module or vector of Modules into the IRInstructionMapper, +/// and puts all the mapped instructions into a single long list of +/// IRInstructionData. +/// +/// The list of unsigned integers is given to the Suffix Tree or similar data +/// structure to find repeated subsequences. We construct an +/// IRSimilarityCandidate for each instance of the subsequence. We compare them +/// against one another since These repeated subsequences can have different +/// structure. For each different kind of structure found, we create a +/// similarity group. +/// +/// If we had four IRSimilarityCandidates A, B, C, and D where A, B and D are +/// structurally similar to one another, while C is different we would have two +/// SimilarityGroups: +/// +/// SimilarityGroup 1: SimilarityGroup 2 +/// A, B, D C +/// +/// A list of the different similarity groups is then returned after +/// analyzing the module. +class IRSimilarityIdentifier { +public: + IRSimilarityIdentifier() + : Mapper(&InstDataAllocator, &InstDataListAllocator) {} + + /// \param M the module to find similarity in. + explicit IRSimilarityIdentifier(Module &M) + : Mapper(&InstDataAllocator, &InstDataListAllocator) { + findSimilarity(M); + } + +private: + /// Map the instructions in the module to unsigned integers, using mapping + /// already present in the Mapper if possible. + /// + /// \param [in] M Module - To map to integers. + /// \param [in,out] InstrList - The vector to append IRInstructionData to. + /// \param [in,out] IntegerMapping - The vector to append integers to. + void populateMapper(Module &M, std::vector<IRInstructionData *> &InstrList, + std::vector<unsigned> &IntegerMapping); + + /// Map the instructions in the modules vector to unsigned integers, using + /// mapping already present in the mapper if possible. + /// + /// \param [in] Modules - The list of modules to use to populate the mapper + /// \param [in,out] InstrList - The vector to append IRInstructionData to. + /// \param [in,out] IntegerMapping - The vector to append integers to. + void populateMapper(ArrayRef<std::unique_ptr<Module>> &Modules, + std::vector<IRInstructionData *> &InstrList, + std::vector<unsigned> &IntegerMapping); + + /// Find the similarity candidates in \p InstrList and corresponding + /// \p UnsignedVec + /// + /// \param [in,out] InstrList - The vector to append IRInstructionData to. + /// \param [in,out] IntegerMapping - The vector to append integers to. + /// candidates found in the program. 
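// Illustrative sketch, not taken from the patch: the per-candidate value
// numbering can be queried in both directions. `Cand` stands in for a
// populated IRSimilarityCandidate and `V` for a value used in its region.
static void sketchQueryNumbering(
    llvm::IRSimilarity::IRSimilarityCandidate &Cand, llvm::Value *V) {
  if (llvm::Optional<unsigned> GVN = Cand.getGVN(V)) {
    // fromGVN round-trips back to the value that was assigned this number.
    llvm::Optional<llvm::Value *> Same = Cand.fromGVN(*GVN);
    (void)Same;
  }
}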
+ void findCandidates(std::vector<IRInstructionData *> &InstrList, + std::vector<unsigned> &IntegerMapping); + +public: + // Find the IRSimilarityCandidates in the \p Modules and group by structural + // similarity in a SimilarityGroup, each group is returned in a + // SimilarityGroupList. + // + // \param [in] Modules - the modules to analyze. + // \returns The groups of similarity ranges found in the modules. + SimilarityGroupList & + findSimilarity(ArrayRef<std::unique_ptr<Module>> Modules); + + // Find the IRSimilarityCandidates in the given Module grouped by structural + // similarity in a SimilarityGroup, contained inside a SimilarityGroupList. + // + // \param [in] M - the module to analyze. + // \returns The groups of similarity ranges found in the module. + SimilarityGroupList &findSimilarity(Module &M); + + // Clears \ref SimilarityCandidates if it is already filled by a previous run. + void resetSimilarityCandidates() { + // If we've already analyzed a Module or set of Modules, so we must clear + // the SimilarityCandidates to make sure we do not have only old values + // hanging around. + if (SimilarityCandidates.hasValue()) + SimilarityCandidates->clear(); + else + SimilarityCandidates = SimilarityGroupList(); + } + + // \returns The groups of similarity ranges found in the most recently passed + // set of modules. + Optional<SimilarityGroupList> &getSimilarity() { + return SimilarityCandidates; + } + +private: + /// The allocator for IRInstructionData. + SpecificBumpPtrAllocator<IRInstructionData> InstDataAllocator; + + /// The allocator for IRInstructionDataLists. + SpecificBumpPtrAllocator<IRInstructionDataList> InstDataListAllocator; + + /// Map Instructions to unsigned integers and wraps the Instruction in an + /// instance of IRInstructionData. + IRInstructionMapper Mapper; + + /// The SimilarityGroups found with the most recent run of \ref + /// findSimilarity. None if there is no recent run. + Optional<SimilarityGroupList> SimilarityCandidates; +}; + +} // end namespace IRSimilarity + +/// An analysis pass based on legacy pass manager that runs and returns +/// IRSimilarityIdentifier run on the Module. +class IRSimilarityIdentifierWrapperPass : public ModulePass { + std::unique_ptr<IRSimilarity::IRSimilarityIdentifier> IRSI; + +public: + static char ID; + IRSimilarityIdentifierWrapperPass(); + + IRSimilarity::IRSimilarityIdentifier &getIRSI() { return *IRSI; } + const IRSimilarity::IRSimilarityIdentifier &getIRSI() const { return *IRSI; } + + bool doInitialization(Module &M) override; + bool doFinalization(Module &M) override; + bool runOnModule(Module &M) override; + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + } +}; + +/// An analysis pass that runs and returns the IRSimilarityIdentifier run on the +/// Module. +class IRSimilarityAnalysis : public AnalysisInfoMixin<IRSimilarityAnalysis> { +public: + typedef IRSimilarity::IRSimilarityIdentifier Result; + + Result run(Module &M, ModuleAnalysisManager &); + +private: + friend AnalysisInfoMixin<IRSimilarityAnalysis>; + static AnalysisKey Key; +}; + +/// Printer pass that uses \c IRSimilarityAnalysis. 
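// Illustrative sketch, not taken from the patch: running the identifier over a
// module and walking the similarity groups it reports. The Module-taking
// constructor shown above already calls findSimilarity(M).
static void sketchReportSimilarity(llvm::Module &M) {
  llvm::IRSimilarity::IRSimilarityIdentifier IRSI(M);
  for (llvm::IRSimilarity::SimilarityGroup &Group : *IRSI.getSimilarity())
    for (llvm::IRSimilarity::IRSimilarityCandidate &Cand : Group)
      llvm::errs() << Cand.getFunction()->getName() << ": " << Cand.getLength()
                   << " instructions starting at index " << Cand.getStartIdx()
                   << "\n";
}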
+class IRSimilarityAnalysisPrinterPass + : public PassInfoMixin<IRSimilarityAnalysisPrinterPass> { + raw_ostream &OS; + +public: + explicit IRSimilarityAnalysisPrinterPass(raw_ostream &OS) : OS(OS) {} + PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); +}; + +} // end namespace llvm + +#endif // LLVM_ANALYSIS_IRSIMILARITYIDENTIFIER_H diff --git a/llvm/include/llvm/Analysis/IVDescriptors.h b/llvm/include/llvm/Analysis/IVDescriptors.h index 1bae83d13c7a..28546110ba04 100644 --- a/llvm/include/llvm/Analysis/IVDescriptors.h +++ b/llvm/include/llvm/Analysis/IVDescriptors.h @@ -32,7 +32,24 @@ class PredicatedScalarEvolution; class ScalarEvolution; class SCEV; class DominatorTree; -class ICFLoopSafetyInfo; + +/// These are the kinds of recurrences that we support. +enum class RecurKind { + None, ///< Not a recurrence. + Add, ///< Sum of integers. + Mul, ///< Product of integers. + Or, ///< Bitwise or logical OR of integers. + And, ///< Bitwise or logical AND of integers. + Xor, ///< Bitwise or logical XOR of integers. + SMin, ///< Signed integer min implemented in terms of select(cmp()). + SMax, ///< Signed integer max implemented in terms of select(cmp()). + UMin, ///< Unisgned integer min implemented in terms of select(cmp()). + UMax, ///< Unsigned integer max implemented in terms of select(cmp()). + FAdd, ///< Sum of floats. + FMul, ///< Product of floats. + FMin, ///< FP min implemented in terms of select(cmp()). + FMax ///< FP max implemented in terms of select(cmp()). +}; /// The RecurrenceDescriptor is used to identify recurrences variables in a /// loop. Reduction is a special case of recurrence that has uses of the @@ -48,40 +65,13 @@ class ICFLoopSafetyInfo; /// This struct holds information about recurrence variables. class RecurrenceDescriptor { public: - /// This enum represents the kinds of recurrences that we support. - enum RecurrenceKind { - RK_NoRecurrence, ///< Not a recurrence. - RK_IntegerAdd, ///< Sum of integers. - RK_IntegerMult, ///< Product of integers. - RK_IntegerOr, ///< Bitwise or logical OR of numbers. - RK_IntegerAnd, ///< Bitwise or logical AND of numbers. - RK_IntegerXor, ///< Bitwise or logical XOR of numbers. - RK_IntegerMinMax, ///< Min/max implemented in terms of select(cmp()). - RK_FloatAdd, ///< Sum of floats. - RK_FloatMult, ///< Product of floats. - RK_FloatMinMax ///< Min/max implemented in terms of select(cmp()). - }; - - // This enum represents the kind of minmax recurrence. 
- enum MinMaxRecurrenceKind { - MRK_Invalid, - MRK_UIntMin, - MRK_UIntMax, - MRK_SIntMin, - MRK_SIntMax, - MRK_FloatMin, - MRK_FloatMax - }; - RecurrenceDescriptor() = default; - RecurrenceDescriptor(Value *Start, Instruction *Exit, RecurrenceKind K, - FastMathFlags FMF, MinMaxRecurrenceKind MK, - Instruction *UAI, Type *RT, bool Signed, - SmallPtrSetImpl<Instruction *> &CI) + RecurrenceDescriptor(Value *Start, Instruction *Exit, RecurKind K, + FastMathFlags FMF, Instruction *UAI, Type *RT, + bool Signed, SmallPtrSetImpl<Instruction *> &CI) : StartValue(Start), LoopExitInstr(Exit), Kind(K), FMF(FMF), - MinMaxKind(MK), UnsafeAlgebraInst(UAI), RecurrenceType(RT), - IsSigned(Signed) { + UnsafeAlgebraInst(UAI), RecurrenceType(RT), IsSigned(Signed) { CastInsts.insert(CI.begin(), CI.end()); } @@ -89,22 +79,22 @@ public: class InstDesc { public: InstDesc(bool IsRecur, Instruction *I, Instruction *UAI = nullptr) - : IsRecurrence(IsRecur), PatternLastInst(I), MinMaxKind(MRK_Invalid), - UnsafeAlgebraInst(UAI) {} + : IsRecurrence(IsRecur), PatternLastInst(I), + RecKind(RecurKind::None), UnsafeAlgebraInst(UAI) {} - InstDesc(Instruction *I, MinMaxRecurrenceKind K, Instruction *UAI = nullptr) - : IsRecurrence(true), PatternLastInst(I), MinMaxKind(K), + InstDesc(Instruction *I, RecurKind K, Instruction *UAI = nullptr) + : IsRecurrence(true), PatternLastInst(I), RecKind(K), UnsafeAlgebraInst(UAI) {} - bool isRecurrence() { return IsRecurrence; } + bool isRecurrence() const { return IsRecurrence; } - bool hasUnsafeAlgebra() { return UnsafeAlgebraInst != nullptr; } + bool hasUnsafeAlgebra() const { return UnsafeAlgebraInst != nullptr; } - Instruction *getUnsafeAlgebraInst() { return UnsafeAlgebraInst; } + Instruction *getUnsafeAlgebraInst() const { return UnsafeAlgebraInst; } - MinMaxRecurrenceKind getMinMaxKind() { return MinMaxKind; } + RecurKind getRecKind() const { return RecKind; } - Instruction *getPatternInst() { return PatternLastInst; } + Instruction *getPatternInst() const { return PatternLastInst; } private: // Is this instruction a recurrence candidate. @@ -112,8 +102,8 @@ public: // The last instruction in a min/max pattern (select of the select(icmp()) // pattern), or the current recurrence instruction otherwise. Instruction *PatternLastInst; - // If this is a min/max pattern the comparison predicate. - MinMaxRecurrenceKind MinMaxKind; + // If this is a min/max pattern. + RecurKind RecKind; // Recurrence has unsafe algebra. Instruction *UnsafeAlgebraInst; }; @@ -123,7 +113,7 @@ public: /// select(icmp()) this function advances the instruction pointer 'I' from the /// compare instruction to the select instruction and stores this pointer in /// 'PatternLastInst' member of the returned struct. - static InstDesc isRecurrenceInstr(Instruction *I, RecurrenceKind Kind, + static InstDesc isRecurrenceInstr(Instruction *I, RecurKind Kind, InstDesc &Prev, bool HasFunNoNaNAttr); /// Returns true if instruction I has multiple uses in Insts @@ -134,27 +124,28 @@ public: /// Returns true if all uses of the instruction I is within the Set. static bool areAllUsesIn(Instruction *I, SmallPtrSetImpl<Instruction *> &Set); - /// Returns a struct describing if the instruction if the instruction is a + /// Returns a struct describing if the instruction is a /// Select(ICmp(X, Y), X, Y) instruction pattern corresponding to a min(X, Y) - /// or max(X, Y). - static InstDesc isMinMaxSelectCmpPattern(Instruction *I, InstDesc &Prev); + /// or max(X, Y). 
\p Prev specifies the description of an already processed + /// select instruction, so its corresponding cmp can be matched to it. + static InstDesc isMinMaxSelectCmpPattern(Instruction *I, + const InstDesc &Prev); /// Returns a struct describing if the instruction is a /// Select(FCmp(X, Y), (Z = X op PHINode), PHINode) instruction pattern. - static InstDesc isConditionalRdxPattern(RecurrenceKind Kind, Instruction *I); + static InstDesc isConditionalRdxPattern(RecurKind Kind, Instruction *I); /// Returns identity corresponding to the RecurrenceKind. - static Constant *getRecurrenceIdentity(RecurrenceKind K, Type *Tp); + static Constant *getRecurrenceIdentity(RecurKind K, Type *Tp); - /// Returns the opcode of binary operation corresponding to the - /// RecurrenceKind. - static unsigned getRecurrenceBinOp(RecurrenceKind Kind); + /// Returns the opcode corresponding to the RecurrenceKind. + static unsigned getOpcode(RecurKind Kind); /// Returns true if Phi is a reduction of type Kind and adds it to the /// RecurrenceDescriptor. If either \p DB is non-null or \p AC and \p DT are /// non-null, the minimal bit width needed to compute the reduction will be /// computed. - static bool AddReductionVar(PHINode *Phi, RecurrenceKind Kind, Loop *TheLoop, + static bool AddReductionVar(PHINode *Phi, RecurKind Kind, Loop *TheLoop, bool HasFunNoNaNAttr, RecurrenceDescriptor &RedDes, DemandedBits *DB = nullptr, @@ -183,42 +174,63 @@ public: DenseMap<Instruction *, Instruction *> &SinkAfter, DominatorTree *DT); - RecurrenceKind getRecurrenceKind() { return Kind; } + RecurKind getRecurrenceKind() const { return Kind; } - MinMaxRecurrenceKind getMinMaxRecurrenceKind() { return MinMaxKind; } + unsigned getOpcode() const { return getOpcode(getRecurrenceKind()); } - FastMathFlags getFastMathFlags() { return FMF; } + FastMathFlags getFastMathFlags() const { return FMF; } - TrackingVH<Value> getRecurrenceStartValue() { return StartValue; } + TrackingVH<Value> getRecurrenceStartValue() const { return StartValue; } - Instruction *getLoopExitInstr() { return LoopExitInstr; } + Instruction *getLoopExitInstr() const { return LoopExitInstr; } /// Returns true if the recurrence has unsafe algebra which requires a relaxed /// floating-point model. - bool hasUnsafeAlgebra() { return UnsafeAlgebraInst != nullptr; } + bool hasUnsafeAlgebra() const { return UnsafeAlgebraInst != nullptr; } /// Returns first unsafe algebra instruction in the PHI node's use-chain. - Instruction *getUnsafeAlgebraInst() { return UnsafeAlgebraInst; } + Instruction *getUnsafeAlgebraInst() const { return UnsafeAlgebraInst; } /// Returns true if the recurrence kind is an integer kind. - static bool isIntegerRecurrenceKind(RecurrenceKind Kind); + static bool isIntegerRecurrenceKind(RecurKind Kind); /// Returns true if the recurrence kind is a floating point kind. - static bool isFloatingPointRecurrenceKind(RecurrenceKind Kind); + static bool isFloatingPointRecurrenceKind(RecurKind Kind); /// Returns true if the recurrence kind is an arithmetic kind. - static bool isArithmeticRecurrenceKind(RecurrenceKind Kind); + static bool isArithmeticRecurrenceKind(RecurKind Kind); + + /// Returns true if the recurrence kind is an integer min/max kind. + static bool isIntMinMaxRecurrenceKind(RecurKind Kind) { + return Kind == RecurKind::UMin || Kind == RecurKind::UMax || + Kind == RecurKind::SMin || Kind == RecurKind::SMax; + } + + /// Returns true if the recurrence kind is a floating-point min/max kind. 
+ static bool isFPMinMaxRecurrenceKind(RecurKind Kind) { + return Kind == RecurKind::FMin || Kind == RecurKind::FMax; + } + + /// Returns true if the recurrence kind is any min/max kind. + static bool isMinMaxRecurrenceKind(RecurKind Kind) { + return isIntMinMaxRecurrenceKind(Kind) || isFPMinMaxRecurrenceKind(Kind); + } /// Returns the type of the recurrence. This type can be narrower than the /// actual type of the Phi if the recurrence has been type-promoted. - Type *getRecurrenceType() { return RecurrenceType; } + Type *getRecurrenceType() const { return RecurrenceType; } /// Returns a reference to the instructions used for type-promoting the /// recurrence. - SmallPtrSet<Instruction *, 8> &getCastInsts() { return CastInsts; } + const SmallPtrSet<Instruction *, 8> &getCastInsts() const { return CastInsts; } /// Returns true if all source operands of the recurrence are SExtInsts. - bool isSigned() { return IsSigned; } + bool isSigned() const { return IsSigned; } + + /// Attempts to find a chain of operations from Phi to LoopExitInst that can + /// be treated as a set of reductions instructions for in-loop reductions. + SmallVector<Instruction *, 4> getReductionOpChain(PHINode *Phi, + Loop *L) const; private: // The starting value of the recurrence. @@ -227,12 +239,10 @@ private: // The instruction who's value is used outside the loop. Instruction *LoopExitInstr = nullptr; // The kind of the recurrence. - RecurrenceKind Kind = RK_NoRecurrence; + RecurKind Kind = RecurKind::None; // The fast-math flags on the recurrent instructions. We propagate these // fast-math flags into the vectorized FP instructions we generate. FastMathFlags FMF; - // If this a min/max recurrence the kind of recurrence. - MinMaxRecurrenceKind MinMaxKind = MRK_Invalid; // First occurrence of unasfe algebra in the PHI's use-chain. Instruction *UnsafeAlgebraInst = nullptr; // The type of the recurrence. @@ -258,12 +268,6 @@ public: /// Default constructor - creates an invalid induction. InductionDescriptor() = default; - /// Get the consecutive direction. Returns: - /// 0 - unknown or non-consecutive. - /// 1 - consecutive and increasing. - /// -1 - consecutive and decreasing. - int getConsecutiveDirection() const; - Value *getStartValue() const { return StartValue; } InductionKind getKind() const { return IK; } const SCEV *getStep() const { return Step; } diff --git a/llvm/include/llvm/Analysis/InlineAdvisor.h b/llvm/include/llvm/Analysis/InlineAdvisor.h index 3480d93385a8..c39fae13d3b8 100644 --- a/llvm/include/llvm/Analysis/InlineAdvisor.h +++ b/llvm/include/llvm/Analysis/InlineAdvisor.h @@ -9,12 +9,12 @@ #ifndef LLVM_INLINEADVISOR_H_ #define LLVM_INLINEADVISOR_H_ -#include <memory> -#include <unordered_set> -#include <vector> - #include "llvm/Analysis/InlineCost.h" +#include "llvm/Config/llvm-config.h" #include "llvm/IR/PassManager.h" +#include "llvm/Analysis/Utils/ImportedFunctionsInliningStatistics.h" +#include <memory> +#include <unordered_set> namespace llvm { class BasicBlock; @@ -36,7 +36,11 @@ class OptimizationRemarkEmitter; /// requires the full C Tensorflow API library, and evaluates models /// dynamically. This mode also permits generating training logs, for offline /// training. 
-enum class InliningAdvisorMode : int { Default, Release, Development }; +enum class InliningAdvisorMode : int { + Default, + Release, + Development +}; class InlineAdvisor; /// Capture state between an inlining decision having had been made, and @@ -62,10 +66,7 @@ public: /// behavior by implementing the corresponding record*Impl. /// /// Call after inlining succeeded, and did not result in deleting the callee. - void recordInlining() { - markRecorded(); - recordInliningImpl(); - } + void recordInlining(); /// Call after inlining succeeded, and resulted in deleting the callee. void recordInliningWithCalleeDeleted(); @@ -111,21 +112,44 @@ private: assert(!Recorded && "Recording should happen exactly once"); Recorded = true; } + void recordInlineStatsIfNeeded(); bool Recorded = false; }; +class DefaultInlineAdvice : public InlineAdvice { +public: + DefaultInlineAdvice(InlineAdvisor *Advisor, CallBase &CB, + Optional<InlineCost> OIC, OptimizationRemarkEmitter &ORE, + bool EmitRemarks = true) + : InlineAdvice(Advisor, CB, ORE, OIC.hasValue()), OriginalCB(&CB), + OIC(OIC), EmitRemarks(EmitRemarks) {} + +private: + void recordUnsuccessfulInliningImpl(const InlineResult &Result) override; + void recordInliningWithCalleeDeletedImpl() override; + void recordInliningImpl() override; + +private: + CallBase *const OriginalCB; + Optional<InlineCost> OIC; + bool EmitRemarks; +}; + /// Interface for deciding whether to inline a call site or not. class InlineAdvisor { public: InlineAdvisor(InlineAdvisor &&) = delete; - virtual ~InlineAdvisor() { freeDeletedFunctions(); } + virtual ~InlineAdvisor(); /// Get an InlineAdvice containing a recommendation on whether to /// inline or not. \p CB is assumed to be a direct call. \p FAM is assumed to - /// be up-to-date wrt previous inlining decisions. + /// be up-to-date wrt previous inlining decisions. \p MandatoryOnly indicates + /// only mandatory (always-inline) call sites should be recommended - this + /// allows the InlineAdvisor track such inlininings. /// Returns an InlineAdvice with the inlining recommendation. - virtual std::unique_ptr<InlineAdvice> getAdvice(CallBase &CB) = 0; + std::unique_ptr<InlineAdvice> getAdvice(CallBase &CB, + bool MandatoryOnly = false); /// This must be called when the Inliner pass is entered, to allow the /// InlineAdvisor update internal state, as result of function passes run @@ -138,9 +162,14 @@ public: virtual void onPassExit() {} protected: - InlineAdvisor(FunctionAnalysisManager &FAM) : FAM(FAM) {} + InlineAdvisor(Module &M, FunctionAnalysisManager &FAM); + virtual std::unique_ptr<InlineAdvice> getAdviceImpl(CallBase &CB) = 0; + virtual std::unique_ptr<InlineAdvice> getMandatoryAdvice(CallBase &CB, + bool Advice); + Module &M; FunctionAnalysisManager &FAM; + std::unique_ptr<ImportedFunctionsInliningStatistics> ImportedFunctionsStats; /// We may want to defer deleting functions to after the inlining for a whole /// module has finished. This allows us to reliably use function pointers as @@ -155,6 +184,14 @@ protected: return DeletedFunctions.count(F); } + enum class MandatoryInliningKind { NotMandatory, Always, Never }; + + static MandatoryInliningKind getMandatoryKind(CallBase &CB, + FunctionAnalysisManager &FAM, + OptimizationRemarkEmitter &ORE); + + OptimizationRemarkEmitter &getCallerORE(CallBase &CB); + private: friend class InlineAdvice; void markFunctionAsDeleted(Function *F); @@ -166,11 +203,12 @@ private: /// reusable as-is for inliner pass test scenarios, as well as for regular use. 
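// Illustrative sketch, not taken from the patch: the advice/record protocol as
// a caller of getAdvice() is expected to follow it. isInliningRecommended(),
// recordUnsuccessfulInlining() and recordUnattemptedInlining() are members of
// the upstream InlineAdvice that are not visible in this hunk, and
// InlineFunction() comes from llvm/Transforms/Utils/Cloning.h.
static void sketchAdvisedInline(llvm::InlineAdvisor &Advisor,
                                llvm::CallBase &CB,
                                llvm::InlineFunctionInfo &IFI) {
  std::unique_ptr<llvm::InlineAdvice> Advice = Advisor.getAdvice(CB);
  if (!Advice->isInliningRecommended()) {
    Advice->recordUnattemptedInlining();
    return;
  }
  llvm::InlineResult Res = llvm::InlineFunction(CB, IFI);
  if (Res.isSuccess())
    Advice->recordInlining();
  else
    Advice->recordUnsuccessfulInlining(Res);
}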
class DefaultInlineAdvisor : public InlineAdvisor { public: - DefaultInlineAdvisor(FunctionAnalysisManager &FAM, InlineParams Params) - : InlineAdvisor(FAM), Params(Params) {} + DefaultInlineAdvisor(Module &M, FunctionAnalysisManager &FAM, + InlineParams Params) + : InlineAdvisor(M, FAM), Params(Params) {} private: - std::unique_ptr<InlineAdvice> getAdvice(CallBase &CB) override; + std::unique_ptr<InlineAdvice> getAdviceImpl(CallBase &CB) override; void onPassExit() override { freeDeletedFunctions(); } @@ -190,7 +228,8 @@ public: // InlineAdvisor must be preserved across analysis invalidations. return false; } - bool tryCreate(InlineParams Params, InliningAdvisorMode Mode); + bool tryCreate(InlineParams Params, InliningAdvisorMode Mode, + StringRef ReplayFile); InlineAdvisor *getAdvisor() const { return Advisor.get(); } void clear() { Advisor.reset(); } @@ -208,6 +247,12 @@ std::unique_ptr<InlineAdvisor> getReleaseModeAdvisor(Module &M, ModuleAnalysisManager &MAM); #endif +#ifdef LLVM_HAVE_TF_API +std::unique_ptr<InlineAdvisor> +getDevelopmentModeAdvisor(Module &M, ModuleAnalysisManager &MAM, + std::function<bool(CallBase &)> GetDefaultAdvice); +#endif + // Default (manual policy) decision making helper APIs. Shared with the legacy // pass manager inliner. @@ -226,6 +271,9 @@ void emitInlinedInto(OptimizationRemarkEmitter &ORE, DebugLoc DLoc, bool ForProfileContext = false, const char *PassName = nullptr); +/// get call site location as string +std::string getCallSiteLocation(DebugLoc DLoc); + /// Add location info to ORE message. void addLocationToRemarks(OptimizationRemark &Remark, DebugLoc DLoc); diff --git a/llvm/include/llvm/Analysis/InlineFeaturesAnalysis.h b/llvm/include/llvm/Analysis/InlineFeaturesAnalysis.h deleted file mode 100644 index cc3f96c424e9..000000000000 --- a/llvm/include/llvm/Analysis/InlineFeaturesAnalysis.h +++ /dev/null @@ -1,45 +0,0 @@ -//===- InlineFeaturesAnalysis.h - ML Policy Feature extraction -*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_INLINEFEATURESANALYSIS_H_ -#define LLVM_INLINEFEATURESANALYSIS_H_ - -#include "llvm/IR/PassManager.h" - -namespace llvm { -class Function; - -class InlineFeaturesAnalysis - : public AnalysisInfoMixin<InlineFeaturesAnalysis> { -public: - static AnalysisKey Key; - struct Result { - /// Number of basic blocks - int64_t BasicBlockCount = 0; - - /// Number of blocks reached from a conditional instruction, or that are - /// 'cases' of a SwitchInstr. - // FIXME: We may want to replace this with a more meaningful metric, like - // number of conditionally executed blocks: - // 'if (a) s();' would be counted here as 2 blocks, just like - // 'if (a) s(); else s2(); s3();' would. - int64_t BlocksReachedFromConditionalInstruction = 0; - - /// Number of uses of this function, plus 1 if the function is callable - /// outside the module. - int64_t Uses = 0; - - /// Number of direct calls made from this function to other functions - /// defined in this module. - int64_t DirectCallsToDefinedFunctions = 0; - }; - Result run(const Function &F, FunctionAnalysisManager &FAM); -}; - -} // namespace llvm -#endif // LLVM_INLINEFEATURESANALYSIS_H_
\ No newline at end of file diff --git a/llvm/include/llvm/Analysis/InlineSizeEstimatorAnalysis.h b/llvm/include/llvm/Analysis/InlineSizeEstimatorAnalysis.h index 29a6f5914674..ab2cf52494c0 100644 --- a/llvm/include/llvm/Analysis/InlineSizeEstimatorAnalysis.h +++ b/llvm/include/llvm/Analysis/InlineSizeEstimatorAnalysis.h @@ -31,5 +31,15 @@ public: private: std::unique_ptr<TFModelEvaluator> Evaluator; }; + +class InlineSizeEstimatorAnalysisPrinterPass + : public PassInfoMixin<InlineSizeEstimatorAnalysisPrinterPass> { + raw_ostream &OS; + +public: + explicit InlineSizeEstimatorAnalysisPrinterPass(raw_ostream &OS) : OS(OS) {} + + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); +}; } // namespace llvm -#endif // LLVM_ANALYSIS_INLINESIZEESTIMATORANALYSIS_H
\ No newline at end of file +#endif // LLVM_ANALYSIS_INLINESIZEESTIMATORANALYSIS_H diff --git a/llvm/include/llvm/Analysis/InstCount.h b/llvm/include/llvm/Analysis/InstCount.h new file mode 100644 index 000000000000..e5ce822caf6e --- /dev/null +++ b/llvm/include/llvm/Analysis/InstCount.h @@ -0,0 +1,28 @@ +//===- InstCount.h - Collects the count of all instructions -----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass collects the count of all instructions and reports them +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_INSTCOUNT_H +#define LLVM_ANALYSIS_INSTCOUNT_H + +#include "llvm/IR/PassManager.h" + +namespace llvm { + +class Function; + +struct InstCountPass : PassInfoMixin<InstCountPass> { + PreservedAnalyses run(Function &F, FunctionAnalysisManager &); +}; + +} // end namespace llvm + +#endif // LLVM_ANALYSIS_INSTCOUNT_H diff --git a/llvm/include/llvm/Analysis/InstructionSimplify.h b/llvm/include/llvm/Analysis/InstructionSimplify.h index 2a39a4e09087..17d6f30a35cb 100644 --- a/llvm/include/llvm/Analysis/InstructionSimplify.h +++ b/llvm/include/llvm/Analysis/InstructionSimplify.h @@ -26,6 +26,10 @@ // same call context of that function (and not split between caller and callee // contexts of a directly recursive call, for example). // +// Additionally, these routines can't simplify to the instructions that are not +// def-reachable, meaning we can't just scan the basic block for instructions +// to simplify to. +// //===----------------------------------------------------------------------===// #ifndef LLVM_ANALYSIS_INSTRUCTIONSIMPLIFY_H @@ -98,19 +102,39 @@ struct SimplifyQuery { // be safely used. const InstrInfoQuery IIQ; + /// Controls whether simplifications are allowed to constrain the range of + /// possible values for uses of undef. If it is false, simplifications are not + /// allowed to assume a particular value for a use of undef for example. + bool CanUseUndef = true; + SimplifyQuery(const DataLayout &DL, const Instruction *CXTI = nullptr) : DL(DL), CxtI(CXTI) {} SimplifyQuery(const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT = nullptr, AssumptionCache *AC = nullptr, - const Instruction *CXTI = nullptr, bool UseInstrInfo = true) - : DL(DL), TLI(TLI), DT(DT), AC(AC), CxtI(CXTI), IIQ(UseInstrInfo) {} + const Instruction *CXTI = nullptr, bool UseInstrInfo = true, + bool CanUseUndef = true) + : DL(DL), TLI(TLI), DT(DT), AC(AC), CxtI(CXTI), IIQ(UseInstrInfo), + CanUseUndef(CanUseUndef) {} SimplifyQuery getWithInstruction(Instruction *I) const { SimplifyQuery Copy(*this); Copy.CxtI = I; return Copy; } + SimplifyQuery getWithoutUndef() const { + SimplifyQuery Copy(*this); + Copy.CanUseUndef = false; + return Copy; + } + + /// If CanUseUndef is true, returns whether \p V is undef. + /// Otherwise always return false. 
+ bool isUndefValue(Value *V) const { + if (!CanUseUndef) + return false; + return isa<UndefValue>(V); + } }; // NOTE: the explicit multiple argument versions of these functions are @@ -268,6 +292,13 @@ Value *SimplifyFreezeInst(Value *Op, const SimplifyQuery &Q); Value *SimplifyInstruction(Instruction *I, const SimplifyQuery &Q, OptimizationRemarkEmitter *ORE = nullptr); +/// See if V simplifies when its operand Op is replaced with RepOp. If not, +/// return null. +/// AllowRefinement specifies whether the simplification can be a refinement, +/// or whether it needs to be strictly identical. +Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp, + const SimplifyQuery &Q, bool AllowRefinement); + /// Replace all uses of 'I' with 'SimpleV' and simplify the uses recursively. /// /// This first performs a normal RAUW of I with SimpleV. It then recursively @@ -282,17 +313,6 @@ bool replaceAndRecursivelySimplify( const DominatorTree *DT = nullptr, AssumptionCache *AC = nullptr, SmallSetVector<Instruction *, 8> *UnsimplifiedUsers = nullptr); -/// Recursively attempt to simplify an instruction. -/// -/// This routine uses SimplifyInstruction to simplify 'I', and if successful -/// replaces uses of 'I' with the simplified value. It then recurses on each -/// of the users impacted. It returns true if any simplifications were -/// performed. -bool recursivelySimplifyInstruction(Instruction *I, - const TargetLibraryInfo *TLI = nullptr, - const DominatorTree *DT = nullptr, - AssumptionCache *AC = nullptr); - // These helper functions return a SimplifyQuery structure that contains as // many of the optional analysis we use as are currently valid. This is the // strongly preferred way of constructing SimplifyQuery in passes. diff --git a/llvm/include/llvm/Analysis/Interval.h b/llvm/include/llvm/Analysis/Interval.h index 5c9a4535bc7f..9afe659d00dd 100644 --- a/llvm/include/llvm/Analysis/Interval.h +++ b/llvm/include/llvm/Analysis/Interval.h @@ -89,9 +89,6 @@ public: return HeaderNode == I.HeaderNode; } - /// isLoop - Find out if there is a back edge in this interval... - bool isLoop() const; - /// print - Show contents in human readable format... void print(raw_ostream &O) const; }; diff --git a/llvm/include/llvm/Analysis/IntervalIterator.h b/llvm/include/llvm/Analysis/IntervalIterator.h index efaaf9715b3d..8e2273618a66 100644 --- a/llvm/include/llvm/Analysis/IntervalIterator.h +++ b/llvm/include/llvm/Analysis/IntervalIterator.h @@ -81,7 +81,7 @@ inline void addNodeToInterval(Interval *Int, BasicBlock *BB) { // BasicBlocks are added to the interval. inline void addNodeToInterval(Interval *Int, Interval *I) { // Add all of the nodes in I as new nodes in Int. - Int->Nodes.insert(Int->Nodes.end(), I->Nodes.begin(), I->Nodes.end()); + llvm::append_range(Int->Nodes, I->Nodes); } template<class NodeTy, class OrigContainer_t, class GT = GraphTraits<NodeTy *>, @@ -227,9 +227,7 @@ private: if (Int->isSuccessor(NodeHeader)) { // If we were in the successor list from before... 
remove from succ list - Int->Successors.erase(std::remove(Int->Successors.begin(), - Int->Successors.end(), NodeHeader), - Int->Successors.end()); + llvm::erase_value(Int->Successors, NodeHeader); } // Now that we have discovered that Node is in the interval, perhaps some diff --git a/llvm/include/llvm/Analysis/IteratedDominanceFrontier.h b/llvm/include/llvm/Analysis/IteratedDominanceFrontier.h index fb6605285156..8166b52aa226 100644 --- a/llvm/include/llvm/Analysis/IteratedDominanceFrontier.h +++ b/llvm/include/llvm/Analysis/IteratedDominanceFrontier.h @@ -73,13 +73,7 @@ ChildrenGetterTy<BasicBlock, IsPostDom>::get(const NodeRef &N) { return {Children.begin(), Children.end()}; } - using SnapShotBBPairTy = - std::pair<const GraphDiff<BasicBlock *, IsPostDom> *, OrderedNodeTy>; - - ChildrenTy Ret; - for (const auto &SnapShotBBPair : children<SnapShotBBPairTy>({GD, N})) - Ret.emplace_back(SnapShotBBPair.second); - return Ret; + return GD->template getChildren<IsPostDom>(N); } } // end of namespace IDFCalculatorDetail diff --git a/llvm/include/llvm/Analysis/LazyBranchProbabilityInfo.h b/llvm/include/llvm/Analysis/LazyBranchProbabilityInfo.h index f4249f74104c..3c632f02905a 100644 --- a/llvm/include/llvm/Analysis/LazyBranchProbabilityInfo.h +++ b/llvm/include/llvm/Analysis/LazyBranchProbabilityInfo.h @@ -63,7 +63,7 @@ class LazyBranchProbabilityInfoPass : public FunctionPass { BranchProbabilityInfo &getCalculated() { if (!Calculated) { assert(F && LI && "call setAnalysis"); - BPI.calculate(*F, *LI, TLI, nullptr); + BPI.calculate(*F, *LI, TLI, nullptr, nullptr); Calculated = true; } return BPI; diff --git a/llvm/include/llvm/Analysis/LazyCallGraph.h b/llvm/include/llvm/Analysis/LazyCallGraph.h index ea63b837ba70..f7a5adac2b43 100644 --- a/llvm/include/llvm/Analysis/LazyCallGraph.h +++ b/llvm/include/llvm/Analysis/LazyCallGraph.h @@ -258,7 +258,6 @@ public: iterator begin() { return iterator(Edges.begin(), Edges.end()); } iterator end() { return iterator(Edges.end(), Edges.end()); } - Edge &operator[](int i) { return Edges[i]; } Edge &operator[](Node &N) { assert(EdgeIndexMap.find(&N) != EdgeIndexMap.end() && "No such edge!"); auto &E = Edges[EdgeIndexMap.find(&N)->second]; @@ -305,13 +304,6 @@ public: /// Internal helper to remove the edge to the given function. bool removeEdgeInternal(Node &ChildN); - - /// Internal helper to replace an edge key with a new one. - /// - /// This should be used when the function for a particular node in the - /// graph gets replaced and we are updating all of the edges to that node - /// to use the new function as the key. - void replaceEdgeKey(Function &OldTarget, Function &NewTarget); }; /// A node in the call graph. @@ -606,10 +598,6 @@ public: void verify(); #endif - /// Handle any necessary parent set updates after inserting a trivial ref - /// or call edge. - void handleTrivialEdgeInsertion(Node &SourceN, Node &TargetN); - public: using iterator = pointee_iterator<SmallVectorImpl<SCC *>::const_iterator>; using range = iterator_range<iterator>; @@ -1058,12 +1046,29 @@ public: /// fully visited by the DFS prior to calling this routine. void removeDeadFunction(Function &F); - /// Introduce a node for the function \p NewF in the SCC \p C. - void addNewFunctionIntoSCC(Function &NewF, SCC &C); + /// Add a new function split/outlined from an existing function. + /// + /// The new function may only reference other functions that the original + /// function did. + /// + /// The original function must reference (either directly or indirectly) the + /// new function. 
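The two interval hunks above swap hand-written insert/erase idioms for the llvm::append_range and llvm::erase_value helpers from STLExtras. A self-contained illustration, with made-up containers and values:

#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"

static void demoSTLExtras() {
  llvm::SmallVector<int, 8> Nodes = {1, 2, 3};
  llvm::SmallVector<int, 8> More = {3, 4};

  // Old: Nodes.insert(Nodes.end(), More.begin(), More.end());
  llvm::append_range(Nodes, More);   // Nodes == {1, 2, 3, 3, 4}

  // Old: Nodes.erase(std::remove(Nodes.begin(), Nodes.end(), 3), Nodes.end());
  llvm::erase_value(Nodes, 3);       // removes every 3; Nodes == {1, 2, 4}
}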
+ /// + /// The new function may also reference the original function. + /// It may end up in a parent SCC in the case that the original function's + /// edge to the new function is a ref edge, and the edge back is a call edge. + void addSplitFunction(Function &OriginalFunction, Function &NewFunction); - /// Introduce a node for the function \p NewF, as a single node in a - /// new SCC, in the RefSCC \p RC. - void addNewFunctionIntoRefSCC(Function &NewF, RefSCC &RC); + /// Add new ref-recursive functions split/outlined from an existing function. + /// + /// The new functions may only reference other functions that the original + /// function did. The new functions may reference (not call) the original + /// function. + /// + /// The original function must reference (not call) all new functions. + /// All new functions must reference (not call) each other. + void addSplitRefRecursiveFunctions(Function &OriginalFunction, + ArrayRef<Function *> NewFunctions); ///@} @@ -1168,16 +1173,14 @@ private: /// the NodeMap. Node &insertInto(Function &F, Node *&MappedN); + /// Helper to initialize a new node created outside of creating SCCs and add + /// it to the NodeMap if necessary. For example, useful when a function is + /// split. + Node &initNode(Function &F); + /// Helper to update pointers back to the graph object during moves. void updateGraphPtrs(); - /// Helper to insert a new function, add it to the NodeMap, and populate its - /// node. - Node &createNode(Function &F); - - /// Helper to add the given Node \p N to the SCCMap, mapped to the SCC \p C. - void addNodeToSCC(SCC &C, Node &N); - /// Allocates an SCC and constructs it using the graph allocator. /// /// The arguments are forwarded to the constructor. diff --git a/llvm/include/llvm/Analysis/LazyValueInfo.h b/llvm/include/llvm/Analysis/LazyValueInfo.h index 1bc88235273e..363cb49af382 100644 --- a/llvm/include/llvm/Analysis/LazyValueInfo.h +++ b/llvm/include/llvm/Analysis/LazyValueInfo.h @@ -71,20 +71,20 @@ public: Instruction *CxtI = nullptr); /// Determine whether the specified value comparison with a constant is known - /// to be true or false at the specified instruction - /// (from an assume intrinsic). Pred is a CmpInst predicate. + /// to be true or false at the specified instruction. + /// \p Pred is a CmpInst predicate. If \p UseBlockValue is true, the block + /// value is also taken into account. Tristate getPredicateAt(unsigned Pred, Value *V, Constant *C, - Instruction *CxtI); + Instruction *CxtI, bool UseBlockValue = false); - /// Determine whether the specified value is known to be a - /// constant at the end of the specified block. Return null if not. - Constant *getConstant(Value *V, BasicBlock *BB, Instruction *CxtI = nullptr); + /// Determine whether the specified value is known to be a constant at the + /// specified instruction. Return null if not. + Constant *getConstant(Value *V, Instruction *CxtI); /// Return the ConstantRange constraint that is known to hold for the - /// specified value at the end of the specified block. This may only be called + /// specified value at the specified instruction. This may only be called /// on integer-typed Values. 
- ConstantRange getConstantRange(Value *V, BasicBlock *BB, - Instruction *CxtI = nullptr, + ConstantRange getConstantRange(Value *V, Instruction *CxtI, bool UndefAllowed = true); /// Determine whether the specified value is known to be a diff --git a/llvm/include/llvm/Analysis/Lint.h b/llvm/include/llvm/Analysis/Lint.h index 0fea81e215c9..6eb637e72782 100644 --- a/llvm/include/llvm/Analysis/Lint.h +++ b/llvm/include/llvm/Analysis/Lint.h @@ -19,30 +19,30 @@ #ifndef LLVM_ANALYSIS_LINT_H #define LLVM_ANALYSIS_LINT_H +#include "llvm/IR/PassManager.h" + namespace llvm { class FunctionPass; class Module; class Function; -/// Create a lint pass. -/// -/// Check a module or function. -FunctionPass *createLintPass(); +FunctionPass *createLintLegacyPassPass(); -/// Check a module. +/// Lint a module. /// /// This should only be used for debugging, because it plays games with /// PassManagers and stuff. -void lintModule( - const Module &M ///< The module to be checked -); +void lintModule(const Module &M); + +// Lint a function. +void lintFunction(const Function &F); -// lintFunction - Check a function. -void lintFunction( - const Function &F ///< The function to be checked -); +class LintPass : public PassInfoMixin<LintPass> { +public: + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); +}; -} // End llvm namespace +} // namespace llvm -#endif +#endif // LLVM_ANALYSIS_LINT_H diff --git a/llvm/include/llvm/Analysis/Loads.h b/llvm/include/llvm/Analysis/Loads.h index 5665a802942d..24a05610e68d 100644 --- a/llvm/include/llvm/Analysis/Loads.h +++ b/llvm/include/llvm/Analysis/Loads.h @@ -155,6 +155,15 @@ Value *FindAvailablePtrLoadStore(Value *Ptr, Type *AccessTy, bool AtLeastAtomic, BasicBlock::iterator &ScanFrom, unsigned MaxInstsToScan, AAResults *AA, bool *IsLoadCSE, unsigned *NumScanedInst); + +/// Returns true if a pointer value \p A can be replace with another pointer +/// value \B if they are deemed equal through some means (e.g. information from +/// conditions). +/// NOTE: the current implementations is incomplete and unsound. It does not +/// reject all invalid cases yet, but will be made stricter in the future. In +/// particular this means returning true means unknown if replacement is safe. +bool canReplacePointersIfEqual(Value *A, Value *B, const DataLayout &DL, + Instruction *CtxI); } #endif diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h index a5237e9ba59e..13fbe884eddf 100644 --- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h +++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h @@ -171,7 +171,8 @@ public: MemoryDepChecker(PredicatedScalarEvolution &PSE, const Loop *L) : PSE(PSE), InnermostLoop(L), AccessIdx(0), MaxSafeDepDistBytes(0), - MaxSafeRegisterWidth(-1U), FoundNonConstantDistanceDependence(false), + MaxSafeVectorWidthInBits(-1U), + FoundNonConstantDistanceDependence(false), Status(VectorizationSafetyStatus::Safe), RecordDependences(true) {} /// Register the location (instructions are given increasing numbers) @@ -204,13 +205,21 @@ public: return Status == VectorizationSafetyStatus::Safe; } + /// Return true if the number of elements that are safe to operate on + /// simultaneously is not bounded. + bool isSafeForAnyVectorWidth() const { + return MaxSafeVectorWidthInBits == UINT_MAX; + } + /// The maximum number of bytes of a vector register we can vectorize /// the accesses safely with. 
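For the Lint.h hunk above, a sketch of the two usual entry points after the change; the wrapper names here are hypothetical:

#include "llvm/Analysis/Lint.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/PassManager.h"

// Schedule the new-pass-manager LintPass in a function pipeline.
static void addLint(llvm::FunctionPassManager &FPM) {
  FPM.addPass(llvm::LintPass());
}

// Or lint a single function directly (a debugging aid, as the header notes).
static void lintNow(const llvm::Function &F) {
  llvm::lintFunction(F);
}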
uint64_t getMaxSafeDepDistBytes() { return MaxSafeDepDistBytes; } /// Return the number of elements that are safe to operate on /// simultaneously, multiplied by the size of the element in bits. - uint64_t getMaxSafeRegisterWidth() const { return MaxSafeRegisterWidth; } + uint64_t getMaxSafeVectorWidthInBits() const { + return MaxSafeVectorWidthInBits; + } /// In same cases when the dependency check fails we can still /// vectorize the loop with a dynamic array access check. @@ -275,7 +284,7 @@ private: /// operate on simultaneously, multiplied by the size of the element in bits. /// The size of the element is taken from the memory access that is most /// restrictive. - uint64_t MaxSafeRegisterWidth; + uint64_t MaxSafeVectorWidthInBits; /// If we see a non-constant dependence distance we can still try to /// vectorize this loop with runtime checks. @@ -418,7 +427,7 @@ public: bool UseDependencies); /// Returns the checks that generateChecks created. - const SmallVector<RuntimePointerCheck, 4> &getChecks() const { + const SmallVectorImpl<RuntimePointerCheck> &getChecks() const { return Checks; } diff --git a/llvm/include/llvm/Analysis/LoopAnalysisManager.h b/llvm/include/llvm/Analysis/LoopAnalysisManager.h index 0e162e03bde1..11dbd15c8678 100644 --- a/llvm/include/llvm/Analysis/LoopAnalysisManager.h +++ b/llvm/include/llvm/Analysis/LoopAnalysisManager.h @@ -57,6 +57,7 @@ struct LoopStandardAnalysisResults { ScalarEvolution &SE; TargetLibraryInfo &TLI; TargetTransformInfo &TTI; + BlockFrequencyInfo *BFI; MemorySSA *MSSA; }; diff --git a/llvm/include/llvm/Analysis/LoopCacheAnalysis.h b/llvm/include/llvm/Analysis/LoopCacheAnalysis.h index ffec78b6db2c..e8f2205545eb 100644 --- a/llvm/include/llvm/Analysis/LoopCacheAnalysis.h +++ b/llvm/include/llvm/Analysis/LoopCacheAnalysis.h @@ -14,19 +14,20 @@ #ifndef LLVM_ANALYSIS_LOOPCACHEANALYSIS_H #define LLVM_ANALYSIS_LOOPCACHEANALYSIS_H -#include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/DependenceAnalysis.h" #include "llvm/Analysis/LoopAnalysisManager.h" -#include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/ScalarEvolution.h" -#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/Instructions.h" -#include "llvm/Pass.h" +#include "llvm/IR/PassManager.h" #include "llvm/Support/raw_ostream.h" namespace llvm { +class AAResults; +class DependenceInfo; class LPMUpdater; +class ScalarEvolution; +class SCEV; +class TargetTransformInfo; + using CacheCostTy = int64_t; using LoopVectorTy = SmallVector<Loop *, 8>; @@ -70,7 +71,7 @@ public: /// the same chace line iff the distance between them in the innermost /// dimension is less than the cache line size. Return None if unsure. Optional<bool> hasSpacialReuse(const IndexedReference &Other, unsigned CLS, - AliasAnalysis &AA) const; + AAResults &AA) const; /// Return true if the current object and the indexed reference \p Other /// have distance smaller than \p MaxDistance in the dimension associated with @@ -78,7 +79,7 @@ public: /// MaxDistance and None if unsure. Optional<bool> hasTemporalReuse(const IndexedReference &Other, unsigned MaxDistance, const Loop &L, - DependenceInfo &DI, AliasAnalysis &AA) const; + DependenceInfo &DI, AAResults &AA) const; /// Compute the cost of the reference w.r.t. the given loop \p L when it is /// considered in the innermost position in the loop nest. @@ -118,7 +119,7 @@ private: /// Return true if the given reference \p Other is definetely aliased with /// the indexed reference represented by this class. 
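The MemoryDepChecker renaming above (getMaxSafeRegisterWidth to getMaxSafeVectorWidthInBits, plus the new isSafeForAnyVectorWidth) is a query-side change only. A hypothetical consumer might wrap it like this, using 0 as a stand-in for "no dependence-imposed bound":

#include "llvm/Analysis/LoopAccessAnalysis.h"
#include <cstdint>

static uint64_t dependenceSafeWidthInBits(const llvm::MemoryDepChecker &Deps) {
  if (Deps.isSafeForAnyVectorWidth())
    return 0; // unbounded: no dependence restricts the vector width
  return Deps.getMaxSafeVectorWidthInBits();
}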
- bool isAliased(const IndexedReference &Other, AliasAnalysis &AA) const; + bool isAliased(const IndexedReference &Other, AAResults &AA) const; private: /// True if the reference can be delinearized, false otherwise. @@ -183,7 +184,7 @@ public: /// between array elements accessed in a loop so that the elements are /// classified to have temporal reuse. CacheCost(const LoopVectorTy &Loops, const LoopInfo &LI, ScalarEvolution &SE, - TargetTransformInfo &TTI, AliasAnalysis &AA, DependenceInfo &DI, + TargetTransformInfo &TTI, AAResults &AA, DependenceInfo &DI, Optional<unsigned> TRT = None); /// Create a CacheCost for the loop nest rooted by \p Root. @@ -197,9 +198,9 @@ public: /// Return the estimated cost of loop \p L if the given loop is part of the /// loop nest associated with this object. Return -1 otherwise. CacheCostTy getLoopCost(const Loop &L) const { - auto IT = std::find_if( - LoopCosts.begin(), LoopCosts.end(), - [&L](const LoopCacheCostTy &LCC) { return LCC.first == &L; }); + auto IT = llvm::find_if(LoopCosts, [&L](const LoopCacheCostTy &LCC) { + return LCC.first == &L; + }); return (IT != LoopCosts.end()) ? (*IT).second : -1; } @@ -258,7 +259,7 @@ private: const LoopInfo &LI; ScalarEvolution &SE; TargetTransformInfo &TTI; - AliasAnalysis &AA; + AAResults &AA; DependenceInfo &DI; }; diff --git a/llvm/include/llvm/Analysis/LoopInfo.h b/llvm/include/llvm/Analysis/LoopInfo.h index 35fe2a03a2a2..a5717bae12c3 100644 --- a/llvm/include/llvm/Analysis/LoopInfo.h +++ b/llvm/include/llvm/Analysis/LoopInfo.h @@ -155,7 +155,17 @@ public: iterator end() const { return getSubLoops().end(); } reverse_iterator rbegin() const { return getSubLoops().rbegin(); } reverse_iterator rend() const { return getSubLoops().rend(); } - bool empty() const { return getSubLoops().empty(); } + + // LoopInfo does not detect irreducible control flow, just natural + // loops. That is, it is possible that there is cyclic control + // flow within the "innermost loop" or around the "outermost + // loop". + + /// Return true if the loop does not contain any (natural) loops. + bool isInnermost() const { return getSubLoops().empty(); } + /// Return true if the loop does not have a parent (natural) loop + // (i.e. it is outermost, which is the same as top-level). + bool isOutermost() const { return getParentLoop() == nullptr; } /// Get a list of the basic blocks which make up this loop. ArrayRef<BlockT *> getBlocks() const { @@ -292,6 +302,9 @@ public: /// Otherwise return null. BlockT *getUniqueExitBlock() const; + /// Return true if this loop does not have any exit blocks. + bool hasNoExitBlocks() const; + /// Edge type. typedef std::pair<BlockT *, BlockT *> Edge; @@ -830,6 +843,9 @@ public: /// unrolling pass is run more than once (which it generally is). void setLoopAlreadyUnrolled(); + /// Add llvm.loop.mustprogress to this loop's loop id metadata. + void setLoopMustProgress(); + void dump() const; void dumpVerbose() const; @@ -974,7 +990,7 @@ public: LoopT *removeLoop(iterator I) { assert(I != end() && "Cannot remove end iterator!"); LoopT *L = *I; - assert(!L->getParentLoop() && "Not a top-level loop!"); + assert(L->isOutermost() && "Not a top-level loop!"); TopLevelLoops.erase(TopLevelLoops.begin() + (I - begin())); return L; } @@ -1002,7 +1018,7 @@ public: /// This adds the specified loop to the collection of top-level loops. 
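The LoopInfo change above replaces empty() and explicit getParentLoop() == nullptr checks with the self-describing isInnermost() and isOutermost(). A minimal sketch; the helper name is made up:

#include "llvm/Analysis/LoopInfo.h"

// True for a loop that is both top-level and contains no nested loops.
static bool isLeafTopLevelLoop(const llvm::Loop &L) {
  return L.isInnermost() && L.isOutermost();
}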
void addTopLevelLoop(LoopT *New) { - assert(!New->getParentLoop() && "Loop already in subloop!"); + assert(New->isOutermost() && "Loop already in subloop!"); TopLevelLoops.push_back(New); } diff --git a/llvm/include/llvm/Analysis/LoopInfoImpl.h b/llvm/include/llvm/Analysis/LoopInfoImpl.h index 58a4abafcc85..426b349c6b8a 100644 --- a/llvm/include/llvm/Analysis/LoopInfoImpl.h +++ b/llvm/include/llvm/Analysis/LoopInfoImpl.h @@ -68,6 +68,13 @@ void LoopBase<BlockT, LoopT>::getExitBlocks( ExitBlocks.push_back(Succ); } +template <class BlockT, class LoopT> +bool LoopBase<BlockT, LoopT>::hasNoExitBlocks() const { + SmallVector<BlockT *, 8> ExitBlocks; + getExitBlocks(ExitBlocks); + return ExitBlocks.empty(); +} + /// getExitBlock - If getExitBlocks would return exactly one block, /// return that block. Otherwise return null. template <class BlockT, class LoopT> @@ -502,7 +509,7 @@ void PopulateLoopsDFS<BlockT, LoopT>::insertIntoLoop(BlockT *Block) { if (Subloop && Block == Subloop->getHeader()) { // We reach this point once per subloop after processing all the blocks in // the subloop. - if (Subloop->getParentLoop()) + if (!Subloop->isOutermost()) Subloop->getParentLoop()->getSubLoopsVector().push_back(Subloop); else LI->addTopLevelLoop(Subloop); @@ -666,12 +673,13 @@ static void compareLoops(const LoopT *L, const LoopT *OtherL, "Mismatched basic blocks in the loops!"); const SmallPtrSetImpl<const BlockT *> &BlocksSet = L->getBlocksSet(); - const SmallPtrSetImpl<const BlockT *> &OtherBlocksSet = L->getBlocksSet(); + const SmallPtrSetImpl<const BlockT *> &OtherBlocksSet = + OtherL->getBlocksSet(); assert(BlocksSet.size() == OtherBlocksSet.size() && - std::all_of(BlocksSet.begin(), BlocksSet.end(), - [&OtherBlocksSet](const BlockT *BB) { - return OtherBlocksSet.count(BB); - }) && + llvm::all_of(BlocksSet, + [&OtherBlocksSet](const BlockT *BB) { + return OtherBlocksSet.count(BB); + }) && "Mismatched basic blocks in BlocksSets!"); } #endif @@ -681,7 +689,7 @@ void LoopInfoBase<BlockT, LoopT>::verify( const DomTreeBase<BlockT> &DomTree) const { DenseSet<const LoopT *> Loops; for (iterator I = begin(), E = end(); I != E; ++I) { - assert(!(*I)->getParentLoop() && "Top-level loop has a parent!"); + assert((*I)->isOutermost() && "Top-level loop has a parent!"); (*I)->verifyLoopNest(&Loops); } diff --git a/llvm/include/llvm/Analysis/LoopNestAnalysis.h b/llvm/include/llvm/Analysis/LoopNestAnalysis.h index 792958a312ce..9c4fb4dbc29b 100644 --- a/llvm/include/llvm/Analysis/LoopNestAnalysis.h +++ b/llvm/include/llvm/Analysis/LoopNestAnalysis.h @@ -14,6 +14,7 @@ #ifndef LLVM_ANALYSIS_LOOPNESTANALYSIS_H #define LLVM_ANALYSIS_LOOPNESTANALYSIS_H +#include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/LoopAnalysisManager.h" #include "llvm/Analysis/LoopInfo.h" @@ -59,6 +60,12 @@ public: /// getMaxPerfectDepth(Loop_i) would return 2. static unsigned getMaxPerfectDepth(const Loop &Root, ScalarEvolution &SE); + /// Recursivelly traverse all empty 'single successor' basic blocks of \p From + /// (if there are any). Return the last basic block found or \p End if it was + /// reached during the search. + static const BasicBlock &skipEmptyBlockUntil(const BasicBlock *From, + const BasicBlock *End); + /// Return the outermost loop in the loop nest. Loop &getOutermostLoop() const { return *Loops.front(); } @@ -124,10 +131,16 @@ public: /// Return true if all loops in the loop nest are in simplify form. 
bool areAllLoopsSimplifyForm() const { - return llvm::all_of(Loops, - [](const Loop *L) { return L->isLoopSimplifyForm(); }); + return all_of(Loops, [](const Loop *L) { return L->isLoopSimplifyForm(); }); + } + + /// Return true if all loops in the loop nest are in rotated form. + bool areAllLoopsRotatedForm() const { + return all_of(Loops, [](const Loop *L) { return L->isRotatedForm(); }); } + StringRef getName() const { return Loops.front()->getName(); } + protected: const unsigned MaxPerfectDepth; // maximum perfect nesting depth level. LoopVectorTy Loops; // the loops in the nest (in breadth first order). diff --git a/llvm/include/llvm/Analysis/MLInlineAdvisor.h b/llvm/include/llvm/Analysis/MLInlineAdvisor.h index cbe3b1f1f4e6..54edbb823263 100644 --- a/llvm/include/llvm/Analysis/MLInlineAdvisor.h +++ b/llvm/include/llvm/Analysis/MLInlineAdvisor.h @@ -31,8 +31,6 @@ public: void onPassEntry() override; - std::unique_ptr<InlineAdvice> getAdvice(CallBase &CB) override; - int64_t getIRSize(const Function &F) const { return F.getInstructionCount(); } void onSuccessfulInlining(const MLInlineAdvice &Advice, bool CalleeWasDeleted); @@ -42,13 +40,16 @@ public: const MLModelRunner &getModelRunner() const { return *ModelRunner.get(); } protected: - virtual std::unique_ptr<MLInlineAdvice> - getMandatoryAdvice(CallBase &CB, OptimizationRemarkEmitter &ORE); + std::unique_ptr<InlineAdvice> getAdviceImpl(CallBase &CB) override; + + std::unique_ptr<InlineAdvice> getMandatoryAdvice(CallBase &CB, + bool Advice) override; + + virtual std::unique_ptr<MLInlineAdvice> getMandatoryAdviceImpl(CallBase &CB); virtual std::unique_ptr<MLInlineAdvice> getAdviceFromModel(CallBase &CB, OptimizationRemarkEmitter &ORE); - Module &M; std::unique_ptr<MLModelRunner> ModelRunner; private: @@ -104,4 +105,4 @@ private: } // namespace llvm -#endif // LLVM_ANALYSIS_MLINLINEADVISOR_H
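A sketch of consuming the LoopNest queries added above; the LoopNest would typically come from LoopNestAnalysis or LoopNest::getLoopNest, and the reporting here is only illustrative:

#include "llvm/Analysis/LoopNestAnalysis.h"
#include "llvm/Support/raw_ostream.h"

static void reportNest(const llvm::LoopNest &LN) {
  if (LN.areAllLoopsSimplifyForm() && LN.areAllLoopsRotatedForm())
    llvm::errs() << "loop nest " << LN.getName()
                 << " is simplified and rotated\n";
}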
\ No newline at end of file +#endif // LLVM_ANALYSIS_MLINLINEADVISOR_H diff --git a/llvm/include/llvm/Analysis/MemDerefPrinter.h b/llvm/include/llvm/Analysis/MemDerefPrinter.h new file mode 100644 index 000000000000..bafdc543eeaf --- /dev/null +++ b/llvm/include/llvm/Analysis/MemDerefPrinter.h @@ -0,0 +1,24 @@ +//===- MemDerefPrinter.h - Printer for isDereferenceablePointer -----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_MEMDEREFPRINTER_H +#define LLVM_ANALYSIS_MEMDEREFPRINTER_H + +#include "llvm/IR/PassManager.h" + +namespace llvm { +class MemDerefPrinterPass : public PassInfoMixin<MemDerefPrinterPass> { + raw_ostream &OS; + +public: + MemDerefPrinterPass(raw_ostream &OS) : OS(OS) {} + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); +}; +} // namespace llvm + +#endif // LLVM_ANALYSIS_MEMDEREFPRINTER_H diff --git a/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h b/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h index 0777dc7d7862..efde00f82d57 100644 --- a/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h +++ b/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h @@ -302,7 +302,7 @@ private: /// The maximum size of the dereferences of the pointer. /// /// May be UnknownSize if the sizes are unknown. - LocationSize Size = LocationSize::unknown(); + LocationSize Size = LocationSize::afterPointer(); /// The AA tags associated with dereferences of the pointer. /// /// The members may be null if there are no tags or conflicting tags. diff --git a/llvm/include/llvm/Analysis/MemoryLocation.h b/llvm/include/llvm/Analysis/MemoryLocation.h index d01ac7da85cd..3b188d763ef2 100644 --- a/llvm/include/llvm/Analysis/MemoryLocation.h +++ b/llvm/include/llvm/Analysis/MemoryLocation.h @@ -64,10 +64,11 @@ class VAArgInst; // None. class LocationSize { enum : uint64_t { - Unknown = ~uint64_t(0), + BeforeOrAfterPointer = ~uint64_t(0), + AfterPointer = BeforeOrAfterPointer - 1, + MapEmpty = BeforeOrAfterPointer - 2, + MapTombstone = BeforeOrAfterPointer - 3, ImpreciseBit = uint64_t(1) << 63, - MapEmpty = Unknown - 1, - MapTombstone = Unknown - 2, // The maximum value we can represent without falling back to 'unknown'. MaxValue = (MapTombstone - 1) & ~ImpreciseBit, @@ -81,7 +82,11 @@ class LocationSize { constexpr LocationSize(uint64_t Raw, DirectConstruction): Value(Raw) {} - static_assert(Unknown & ImpreciseBit, "Unknown is imprecise by definition."); + static_assert(AfterPointer & ImpreciseBit, + "AfterPointer is imprecise by definition."); + static_assert(BeforeOrAfterPointer & ImpreciseBit, + "BeforeOrAfterPointer is imprecise by definition."); + public: // FIXME: Migrate all users to construct via either `precise` or `upperBound`, // to make it more obvious at the callsite the kind of size that they're @@ -90,12 +95,12 @@ public: // Since the overwhelming majority of users of this provide precise values, // this assumes the provided value is precise. constexpr LocationSize(uint64_t Raw) - : Value(Raw > MaxValue ? Unknown : Raw) {} + : Value(Raw > MaxValue ? 
AfterPointer : Raw) {} static LocationSize precise(uint64_t Value) { return LocationSize(Value); } static LocationSize precise(TypeSize Value) { if (Value.isScalable()) - return unknown(); + return afterPointer(); return precise(Value.getFixedSize()); } @@ -104,17 +109,25 @@ public: if (LLVM_UNLIKELY(Value == 0)) return precise(0); if (LLVM_UNLIKELY(Value > MaxValue)) - return unknown(); + return afterPointer(); return LocationSize(Value | ImpreciseBit, Direct); } static LocationSize upperBound(TypeSize Value) { if (Value.isScalable()) - return unknown(); + return afterPointer(); return upperBound(Value.getFixedSize()); } - constexpr static LocationSize unknown() { - return LocationSize(Unknown, Direct); + /// Any location after the base pointer (but still within the underlying + /// object). + constexpr static LocationSize afterPointer() { + return LocationSize(AfterPointer, Direct); + } + + /// Any location before or after the base pointer (but still within the + /// underlying object). + constexpr static LocationSize beforeOrAfterPointer() { + return LocationSize(BeforeOrAfterPointer, Direct); } // Sentinel values, generally used for maps. @@ -131,20 +144,24 @@ public: if (Other == *this) return *this; - if (!hasValue() || !Other.hasValue()) - return unknown(); + if (Value == BeforeOrAfterPointer || Other.Value == BeforeOrAfterPointer) + return beforeOrAfterPointer(); + if (Value == AfterPointer || Other.Value == AfterPointer) + return afterPointer(); return upperBound(std::max(getValue(), Other.getValue())); } - bool hasValue() const { return Value != Unknown; } + bool hasValue() const { + return Value != AfterPointer && Value != BeforeOrAfterPointer; + } uint64_t getValue() const { assert(hasValue() && "Getting value from an unknown LocationSize!"); return Value & ~ImpreciseBit; } // Returns whether or not this value is precise. Note that if a value is - // precise, it's guaranteed to not be `unknown()`. + // precise, it's guaranteed to not be unknown. bool isPrecise() const { return (Value & ImpreciseBit) == 0; } @@ -152,6 +169,9 @@ public: // Convenience method to check if this LocationSize's value is 0. bool isZero() const { return hasValue() && getValue() == 0; } + /// Whether accesses before the base pointer are possible. + bool mayBeBeforePointer() const { return Value == BeforeOrAfterPointer; } + bool operator==(const LocationSize &Other) const { return Value == Other.Value; } @@ -242,14 +262,30 @@ public: return getForArgument(Call, ArgIdx, &TLI); } + /// Return a location that may access any location after Ptr, while remaining + /// within the underlying object. + static MemoryLocation getAfter(const Value *Ptr, + const AAMDNodes &AATags = AAMDNodes()) { + return MemoryLocation(Ptr, LocationSize::afterPointer(), AATags); + } + + /// Return a location that may access any location before or after Ptr, while + /// remaining within the underlying object. + static MemoryLocation + getBeforeOrAfter(const Value *Ptr, const AAMDNodes &AATags = AAMDNodes()) { + return MemoryLocation(Ptr, LocationSize::beforeOrAfterPointer(), AATags); + } + // Return the exact size if the exact size is known at compiletime, // otherwise return MemoryLocation::UnknownSize. static uint64_t getSizeOrUnknown(const TypeSize &T) { return T.isScalable() ? 
UnknownSize : T.getFixedSize(); } - explicit MemoryLocation(const Value *Ptr = nullptr, - LocationSize Size = LocationSize::unknown(), + MemoryLocation() + : Ptr(nullptr), Size(LocationSize::beforeOrAfterPointer()), AATags() {} + + explicit MemoryLocation(const Value *Ptr, LocationSize Size, const AAMDNodes &AATags = AAMDNodes()) : Ptr(Ptr), Size(Size), AATags(AATags) {} diff --git a/llvm/include/llvm/Analysis/MemorySSA.h b/llvm/include/llvm/Analysis/MemorySSA.h index 5ce2b3fd047f..63c031b1921f 100644 --- a/llvm/include/llvm/Analysis/MemorySSA.h +++ b/llvm/include/llvm/Analysis/MemorySSA.h @@ -88,6 +88,7 @@ #include "llvm/IR/DerivedUser.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Module.h" +#include "llvm/IR/Operator.h" #include "llvm/IR/Type.h" #include "llvm/IR/Use.h" #include "llvm/IR/User.h" @@ -108,6 +109,7 @@ namespace llvm { /// Enables memory ssa as a dependency for loop passes. extern cl::opt<bool> EnableMSSALoopDependency; +class AllocaInst; class Function; class Instruction; class MemoryAccess; @@ -270,7 +272,7 @@ public: // Retrieve AliasResult type of the optimized access. Ideally this would be // returned by the caching walker and may go away in the future. Optional<AliasResult> getOptimizedAccessType() const { - return OptimizedAccessAlias; + return isOptimized() ? OptimizedAccessAlias : None; } /// Reset the ID of what this MemoryUse was optimized to, causing it to @@ -840,7 +842,6 @@ private: CachingWalker<AliasAnalysis> *getWalkerImpl(); void buildMemorySSA(BatchAAResults &BAA); - void optimizeUses(); void prepareForMoveTo(MemoryAccess *, BasicBlock *); void verifyUseInDefs(MemoryAccess *, MemoryAccess *) const; @@ -848,15 +849,11 @@ private: using AccessMap = DenseMap<const BasicBlock *, std::unique_ptr<AccessList>>; using DefsMap = DenseMap<const BasicBlock *, std::unique_ptr<DefsList>>; - void - determineInsertionPoint(const SmallPtrSetImpl<BasicBlock *> &DefiningBlocks); void markUnreachableAsLiveOnEntry(BasicBlock *BB); - bool dominatesUse(const MemoryAccess *, const MemoryAccess *) const; MemoryPhi *createMemoryPhi(BasicBlock *BB); template <typename AliasAnalysisType> MemoryUseOrDef *createNewAccess(Instruction *, AliasAnalysisType *, const MemoryUseOrDef *Template = nullptr); - MemoryAccess *findDominatingDef(BasicBlock *, enum InsertionPlace); void placePHINodes(const SmallPtrSetImpl<BasicBlock *> &); MemoryAccess *renameBlock(BasicBlock *, MemoryAccess *, bool); void renameSuccessorPhis(BasicBlock *, MemoryAccess *, bool); @@ -1181,9 +1178,11 @@ class upward_defs_iterator using BaseT = upward_defs_iterator::iterator_facade_base; public: - upward_defs_iterator(const MemoryAccessPair &Info, DominatorTree *DT) + upward_defs_iterator(const MemoryAccessPair &Info, DominatorTree *DT, + bool *PerformedPhiTranslation = nullptr) : DefIterator(Info.first), Location(Info.second), - OriginalAccess(Info.first), DT(DT) { + OriginalAccess(Info.first), DT(DT), + PerformedPhiTranslation(PerformedPhiTranslation) { CurrentPair.first = nullptr; WalkingPhi = Info.first && isa<MemoryPhi>(Info.first); @@ -1215,38 +1214,60 @@ public: BasicBlock *getPhiArgBlock() const { return DefIterator.getPhiArgBlock(); } private: + /// Returns true if \p Ptr is guaranteed to be loop invariant for any possible + /// loop. In particular, this guarantees that it only references a single + /// MemoryLocation during execution of the containing function. 
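With LocationSize::unknown() split into the two imprecise kinds above, conservative callers now pick between them explicitly. A small sketch; both function names are hypothetical:

#include "llvm/Analysis/MemoryLocation.h"

// The most conservative location for Ptr: the access may start before or
// after the pointer, as long as it stays within the underlying object.
static llvm::MemoryLocation conservativeLoc(const llvm::Value *Ptr) {
  return llvm::MemoryLocation::getBeforeOrAfter(Ptr);
}

// Distinguishes beforeOrAfterPointer() from afterPointer() and precise sizes.
static bool mayStartBeforePointer(const llvm::MemoryLocation &Loc) {
  return Loc.Size.mayBeBeforePointer();
}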
+ bool IsGuaranteedLoopInvariant(Value *Ptr) const; + void fillInCurrentPair() { CurrentPair.first = *DefIterator; + CurrentPair.second = Location; if (WalkingPhi && Location.Ptr) { + // Mark size as unknown, if the location is not guaranteed to be + // loop-invariant for any possible loop in the function. Setting the size + // to unknown guarantees that any memory accesses that access locations + // after the pointer are considered as clobbers, which is important to + // catch loop carried dependences. + if (Location.Ptr && + !IsGuaranteedLoopInvariant(const_cast<Value *>(Location.Ptr))) + CurrentPair.second = + Location.getWithNewSize(LocationSize::beforeOrAfterPointer()); PHITransAddr Translator( const_cast<Value *>(Location.Ptr), OriginalAccess->getBlock()->getModule()->getDataLayout(), nullptr); + if (!Translator.PHITranslateValue(OriginalAccess->getBlock(), DefIterator.getPhiArgBlock(), DT, - false)) { - if (Translator.getAddr() != Location.Ptr) { - CurrentPair.second = Location.getWithNewPtr(Translator.getAddr()); - return; + true)) { + Value *TransAddr = Translator.getAddr(); + if (TransAddr != Location.Ptr) { + CurrentPair.second = CurrentPair.second.getWithNewPtr(TransAddr); + + if (TransAddr && + !IsGuaranteedLoopInvariant(const_cast<Value *>(TransAddr))) + CurrentPair.second = CurrentPair.second.getWithNewSize( + LocationSize::beforeOrAfterPointer()); + + if (PerformedPhiTranslation) + *PerformedPhiTranslation = true; } - } else { - CurrentPair.second = Location.getWithNewSize(LocationSize::unknown()); - return; } } - CurrentPair.second = Location; } MemoryAccessPair CurrentPair; memoryaccess_def_iterator DefIterator; MemoryLocation Location; MemoryAccess *OriginalAccess = nullptr; - bool WalkingPhi = false; DominatorTree *DT = nullptr; + bool WalkingPhi = false; + bool *PerformedPhiTranslation = nullptr; }; -inline upward_defs_iterator upward_defs_begin(const MemoryAccessPair &Pair, - DominatorTree &DT) { - return upward_defs_iterator(Pair, &DT); +inline upward_defs_iterator +upward_defs_begin(const MemoryAccessPair &Pair, DominatorTree &DT, + bool *PerformedPhiTranslation = nullptr) { + return upward_defs_iterator(Pair, &DT, PerformedPhiTranslation); } inline upward_defs_iterator upward_defs_end() { return upward_defs_iterator(); } diff --git a/llvm/include/llvm/Analysis/MemorySSAUpdater.h b/llvm/include/llvm/Analysis/MemorySSAUpdater.h index 20588ef083c5..b0bf2e5ead62 100644 --- a/llvm/include/llvm/Analysis/MemorySSAUpdater.h +++ b/llvm/include/llvm/Analysis/MemorySSAUpdater.h @@ -52,8 +52,6 @@ class LoopBlocksRPO; using ValueToValueMapTy = ValueMap<const Value *, WeakTrackingVH>; using PhiToDefMap = SmallDenseMap<MemoryPhi *, MemoryAccess *>; using CFGUpdate = cfg::Update<BasicBlock *>; -using GraphDiffInvBBPair = - std::pair<const GraphDiff<BasicBlock *> *, Inverse<BasicBlock *>>; class MemorySSAUpdater { private: @@ -121,8 +119,11 @@ public: ArrayRef<BasicBlock *> ExitBlocks, ArrayRef<std::unique_ptr<ValueToValueMapTy>> VMaps, DominatorTree &DT); - /// Apply CFG updates, analogous with the DT edge updates. - void applyUpdates(ArrayRef<CFGUpdate> Updates, DominatorTree &DT); + /// Apply CFG updates, analogous with the DT edge updates. By default, the + /// DT is assumed to be already up to date. If UpdateDTFirst is true, first + /// update the DT with the same updates. + void applyUpdates(ArrayRef<CFGUpdate> Updates, DominatorTree &DT, + bool UpdateDTFirst = false); /// Apply CFG insert updates, analogous with the DT edge updates. 
void applyInsertUpdates(ArrayRef<CFGUpdate> Updates, DominatorTree &DT); diff --git a/llvm/include/llvm/Analysis/ModuleDebugInfoPrinter.h b/llvm/include/llvm/Analysis/ModuleDebugInfoPrinter.h new file mode 100644 index 000000000000..99aa315319b8 --- /dev/null +++ b/llvm/include/llvm/Analysis/ModuleDebugInfoPrinter.h @@ -0,0 +1,29 @@ +//===- ModuleDebugInfoPrinter.h - -----------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_MODULEDEBUGINFOPRINTER_H +#define LLVM_ANALYSIS_MODULEDEBUGINFOPRINTER_H + +#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/PassManager.h" +#include "llvm/Support/raw_ostream.h" + +namespace llvm { + +class ModuleDebugInfoPrinterPass + : public PassInfoMixin<ModuleDebugInfoPrinterPass> { + DebugInfoFinder Finder; + raw_ostream &OS; + +public: + explicit ModuleDebugInfoPrinterPass(raw_ostream &OS); + PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); +}; +} // end namespace llvm + +#endif // LLVM_ANALYSIS_MODULEDEBUGINFOPRINTER_H diff --git a/llvm/include/llvm/Analysis/MustExecute.h b/llvm/include/llvm/Analysis/MustExecute.h index a3b7bee97808..df489aaa534d 100644 --- a/llvm/include/llvm/Analysis/MustExecute.h +++ b/llvm/include/llvm/Analysis/MustExecute.h @@ -27,6 +27,8 @@ #include "llvm/ADT/DenseSet.h" #include "llvm/Analysis/EHPersonalities.h" #include "llvm/Analysis/InstructionPrecedenceTracking.h" +#include "llvm/IR/PassManager.h" +#include "llvm/Support/raw_ostream.h" namespace llvm { @@ -541,6 +543,23 @@ private: MustBeExecutedIterator EndIterator; }; +class MustExecutePrinterPass : public PassInfoMixin<MustExecutePrinterPass> { + raw_ostream &OS; + +public: + MustExecutePrinterPass(raw_ostream &OS) : OS(OS) {} + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); +}; + +class MustBeExecutedContextPrinterPass + : public PassInfoMixin<MustBeExecutedContextPrinterPass> { + raw_ostream &OS; + +public: + MustBeExecutedContextPrinterPass(raw_ostream &OS) : OS(OS) {} + PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); +}; + } // namespace llvm #endif diff --git a/llvm/include/llvm/Analysis/ObjCARCAnalysisUtils.h b/llvm/include/llvm/Analysis/ObjCARCAnalysisUtils.h index cad1c52f7f87..16c5f6701da0 100644 --- a/llvm/include/llvm/Analysis/ObjCARCAnalysisUtils.h +++ b/llvm/include/llvm/Analysis/ObjCARCAnalysisUtils.h @@ -23,7 +23,6 @@ #define LLVM_LIB_ANALYSIS_OBJCARCANALYSISUTILS_H #include "llvm/ADT/Optional.h" -#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/ObjCARCInstKind.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Constants.h" @@ -31,6 +30,9 @@ #include "llvm/IR/ValueHandle.h" namespace llvm { + +class AAResults; + namespace objcarc { /// A handy option to enable/disable all ARC Optimizations. @@ -64,10 +66,9 @@ inline bool ModuleHasARC(const Module &M) { /// This is a wrapper around getUnderlyingObject which also knows how to /// look through objc_retain and objc_autorelease calls, which we know to return /// their argument verbatim. 
-inline const Value *GetUnderlyingObjCPtr(const Value *V, - const DataLayout &DL) { +inline const Value *GetUnderlyingObjCPtr(const Value *V) { for (;;) { - V = GetUnderlyingObject(V, DL); + V = getUnderlyingObject(V); if (!IsForwarding(GetBasicARCInstKind(V))) break; V = cast<CallInst>(V)->getArgOperand(0); @@ -78,12 +79,12 @@ inline const Value *GetUnderlyingObjCPtr(const Value *V, /// A wrapper for GetUnderlyingObjCPtr used for results memoization. inline const Value * -GetUnderlyingObjCPtrCached(const Value *V, const DataLayout &DL, +GetUnderlyingObjCPtrCached(const Value *V, DenseMap<const Value *, WeakTrackingVH> &Cache) { if (auto InCache = Cache.lookup(V)) return InCache; - const Value *Computed = GetUnderlyingObjCPtr(V, DL); + const Value *Computed = GetUnderlyingObjCPtr(V); Cache[V] = const_cast<Value *>(Computed); return Computed; } @@ -146,7 +147,7 @@ inline bool IsPotentialRetainableObjPtr(const Value *Op) { return false; // Special arguments can not be a valid retainable object pointer. if (const Argument *Arg = dyn_cast<Argument>(Op)) - if (Arg->hasPassPointeeByValueAttr() || Arg->hasNestAttr() || + if (Arg->hasPassPointeeByValueCopyAttr() || Arg->hasNestAttr() || Arg->hasStructRetAttr()) return false; // Only consider values with pointer types. @@ -162,24 +163,7 @@ inline bool IsPotentialRetainableObjPtr(const Value *Op) { return true; } -inline bool IsPotentialRetainableObjPtr(const Value *Op, - AliasAnalysis &AA) { - // First make the rudimentary check. - if (!IsPotentialRetainableObjPtr(Op)) - return false; - - // Objects in constant memory are not reference-counted. - if (AA.pointsToConstantMemory(Op)) - return false; - - // Pointers in constant memory are not pointing to reference-counted objects. - if (const LoadInst *LI = dyn_cast<LoadInst>(Op)) - if (AA.pointsToConstantMemory(LI->getPointerOperand())) - return false; - - // Otherwise assume the worst. - return true; -} +bool IsPotentialRetainableObjPtr(const Value *Op, AAResults &AA); /// Helper for GetARCInstKind. Determines what kind of construct CS /// is. diff --git a/llvm/include/llvm/Analysis/OptimizationRemarkEmitter.h b/llvm/include/llvm/Analysis/OptimizationRemarkEmitter.h index ab97d5b8504e..9815dd05cd1c 100644 --- a/llvm/include/llvm/Analysis/OptimizationRemarkEmitter.h +++ b/llvm/include/llvm/Analysis/OptimizationRemarkEmitter.h @@ -88,8 +88,14 @@ public: /// provide more context so that non-trivial false positives can be quickly /// detected by the user. 
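The ObjC ARC hunk above drops the DataLayout parameter because the underlying pointer walk no longer needs it. The plain ValueTracking entry point follows the same pattern; a minimal sketch with a hypothetical helper name:

#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Value.h"

// Strip casts and GEPs (the ARC wrapper additionally looks through forwarding
// calls) to find the base object; no DataLayout argument is required anymore.
static const llvm::Value *baseObject(const llvm::Value *V) {
  return llvm::getUnderlyingObject(V);
}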
bool allowExtraAnalysis(StringRef PassName) const { - return (F->getContext().getLLVMRemarkStreamer() || - F->getContext().getDiagHandlerPtr()->isAnyRemarkEnabled(PassName)); + return OptimizationRemarkEmitter::allowExtraAnalysis(*F, PassName); + } + static bool allowExtraAnalysis(const Function &F, StringRef PassName) { + return allowExtraAnalysis(F.getContext(), PassName); + } + static bool allowExtraAnalysis(LLVMContext &Ctx, StringRef PassName) { + return Ctx.getLLVMRemarkStreamer() || + Ctx.getDiagHandlerPtr()->isAnyRemarkEnabled(PassName); } private: diff --git a/llvm/include/llvm/Analysis/PhiValues.h b/llvm/include/llvm/Analysis/PhiValues.h index ea879d727282..c0e91c8b0bdf 100644 --- a/llvm/include/llvm/Analysis/PhiValues.h +++ b/llvm/include/llvm/Analysis/PhiValues.h @@ -21,7 +21,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/IR/PassManager.h" #include "llvm/IR/ValueHandle.h" @@ -40,7 +40,7 @@ class Function; /// it is queried. class PhiValues { public: - using ValueSet = SmallPtrSet<Value *, 4>; + using ValueSet = SmallSetVector<Value *, 4>; /// Construct an empty PhiValues. PhiValues(const Function &F) : F(F) {} @@ -70,8 +70,7 @@ public: FunctionAnalysisManager::Invalidator &); private: - using PhiSet = SmallPtrSet<const PHINode *, 4>; - using ConstValueSet = SmallPtrSet<const Value *, 4>; + using ConstValueSet = SmallSetVector<const Value *, 4>; /// The next depth number to be used by processPhi. unsigned int NextDepthNumber = 1; diff --git a/llvm/include/llvm/Analysis/ProfileSummaryInfo.h b/llvm/include/llvm/Analysis/ProfileSummaryInfo.h index a1fea9fefc9a..a4e6ffc3dd58 100644 --- a/llvm/include/llvm/Analysis/ProfileSummaryInfo.h +++ b/llvm/include/llvm/Analysis/ProfileSummaryInfo.h @@ -25,7 +25,6 @@ class BasicBlock; class BlockFrequencyInfo; class CallBase; class Function; -class ProfileSummary; /// Analysis providing profile information. /// @@ -39,7 +38,7 @@ class ProfileSummary; // units. This would require making this depend on BFI. class ProfileSummaryInfo { private: - Module &M; + const Module &M; std::unique_ptr<ProfileSummary> Summary; void computeThresholds(); // Count thresholds to answer isHotCount and isColdCount queries. @@ -59,7 +58,8 @@ private: mutable DenseMap<int, uint64_t> ThresholdCache; public: - ProfileSummaryInfo(Module &M) : M(M) { refresh(); } + ProfileSummaryInfo(const Module &M) : M(M) { refresh(); } + ProfileSummaryInfo(ProfileSummaryInfo &&Arg) = default; /// If no summary is present, attempt to refresh. 
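The static allowExtraAnalysis overloads above let code gate expensive, remark-only work before an OptimizationRemarkEmitter instance exists. Sketch only; the pass name "inline" is just an example:

#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/IR/Function.h"

static bool wantExtraRemarkDetail(const llvm::Function &F) {
  return llvm::OptimizationRemarkEmitter::allowExtraAnalysis(F, "inline");
}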
diff --git a/llvm/include/llvm/Analysis/RegionInfo.h b/llvm/include/llvm/Analysis/RegionInfo.h index b0336c559774..f93081d6f51d 100644 --- a/llvm/include/llvm/Analysis/RegionInfo.h +++ b/llvm/include/llvm/Analysis/RegionInfo.h @@ -59,7 +59,6 @@ namespace llvm { class DominanceFrontier; -class DominatorTree; class Loop; class LoopInfo; class PostDominatorTree; @@ -877,8 +876,6 @@ public: void verifyAnalysis() const; }; -class Region; - class RegionNode : public RegionNodeBase<RegionTraits<Function>> { public: inline RegionNode(Region *Parent, BasicBlock *Entry, bool isSubRegion = false) diff --git a/llvm/include/llvm/Analysis/RegionInfoImpl.h b/llvm/include/llvm/Analysis/RegionInfoImpl.h index 8d9ec646f519..b694effb2229 100644 --- a/llvm/include/llvm/Analysis/RegionInfoImpl.h +++ b/llvm/include/llvm/Analysis/RegionInfoImpl.h @@ -585,10 +585,8 @@ bool RegionInfoBase<Tr>::isRegion(BlockT *entry, BlockT *exit) const { // Exit is the header of a loop that contains the entry. In this case, // the dominance frontier must only contain the exit. if (!DT->dominates(entry, exit)) { - for (typename DST::iterator SI = entrySuccs->begin(), - SE = entrySuccs->end(); - SI != SE; ++SI) { - if (*SI != exit && *SI != entry) + for (BlockT *successor : *entrySuccs) { + if (successor != exit && successor != entry) return false; } @@ -817,8 +815,7 @@ void RegionInfoBase<Tr>::verifyAnalysis() const { // Region pass manager support. template <class Tr> typename Tr::RegionT *RegionInfoBase<Tr>::getRegionFor(BlockT *BB) const { - typename BBtoRegionMap::const_iterator I = BBtoRegion.find(BB); - return I != BBtoRegion.end() ? I->second : nullptr; + return BBtoRegion.lookup(BB); } template <class Tr> @@ -889,8 +886,7 @@ typename Tr::RegionT *RegionInfoBase<Tr>::getCommonRegion(RegionT *A, template <class Tr> typename Tr::RegionT * RegionInfoBase<Tr>::getCommonRegion(SmallVectorImpl<RegionT *> &Regions) const { - RegionT *ret = Regions.back(); - Regions.pop_back(); + RegionT *ret = Regions.pop_back_val(); for (RegionT *R : Regions) ret = getCommonRegion(ret, R); diff --git a/llvm/include/llvm/Analysis/RegionPass.h b/llvm/include/llvm/Analysis/RegionPass.h index 995c5dca3de3..5c7fa5f56693 100644 --- a/llvm/include/llvm/Analysis/RegionPass.h +++ b/llvm/include/llvm/Analysis/RegionPass.h @@ -85,8 +85,6 @@ protected: /// The pass manager to schedule RegionPasses. class RGPassManager : public FunctionPass, public PMDataManager { std::deque<Region*> RQ; - bool skipThisRegion; - bool redoThisRegion; RegionInfo *RI; Region *CurrentRegion; diff --git a/llvm/include/llvm/Analysis/ReplayInlineAdvisor.h b/llvm/include/llvm/Analysis/ReplayInlineAdvisor.h new file mode 100644 index 000000000000..3018bcc241d8 --- /dev/null +++ b/llvm/include/llvm/Analysis/ReplayInlineAdvisor.h @@ -0,0 +1,41 @@ +//===- ReplayInlineAdvisor.h - Replay Inline Advisor interface -*- C++ --*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +#ifndef LLVM_ANALYSIS_REPLAYINLINEADVISOR_H +#define LLVM_ANALYSIS_REPLAYINLINEADVISOR_H + +#include "llvm/ADT/StringSet.h" +#include "llvm/Analysis/InlineAdvisor.h" +#include "llvm/IR/LLVMContext.h" + +namespace llvm { +class BasicBlock; +class CallBase; +class Function; +class Module; +class OptimizationRemarkEmitter; + +/// Replay inline advisor that uses optimization remarks from inlining of +/// previous build to guide current inlining. This is useful for inliner tuning. +class ReplayInlineAdvisor : public InlineAdvisor { +public: + ReplayInlineAdvisor(Module &M, FunctionAnalysisManager &FAM, + LLVMContext &Context, + std::unique_ptr<InlineAdvisor> OriginalAdvisor, + StringRef RemarksFile, bool EmitRemarks); + std::unique_ptr<InlineAdvice> getAdviceImpl(CallBase &CB) override; + bool areReplayRemarksLoaded() const { return HasReplayRemarks; } + +private: + StringSet<> InlineSitesFromRemarks; + std::unique_ptr<InlineAdvisor> OriginalAdvisor; + bool HasReplayRemarks = false; + bool EmitRemarks = false; +}; +} // namespace llvm +#endif // LLVM_ANALYSIS_REPLAYINLINEADVISOR_H diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h index 81c5fc932588..b3f199de2cfa 100644 --- a/llvm/include/llvm/Analysis/ScalarEvolution.h +++ b/llvm/include/llvm/Analysis/ScalarEvolution.h @@ -70,6 +70,7 @@ class StructType; class TargetLibraryInfo; class Type; class Value; +enum SCEVTypes : unsigned short; /// This class represents an analyzed expression in the program. These are /// opaque objects that the client is not allowed to do much with directly. @@ -82,7 +83,7 @@ class SCEV : public FoldingSetNode { FoldingSetNodeIDRef FastID; // The SCEV baseclass this node corresponds to - const unsigned short SCEVType; + const SCEVTypes SCEVType; protected: // Estimated complexity of this node's expression tree size. @@ -119,13 +120,13 @@ public: NoWrapMask = (1 << 3) - 1 }; - explicit SCEV(const FoldingSetNodeIDRef ID, unsigned SCEVTy, + explicit SCEV(const FoldingSetNodeIDRef ID, SCEVTypes SCEVTy, unsigned short ExpressionSize) : FastID(ID), SCEVType(SCEVTy), ExpressionSize(ExpressionSize) {} SCEV(const SCEV &) = delete; SCEV &operator=(const SCEV &) = delete; - unsigned getSCEVType() const { return SCEVType; } + SCEVTypes getSCEVType() const { return SCEVType; } /// Return the LLVM type of this SCEV expression. Type *getType() const; @@ -511,6 +512,7 @@ public: const SCEV *getConstant(ConstantInt *V); const SCEV *getConstant(const APInt &Val); const SCEV *getConstant(Type *Ty, uint64_t V, bool isSigned = false); + const SCEV *getPtrToIntExpr(const SCEV *Op, Type *Ty, unsigned Depth = 0); const SCEV *getTruncateExpr(const SCEV *Op, Type *Ty, unsigned Depth = 0); const SCEV *getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth = 0); const SCEV *getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth = 0); @@ -572,7 +574,9 @@ public: /// \p IndexExprs The expressions for the indices. 
const SCEV *getGEPExpr(GEPOperator *GEP, const SmallVectorImpl<const SCEV *> &IndexExprs); - const SCEV *getMinMaxExpr(unsigned Kind, + const SCEV *getAbsExpr(const SCEV *Op, bool IsNSW); + const SCEV *getSignumExpr(const SCEV *Op); + const SCEV *getMinMaxExpr(SCEVTypes Kind, SmallVectorImpl<const SCEV *> &Operands); const SCEV *getSMaxExpr(const SCEV *LHS, const SCEV *RHS); const SCEV *getSMaxExpr(SmallVectorImpl<const SCEV *> &Operands); @@ -591,9 +595,22 @@ public: /// Return a SCEV for the constant 1 of a specific type. const SCEV *getOne(Type *Ty) { return getConstant(Ty, 1); } - /// Return an expression for sizeof AllocTy that is type IntTy + /// Return a SCEV for the constant -1 of a specific type. + const SCEV *getMinusOne(Type *Ty) { + return getConstant(Ty, -1, /*isSigned=*/true); + } + + /// Return an expression for sizeof ScalableTy that is type IntTy, where + /// ScalableTy is a scalable vector type. + const SCEV *getSizeOfScalableVectorExpr(Type *IntTy, + ScalableVectorType *ScalableTy); + + /// Return an expression for the alloc size of AllocTy that is type IntTy const SCEV *getSizeOfExpr(Type *IntTy, Type *AllocTy); + /// Return an expression for the store size of StoreTy that is type IntTy + const SCEV *getStoreSizeOfExpr(Type *IntTy, Type *StoreTy); + /// Return an expression for offsetof on the given field with type IntTy const SCEV *getOffsetOfExpr(Type *IntTy, StructType *STy, unsigned FieldNo); @@ -677,6 +694,12 @@ public: bool isLoopEntryGuardedByCond(const Loop *L, ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS); + /// Test whether entry to the basic block is protected by a conditional + /// between LHS and RHS. + bool isBasicBlockEntryGuardedByCond(const BasicBlock *BB, + ICmpInst::Predicate Pred, const SCEV *LHS, + const SCEV *RHS); + /// Test whether the backedge of the loop is protected by a conditional /// between LHS and RHS. This is used to eliminate casts. bool isLoopBackedgeGuardedByCond(const Loop *L, ICmpInst::Predicate Pred, @@ -696,7 +719,8 @@ public: /// before taking the branch. For loops with multiple exits, it may not be /// the number times that the loop header executes if the loop exits /// prematurely via another branch. - unsigned getSmallConstantTripCount(const Loop *L, BasicBlock *ExitingBlock); + unsigned getSmallConstantTripCount(const Loop *L, + const BasicBlock *ExitingBlock); /// Returns the upper bound of the loop trip count as a normal unsigned /// value. @@ -718,8 +742,7 @@ public: /// for getSmallConstantTripCount, this assumes that control exits the loop /// via ExitingBlock. unsigned getSmallConstantTripMultiple(const Loop *L, - BasicBlock *ExitingBlock); - + const BasicBlock *ExitingBlock); /// The terms "backedge taken count" and "exit count" are used /// interchangeably to refer to the number of times the backedge of a loop @@ -730,6 +753,8 @@ public: Exact, /// A constant which provides an upper bound on the exact trip count. ConstantMaximum, + /// An expression which provides an upper bound on the exact trip count. + SymbolicMaximum, }; /// Return the number of times the backedge executes before the given exit @@ -737,8 +762,8 @@ public: /// For a single exit loop, this value is equivelent to the result of /// getBackedgeTakenCount. The loop is guaranteed to exit (via *some* exit) /// before the backedge is executed (ExitCount + 1) times. Note that there - /// is no guarantee about *which* exit is taken on the exiting iteration. 
- const SCEV *getExitCount(const Loop *L, BasicBlock *ExitingBlock, + /// is no guarantee about *which* exit is taken on the exiting iteration. + const SCEV *getExitCount(const Loop *L, const BasicBlock *ExitingBlock, ExitCountKind Kind = Exact); /// If the specified loop has a predictable backedge-taken count, return it, @@ -766,7 +791,15 @@ public: /// SCEVCouldNotCompute object. const SCEV *getConstantMaxBackedgeTakenCount(const Loop *L) { return getBackedgeTakenCount(L, ConstantMaximum); - } + } + + /// When successful, this returns a SCEV that is greater than or equal + /// to (i.e. a "conservative over-approximation") of the value returend by + /// getBackedgeTakenCount. If such a value cannot be computed, it returns the + /// SCEVCouldNotCompute object. + const SCEV *getSymbolicMaxBackedgeTakenCount(const Loop *L) { + return getBackedgeTakenCount(L, SymbolicMaximum); + } /// Return true if the backedge taken count is either the value returned by /// getConstantMaxBackedgeTakenCount or zero. @@ -905,32 +938,61 @@ public: bool isKnownPredicate(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS); + /// Test if the given expression is known to satisfy the condition described + /// by Pred, LHS, and RHS in the given Context. + bool isKnownPredicateAt(ICmpInst::Predicate Pred, const SCEV *LHS, + const SCEV *RHS, const Instruction *Context); + /// Test if the condition described by Pred, LHS, RHS is known to be true on /// every iteration of the loop of the recurrency LHS. bool isKnownOnEveryIteration(ICmpInst::Predicate Pred, const SCEVAddRecExpr *LHS, const SCEV *RHS); - /// Return true if, for all loop invariant X, the predicate "LHS `Pred` X" - /// is monotonically increasing or decreasing. In the former case set - /// `Increasing` to true and in the latter case set `Increasing` to false. - /// /// A predicate is said to be monotonically increasing if may go from being /// false to being true as the loop iterates, but never the other way /// around. A predicate is said to be monotonically decreasing if may go /// from being true to being false as the loop iterates, but never the other /// way around. - bool isMonotonicPredicate(const SCEVAddRecExpr *LHS, ICmpInst::Predicate Pred, - bool &Increasing); - - /// Return true if the result of the predicate LHS `Pred` RHS is loop - /// invariant with respect to L. Set InvariantPred, InvariantLHS and - /// InvariantLHS so that InvariantLHS `InvariantPred` InvariantRHS is the - /// loop invariant form of LHS `Pred` RHS. - bool isLoopInvariantPredicate(ICmpInst::Predicate Pred, const SCEV *LHS, - const SCEV *RHS, const Loop *L, - ICmpInst::Predicate &InvariantPred, - const SCEV *&InvariantLHS, - const SCEV *&InvariantRHS); + enum MonotonicPredicateType { + MonotonicallyIncreasing, + MonotonicallyDecreasing + }; + + /// If, for all loop invariant X, the predicate "LHS `Pred` X" is + /// monotonically increasing or decreasing, returns + /// Some(MonotonicallyIncreasing) and Some(MonotonicallyDecreasing) + /// respectively. If we could not prove either of these facts, returns None. 
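With the new SymbolicMaximum kind, a client can fall back from the constant bound to a symbolic one. A small consuming sketch (not part of the imported diff), assuming SE and L come from the surrounding pass:

#include "llvm/Analysis/ScalarEvolution.h"

using namespace llvm;

static const SCEV *bestBackedgeUpperBound(ScalarEvolution &SE, const Loop *L) {
  // Prefer a constant bound when one is known; otherwise use the (possibly
  // non-constant) symbolic bound introduced by this change.
  const SCEV *ConstMax = SE.getConstantMaxBackedgeTakenCount(L);
  if (!isa<SCEVCouldNotCompute>(ConstMax))
    return ConstMax;
  return SE.getSymbolicMaxBackedgeTakenCount(L);
}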
+ Optional<MonotonicPredicateType> + getMonotonicPredicateType(const SCEVAddRecExpr *LHS, + ICmpInst::Predicate Pred); + + struct LoopInvariantPredicate { + ICmpInst::Predicate Pred; + const SCEV *LHS; + const SCEV *RHS; + + LoopInvariantPredicate(ICmpInst::Predicate Pred, const SCEV *LHS, + const SCEV *RHS) + : Pred(Pred), LHS(LHS), RHS(RHS) {} + }; + /// If the result of the predicate LHS `Pred` RHS is loop invariant with + /// respect to L, return a LoopInvariantPredicate with LHS and RHS being + /// invariants, available at L's entry. Otherwise, return None. + Optional<LoopInvariantPredicate> + getLoopInvariantPredicate(ICmpInst::Predicate Pred, const SCEV *LHS, + const SCEV *RHS, const Loop *L); + + /// If the result of the predicate LHS `Pred` RHS is loop invariant with + /// respect to L at given Context during at least first MaxIter iterations, + /// return a LoopInvariantPredicate with LHS and RHS being invariants, + /// available at L's entry. Otherwise, return None. The predicate should be + /// the loop's exit condition. + Optional<LoopInvariantPredicate> + getLoopInvariantExitCondDuringFirstIterations(ICmpInst::Predicate Pred, + const SCEV *LHS, + const SCEV *RHS, const Loop *L, + const Instruction *Context, + const SCEV *MaxIter); /// Simplify LHS and RHS in a comparison with predicate Pred. Return true /// iff any changes were made. If the operands are provably equal or @@ -1101,6 +1163,20 @@ public: const SCEV *S, const Loop *L, SmallPtrSetImpl<const SCEVPredicate *> &Preds); + /// Compute \p LHS - \p RHS and returns the result as an APInt if it is a + /// constant, and None if it isn't. + /// + /// This is intended to be a cheaper version of getMinusSCEV. We can be + /// frugal here since we just bail out of actually constructing and + /// canonicalizing an expression in the cases where the result isn't going + /// to be a constant. + Optional<APInt> computeConstantDifference(const SCEV *LHS, const SCEV *RHS); + + /// Update no-wrap flags of an AddRec. This may drop the cached info about + /// this AddRec (such as range info) in case if new flags may potentially + /// sharpen it. + void setNoWrapFlags(SCEVAddRecExpr *AddRec, SCEV::NoWrapFlags Flags); + private: /// A CallbackVH to arrange for ScalarEvolution to be notified whenever a /// Value is deleted. @@ -1181,7 +1257,7 @@ private: ValueExprMapType ValueExprMap; /// Mark predicate values currently being processed by isImpliedCond. - SmallPtrSet<Value *, 6> PendingLoopPredicates; + SmallPtrSet<const Value *, 6> PendingLoopPredicates; /// Mark SCEVUnknown Phis currently being processed by getRangeRef. SmallPtrSet<const PHINode *, 6> PendingPhiRanges; @@ -1284,39 +1360,41 @@ private: /// never have more than one computable exit. SmallVector<ExitNotTakenInfo, 1> ExitNotTaken; - /// The pointer part of \c MaxAndComplete is an expression indicating the - /// least maximum backedge-taken count of the loop that is known, or a - /// SCEVCouldNotCompute. This expression is only valid if the predicates - /// associated with all loop exits are true. - /// - /// The integer part of \c MaxAndComplete is a boolean indicating if \c - /// ExitNotTaken has an element for every exiting block in the loop. - PointerIntPair<const SCEV *, 1> MaxAndComplete; + /// Expression indicating the least constant maximum backedge-taken count of + /// the loop that is known, or a SCEVCouldNotCompute. This expression is + /// only valid if the redicates associated with all loop exits are true. 
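The Optional-returning getLoopInvariantPredicate declared above replaces the old out-parameter form. A sketch of the consuming side (not part of the imported diff), assuming Pred, LHS, RHS and L are already in scope:

#include "llvm/Analysis/ScalarEvolution.h"

using namespace llvm;

static bool isInvariantlyTrue(ScalarEvolution &SE, ICmpInst::Predicate Pred,
                              const SCEV *LHS, const SCEV *RHS, const Loop *L) {
  if (auto LIP = SE.getLoopInvariantPredicate(Pred, LHS, RHS, L))
    // Both operands of the rewritten condition are available at L's entry,
    // so a single check there answers the question for every iteration.
    return SE.isKnownPredicate(LIP->Pred, LIP->LHS, LIP->RHS);
  return false;
}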
+ const SCEV *ConstantMax; + + /// Indicating if \c ExitNotTaken has an element for every exiting block in + /// the loop. + bool IsComplete; + + /// Expression indicating the least maximum backedge-taken count of the loop + /// that is known, or a SCEVCouldNotCompute. Lazily computed on first query. + const SCEV *SymbolicMax = nullptr; /// True iff the backedge is taken either exactly Max or zero times. bool MaxOrZero = false; - /// \name Helper projection functions on \c MaxAndComplete. - /// @{ - bool isComplete() const { return MaxAndComplete.getInt(); } - const SCEV *getMax() const { return MaxAndComplete.getPointer(); } - /// @} + bool isComplete() const { return IsComplete; } + const SCEV *getConstantMax() const { return ConstantMax; } public: - BackedgeTakenInfo() : MaxAndComplete(nullptr, 0) {} + BackedgeTakenInfo() : ConstantMax(nullptr), IsComplete(false) {} BackedgeTakenInfo(BackedgeTakenInfo &&) = default; BackedgeTakenInfo &operator=(BackedgeTakenInfo &&) = default; using EdgeExitInfo = std::pair<BasicBlock *, ExitLimit>; /// Initialize BackedgeTakenInfo from a list of exact exit counts. - BackedgeTakenInfo(ArrayRef<EdgeExitInfo> ExitCounts, bool Complete, - const SCEV *MaxCount, bool MaxOrZero); + BackedgeTakenInfo(ArrayRef<EdgeExitInfo> ExitCounts, bool IsComplete, + const SCEV *ConstantMax, bool MaxOrZero); /// Test whether this BackedgeTakenInfo contains any computed information, /// or whether it's all SCEVCouldNotCompute values. bool hasAnyInfo() const { - return !ExitNotTaken.empty() || !isa<SCEVCouldNotCompute>(getMax()); + return !ExitNotTaken.empty() || + !isa<SCEVCouldNotCompute>(getConstantMax()); } /// Test whether this BackedgeTakenInfo contains complete information. @@ -1347,17 +1425,22 @@ private: /// edge, or SCEVCouldNotCompute. The loop is guaranteed not to exit via /// this block before this number of iterations, but may exit via another /// block. - const SCEV *getExact(BasicBlock *ExitingBlock, ScalarEvolution *SE) const; + const SCEV *getExact(const BasicBlock *ExitingBlock, + ScalarEvolution *SE) const; + + /// Get the constant max backedge taken count for the loop. + const SCEV *getConstantMax(ScalarEvolution *SE) const; - /// Get the max backedge taken count for the loop. - const SCEV *getMax(ScalarEvolution *SE) const; + /// Get the constant max backedge taken count for the particular loop exit. + const SCEV *getConstantMax(const BasicBlock *ExitingBlock, + ScalarEvolution *SE) const; - /// Get the max backedge taken count for the particular loop exit. - const SCEV *getMax(BasicBlock *ExitingBlock, ScalarEvolution *SE) const; + /// Get the symbolic max backedge taken count for the loop. + const SCEV *getSymbolicMax(const Loop *L, ScalarEvolution *SE); /// Return true if the number of times this backedge is taken is either the - /// value returned by getMax or zero. - bool isMaxOrZero(ScalarEvolution *SE) const; + /// value returned by getConstantMax or zero. + bool isConstantMaxOrZero(ScalarEvolution *SE) const; /// Return true if any backedge taken count expressions refer to the given /// subexpression. @@ -1462,6 +1545,13 @@ private: ConstantRange getRangeForAffineAR(const SCEV *Start, const SCEV *Stop, const SCEV *MaxBECount, unsigned BitWidth); + /// Determines the range for the affine non-self-wrapping SCEVAddRecExpr {\p + /// Start,+,\p Stop}<nw>. 
+ ConstantRange getRangeForAffineNoSelfWrappingAR(const SCEVAddRecExpr *AddRec, + const SCEV *MaxBECount, + unsigned BitWidth, + RangeSignHint SignHint); + /// Try to compute a range for the affine SCEVAddRecExpr {\p Start,+,\p /// Stop} by "factoring out" a ternary expression from the add recurrence. /// Helper called by \c getRange. @@ -1507,7 +1597,7 @@ private: /// Return the BackedgeTakenInfo for the given loop, lazily computing new /// values if the loop hasn't been analyzed yet. The returned result is /// guaranteed not to be predicated. - const BackedgeTakenInfo &getBackedgeTakenInfo(const Loop *L); + BackedgeTakenInfo &getBackedgeTakenInfo(const Loop *L); /// Similar to getBackedgeTakenInfo, but will add predicates as required /// with the purpose of returning complete information. @@ -1540,6 +1630,11 @@ private: bool ExitIfTrue, bool ControlsExit, bool AllowPredicates = false); + /// Return a symbolic upper bound for the backedge taken count of the loop. + /// This is more general than getConstantMaxBackedgeTakenCount as it returns + /// an arbitrary expression as opposed to only constants. + const SCEV *computeSymbolicMaxBackedgeTakenCount(const Loop *L); + // Helper functions for computeExitLimitFromCond to avoid exponential time // complexity. @@ -1577,6 +1672,10 @@ private: Value *ExitCond, bool ExitIfTrue, bool ControlsExit, bool AllowPredicates); + Optional<ScalarEvolution::ExitLimit> + computeExitLimitFromCondFromBinOp(ExitLimitCacheTy &Cache, const Loop *L, + Value *ExitCond, bool ExitIfTrue, + bool ControlsExit, bool AllowPredicates); /// Compute the number of times the backedge of the specified loop will /// execute if its exit condition were a conditional branch of the ICmpInst @@ -1655,27 +1754,44 @@ private: /// Return a predecessor of BB (which may not be an immediate predecessor) /// which has exactly one successor from which BB is reachable, or null if /// no such block is found. - std::pair<BasicBlock *, BasicBlock *> - getPredecessorWithUniqueSuccessorForBB(BasicBlock *BB); + std::pair<const BasicBlock *, const BasicBlock *> + getPredecessorWithUniqueSuccessorForBB(const BasicBlock *BB) const; /// Test whether the condition described by Pred, LHS, and RHS is true - /// whenever the given FoundCondValue value evaluates to true. + /// whenever the given FoundCondValue value evaluates to true in given + /// Context. If Context is nullptr, then the found predicate is true + /// everywhere. LHS and FoundLHS may have different type width. bool isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, - Value *FoundCondValue, bool Inverse); + const Value *FoundCondValue, bool Inverse, + const Instruction *Context = nullptr); + + /// Test whether the condition described by Pred, LHS, and RHS is true + /// whenever the given FoundCondValue value evaluates to true in given + /// Context. If Context is nullptr, then the found predicate is true + /// everywhere. LHS and FoundLHS must have same type width. + bool isImpliedCondBalancedTypes(ICmpInst::Predicate Pred, const SCEV *LHS, + const SCEV *RHS, + ICmpInst::Predicate FoundPred, + const SCEV *FoundLHS, const SCEV *FoundRHS, + const Instruction *Context); /// Test whether the condition described by Pred, LHS, and RHS is true /// whenever the condition described by FoundPred, FoundLHS, FoundRHS is - /// true. + /// true in given Context. If Context is nullptr, then the found predicate is + /// true everywhere. 
bool isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, ICmpInst::Predicate FoundPred, const SCEV *FoundLHS, - const SCEV *FoundRHS); + const SCEV *FoundRHS, + const Instruction *Context = nullptr); /// Test whether the condition described by Pred, LHS, and RHS is true /// whenever the condition described by Pred, FoundLHS, and FoundRHS is - /// true. + /// true in given Context. If Context is nullptr, then the found predicate is + /// true everywhere. bool isImpliedCondOperands(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, const SCEV *FoundLHS, - const SCEV *FoundRHS); + const SCEV *FoundRHS, + const Instruction *Context = nullptr); /// Test whether the condition described by Pred, LHS, and RHS is true /// whenever the condition described by Pred, FoundLHS, and FoundRHS is @@ -1708,7 +1824,7 @@ private: /// Return true if the condition denoted by \p LHS \p Pred \p RHS is implied /// by a call to @llvm.experimental.guard in \p BB. - bool isImpliedViaGuard(BasicBlock *BB, ICmpInst::Predicate Pred, + bool isImpliedViaGuard(const BasicBlock *BB, ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS); /// Test whether the condition described by Pred, LHS, and RHS is true @@ -1726,6 +1842,18 @@ private: /// whenever the condition described by Pred, FoundLHS, and FoundRHS is /// true. /// + /// This routine tries to weaken the known condition basing on fact that + /// FoundLHS is an AddRec. + bool isImpliedCondOperandsViaAddRecStart(ICmpInst::Predicate Pred, + const SCEV *LHS, const SCEV *RHS, + const SCEV *FoundLHS, + const SCEV *FoundRHS, + const Instruction *Context); + + /// Test whether the condition described by Pred, LHS, and RHS is true + /// whenever the condition described by Pred, FoundLHS, and FoundRHS is + /// true. + /// /// This routine tries to figure out predicate for Phis which are SCEVUnknown /// if it is true for every possible incoming value from their respective /// basic blocks. @@ -1762,15 +1890,6 @@ private: bool splitBinaryAdd(const SCEV *Expr, const SCEV *&L, const SCEV *&R, SCEV::NoWrapFlags &Flags); - /// Compute \p LHS - \p RHS and returns the result as an APInt if it is a - /// constant, and None if it isn't. - /// - /// This is intended to be a cheaper version of getMinusSCEV. We can be - /// frugal here since we just bail out of actually constructing and - /// canonicalizing an expression in the cases where the result isn't going - /// to be a constant. - Optional<APInt> computeConstantDifference(const SCEV *LHS, const SCEV *RHS); - /// Drop memoized information computed for S. void forgetMemoizedResults(const SCEV *S); @@ -1793,8 +1912,17 @@ private: /// Try to prove NSW or NUW on \p AR relying on ConstantRange manipulation. SCEV::NoWrapFlags proveNoWrapViaConstantRanges(const SCEVAddRecExpr *AR); - bool isMonotonicPredicateImpl(const SCEVAddRecExpr *LHS, - ICmpInst::Predicate Pred, bool &Increasing); + /// Try to prove NSW on \p AR by proving facts about conditions known on + /// entry and backedge. + SCEV::NoWrapFlags proveNoSignedWrapViaInduction(const SCEVAddRecExpr *AR); + + /// Try to prove NUW on \p AR by proving facts about conditions known on + /// entry and backedge. + SCEV::NoWrapFlags proveNoUnsignedWrapViaInduction(const SCEVAddRecExpr *AR); + + Optional<MonotonicPredicateType> + getMonotonicPredicateTypeImpl(const SCEVAddRecExpr *LHS, + ICmpInst::Predicate Pred); /// Return SCEV no-wrap flags that can be proven based on reasoning about /// how poison produced from no-wrap flags on this value (e.g. 
a nuw add) @@ -1893,6 +2021,9 @@ private: /// Assign A and B to LHS and RHS, respectively. bool matchURem(const SCEV *Expr, const SCEV *&LHS, const SCEV *&RHS); + /// Try to apply information from loop guards for \p L to \p Expr. + const SCEV *applyLoopGuards(const SCEV *Expr, const Loop *L); + /// Look for a SCEV expression with type `SCEVType` and operands `Ops` in /// `UniqueSCEVs`. /// @@ -1901,7 +2032,7 @@ private: /// constructed to look up the SCEV and the third component is the insertion /// point. std::tuple<SCEV *, FoldingSetNodeID, void *> - findExistingSCEVInCache(int SCEVType, ArrayRef<const SCEV *> Ops); + findExistingSCEVInCache(SCEVTypes SCEVType, ArrayRef<const SCEV *> Ops); FoldingSet<SCEV> UniqueSCEVs; FoldingSet<SCEVPredicate> UniquePreds; diff --git a/llvm/include/llvm/Analysis/ScalarEvolutionDivision.h b/llvm/include/llvm/Analysis/ScalarEvolutionDivision.h index 480f92c117a0..24f0c51487bd 100644 --- a/llvm/include/llvm/Analysis/ScalarEvolutionDivision.h +++ b/llvm/include/llvm/Analysis/ScalarEvolutionDivision.h @@ -33,6 +33,7 @@ public: // Except in the trivial case described above, we do not know how to divide // Expr by Denominator for the following functions with empty implementation. + void visitPtrToIntExpr(const SCEVPtrToIntExpr *Numerator) {} void visitTruncateExpr(const SCEVTruncateExpr *Numerator) {} void visitZeroExtendExpr(const SCEVZeroExtendExpr *Numerator) {} void visitSignExtendExpr(const SCEVSignExtendExpr *Numerator) {} diff --git a/llvm/include/llvm/Analysis/ScalarEvolutionExpressions.h b/llvm/include/llvm/Analysis/ScalarEvolutionExpressions.h index 0076e02ae1bf..37e675f08afc 100644 --- a/llvm/include/llvm/Analysis/ScalarEvolutionExpressions.h +++ b/llvm/include/llvm/Analysis/ScalarEvolutionExpressions.h @@ -35,12 +35,12 @@ class ConstantRange; class Loop; class Type; - enum SCEVTypes { + enum SCEVTypes : unsigned short { // These should be ordered in terms of increasing complexity to make the // folders simpler. scConstant, scTruncate, scZeroExtend, scSignExtend, scAddExpr, scMulExpr, scUDivExpr, scAddRecExpr, scUMaxExpr, scSMaxExpr, scUMinExpr, scSMinExpr, - scUnknown, scCouldNotCompute + scPtrToInt, scUnknown, scCouldNotCompute }; /// This class represents a constant integer value. @@ -74,18 +74,58 @@ class Type; /// This is the base class for unary cast operator classes. class SCEVCastExpr : public SCEV { protected: - const SCEV *Op; + std::array<const SCEV *, 1> Operands; Type *Ty; - SCEVCastExpr(const FoldingSetNodeIDRef ID, - unsigned SCEVTy, const SCEV *op, Type *ty); + SCEVCastExpr(const FoldingSetNodeIDRef ID, SCEVTypes SCEVTy, const SCEV *op, + Type *ty); public: - const SCEV *getOperand() const { return Op; } + const SCEV *getOperand() const { return Operands[0]; } + const SCEV *getOperand(unsigned i) const { + assert(i == 0 && "Operand index out of range!"); + return Operands[0]; + } + using op_iterator = std::array<const SCEV *, 1>::const_iterator; + using op_range = iterator_range<op_iterator>; + + op_range operands() const { + return make_range(Operands.begin(), Operands.end()); + } + size_t getNumOperands() const { return 1; } Type *getType() const { return Ty; } /// Methods for support type inquiry through isa, cast, and dyn_cast: static bool classof(const SCEV *S) { + return S->getSCEVType() == scPtrToInt || S->getSCEVType() == scTruncate || + S->getSCEVType() == scZeroExtend || + S->getSCEVType() == scSignExtend; + } + }; + + /// This class represents a cast from a pointer to a pointer-sized integer + /// value. 
+ class SCEVPtrToIntExpr : public SCEVCastExpr { + friend class ScalarEvolution; + + SCEVPtrToIntExpr(const FoldingSetNodeIDRef ID, const SCEV *Op, Type *ITy); + + public: + /// Methods for support type inquiry through isa, cast, and dyn_cast: + static bool classof(const SCEV *S) { + return S->getSCEVType() == scPtrToInt; + } + }; + + /// This is the base class for unary integral cast operator classes. + class SCEVIntegralCastExpr : public SCEVCastExpr { + protected: + SCEVIntegralCastExpr(const FoldingSetNodeIDRef ID, SCEVTypes SCEVTy, + const SCEV *op, Type *ty); + + public: + /// Methods for support type inquiry through isa, cast, and dyn_cast: + static bool classof(const SCEV *S) { return S->getSCEVType() == scTruncate || S->getSCEVType() == scZeroExtend || S->getSCEVType() == scSignExtend; @@ -94,7 +134,7 @@ class Type; /// This class represents a truncation of an integer value to a /// smaller integer value. - class SCEVTruncateExpr : public SCEVCastExpr { + class SCEVTruncateExpr : public SCEVIntegralCastExpr { friend class ScalarEvolution; SCEVTruncateExpr(const FoldingSetNodeIDRef ID, @@ -109,7 +149,7 @@ class Type; /// This class represents a zero extension of a small integer value /// to a larger integer value. - class SCEVZeroExtendExpr : public SCEVCastExpr { + class SCEVZeroExtendExpr : public SCEVIntegralCastExpr { friend class ScalarEvolution; SCEVZeroExtendExpr(const FoldingSetNodeIDRef ID, @@ -124,7 +164,7 @@ class Type; /// This class represents a sign extension of a small integer value /// to a larger integer value. - class SCEVSignExtendExpr : public SCEVCastExpr { + class SCEVSignExtendExpr : public SCEVIntegralCastExpr { friend class ScalarEvolution; SCEVSignExtendExpr(const FoldingSetNodeIDRef ID, @@ -263,16 +303,28 @@ class Type; class SCEVUDivExpr : public SCEV { friend class ScalarEvolution; - const SCEV *LHS; - const SCEV *RHS; + std::array<const SCEV *, 2> Operands; SCEVUDivExpr(const FoldingSetNodeIDRef ID, const SCEV *lhs, const SCEV *rhs) - : SCEV(ID, scUDivExpr, computeExpressionSize({lhs, rhs})), LHS(lhs), - RHS(rhs) {} + : SCEV(ID, scUDivExpr, computeExpressionSize({lhs, rhs})) { + Operands[0] = lhs; + Operands[1] = rhs; + } public: - const SCEV *getLHS() const { return LHS; } - const SCEV *getRHS() const { return RHS; } + const SCEV *getLHS() const { return Operands[0]; } + const SCEV *getRHS() const { return Operands[1]; } + size_t getNumOperands() const { return 2; } + const SCEV *getOperand(unsigned i) const { + assert((i == 0 || i == 1) && "Operand index out of range!"); + return i == 0 ? 
getLHS() : getRHS(); + } + + using op_iterator = std::array<const SCEV *, 2>::const_iterator; + using op_range = iterator_range<op_iterator>; + op_range operands() const { + return make_range(Operands.begin(), Operands.end()); + } Type *getType() const { // In most cases the types of LHS and RHS will be the same, but in some @@ -389,7 +441,7 @@ class Type; public: static bool classof(const SCEV *S) { - return isMinMaxType(static_cast<SCEVTypes>(S->getSCEVType())); + return isMinMaxType(S->getSCEVType()); } static enum SCEVTypes negate(enum SCEVTypes T) { @@ -518,6 +570,8 @@ class Type; switch (S->getSCEVType()) { case scConstant: return ((SC*)this)->visitConstant((const SCEVConstant*)S); + case scPtrToInt: + return ((SC *)this)->visitPtrToIntExpr((const SCEVPtrToIntExpr *)S); case scTruncate: return ((SC*)this)->visitTruncateExpr((const SCEVTruncateExpr*)S); case scZeroExtend: @@ -544,9 +598,8 @@ class Type; return ((SC*)this)->visitUnknown((const SCEVUnknown*)S); case scCouldNotCompute: return ((SC*)this)->visitCouldNotCompute((const SCEVCouldNotCompute*)S); - default: - llvm_unreachable("Unknown SCEV type!"); } + llvm_unreachable("Unknown SCEV kind!"); } RetVal visitCouldNotCompute(const SCEVCouldNotCompute *S) { @@ -583,12 +636,13 @@ class Type; switch (S->getSCEVType()) { case scConstant: case scUnknown: - break; + continue; + case scPtrToInt: case scTruncate: case scZeroExtend: case scSignExtend: push(cast<SCEVCastExpr>(S)->getOperand()); - break; + continue; case scAddExpr: case scMulExpr: case scSMaxExpr: @@ -598,18 +652,17 @@ class Type; case scAddRecExpr: for (const auto *Op : cast<SCEVNAryExpr>(S)->operands()) push(Op); - break; + continue; case scUDivExpr: { const SCEVUDivExpr *UDiv = cast<SCEVUDivExpr>(S); push(UDiv->getLHS()); push(UDiv->getRHS()); - break; + continue; } case scCouldNotCompute: llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); - default: - llvm_unreachable("Unknown SCEV kind!"); } + llvm_unreachable("Unknown SCEV kind!"); } } }; @@ -677,6 +730,13 @@ class Type; return Constant; } + const SCEV *visitPtrToIntExpr(const SCEVPtrToIntExpr *Expr) { + const SCEV *Operand = ((SC *)this)->visit(Expr->getOperand()); + return Operand == Expr->getOperand() + ? Expr + : SE.getPtrToIntExpr(Operand, Expr->getType()); + } + const SCEV *visitTruncateExpr(const SCEVTruncateExpr *Expr) { const SCEV *Operand = ((SC*)this)->visit(Expr->getOperand()); return Operand == Expr->getOperand() @@ -787,35 +847,30 @@ class Type; }; using ValueToValueMap = DenseMap<const Value *, Value *>; + using ValueToSCEVMapTy = DenseMap<const Value *, const SCEV *>; /// The SCEVParameterRewriter takes a scalar evolution expression and updates - /// the SCEVUnknown components following the Map (Value -> Value). + /// the SCEVUnknown components following the Map (Value -> SCEV). 
class SCEVParameterRewriter : public SCEVRewriteVisitor<SCEVParameterRewriter> { public: static const SCEV *rewrite(const SCEV *Scev, ScalarEvolution &SE, - ValueToValueMap &Map, - bool InterpretConsts = false) { - SCEVParameterRewriter Rewriter(SE, Map, InterpretConsts); + ValueToSCEVMapTy &Map) { + SCEVParameterRewriter Rewriter(SE, Map); return Rewriter.visit(Scev); } - SCEVParameterRewriter(ScalarEvolution &SE, ValueToValueMap &M, bool C) - : SCEVRewriteVisitor(SE), Map(M), InterpretConsts(C) {} + SCEVParameterRewriter(ScalarEvolution &SE, ValueToSCEVMapTy &M) + : SCEVRewriteVisitor(SE), Map(M) {} const SCEV *visitUnknown(const SCEVUnknown *Expr) { - Value *V = Expr->getValue(); - if (Map.count(V)) { - Value *NV = Map[V]; - if (InterpretConsts && isa<ConstantInt>(NV)) - return SE.getConstant(cast<ConstantInt>(NV)); - return SE.getUnknown(NV); - } - return Expr; + auto I = Map.find(Expr->getValue()); + if (I == Map.end()) + return Expr; + return I->second; } private: - ValueToValueMap &Map; - bool InterpretConsts; + ValueToSCEVMapTy &Map; }; using LoopToScevMapT = DenseMap<const Loop *, const SCEV *>; diff --git a/llvm/include/llvm/Analysis/SparsePropagation.h b/llvm/include/llvm/Analysis/SparsePropagation.h index fac92e4a25a4..81a2533152de 100644 --- a/llvm/include/llvm/Analysis/SparsePropagation.h +++ b/llvm/include/llvm/Analysis/SparsePropagation.h @@ -485,8 +485,7 @@ void SparseSolver<LatticeKey, LatticeVal, KeyInfo>::Solve() { // Process the basic block work list. while (!BBWorkList.empty()) { - BasicBlock *BB = BBWorkList.back(); - BBWorkList.pop_back(); + BasicBlock *BB = BBWorkList.pop_back_val(); LLVM_DEBUG(dbgs() << "\nPopped off BBWL: " << *BB); diff --git a/llvm/include/llvm/Analysis/StackLifetime.h b/llvm/include/llvm/Analysis/StackLifetime.h index 8abc6cc1ce00..df342a9533ee 100644 --- a/llvm/include/llvm/Analysis/StackLifetime.h +++ b/llvm/include/llvm/Analysis/StackLifetime.h @@ -13,6 +13,7 @@ #include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/PassManager.h" #include "llvm/Support/raw_ostream.h" @@ -121,6 +122,8 @@ private: DenseMap<const BasicBlock *, SmallVector<std::pair<unsigned, Marker>, 4>> BBMarkers; + bool HasUnknownLifetimeStartOrEnd = false; + void dumpAllocas() const; void dumpBlockLiveness() const; void dumpLiveRanges() const; @@ -166,16 +169,9 @@ public: static inline raw_ostream &operator<<(raw_ostream &OS, const BitVector &V) { OS << "{"; - int Idx = V.find_first(); - bool First = true; - while (Idx >= 0) { - if (!First) { - OS << ", "; - } - First = false; - OS << Idx; - Idx = V.find_next(Idx); - } + ListSeparator LS; + for (int Idx = V.find_first(); Idx >= 0; Idx = V.find_next(Idx)) + OS << LS << Idx; OS << "}"; return OS; } diff --git a/llvm/include/llvm/Analysis/StackSafetyAnalysis.h b/llvm/include/llvm/Analysis/StackSafetyAnalysis.h index 846c2e6f7e91..59c1e3e3bd56 100644 --- a/llvm/include/llvm/Analysis/StackSafetyAnalysis.h +++ b/llvm/include/llvm/Analysis/StackSafetyAnalysis.h @@ -51,7 +51,8 @@ public: /// StackSafety assumes that missing parameter information means possibility /// of access to the parameter with any offset, so we can correctly link /// code without StackSafety information, e.g. non-ThinLTO.
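Because the rewriter's map now carries SCEVs directly, callers build the replacement expression themselves instead of passing the old InterpretConsts flag. A usage sketch (not part of the imported diff), assuming the parameter value and its replacement expression are known to the caller:

#include "llvm/Analysis/ScalarEvolutionExpressions.h"

using namespace llvm;

static const SCEV *substituteParam(ScalarEvolution &SE, const SCEV *Expr,
                                   const Value *Param, const SCEV *Repl) {
  ValueToSCEVMapTy Map;
  Map[Param] = Repl; // e.g. SE.getConstant(...) or SE.getUnknown(NewV)
  return SCEVParameterRewriter::rewrite(Expr, SE, Map);
}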
- std::vector<FunctionSummary::ParamAccess> getParamAccesses() const; + std::vector<FunctionSummary::ParamAccess> + getParamAccesses(ModuleSummaryIndex &Index) const; }; class StackSafetyGlobalInfo { diff --git a/llvm/include/llvm/Analysis/SyncDependenceAnalysis.h b/llvm/include/llvm/Analysis/SyncDependenceAnalysis.h index 2f07b3135308..9838d629e93e 100644 --- a/llvm/include/llvm/Analysis/SyncDependenceAnalysis.h +++ b/llvm/include/llvm/Analysis/SyncDependenceAnalysis.h @@ -21,6 +21,7 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/Analysis/LoopInfo.h" #include <memory> +#include <unordered_map> namespace llvm { @@ -30,6 +31,26 @@ class Loop; class PostDominatorTree; using ConstBlockSet = SmallPtrSet<const BasicBlock *, 4>; +struct ControlDivergenceDesc { + // Join points of divergent disjoint paths. + ConstBlockSet JoinDivBlocks; + // Divergent loop exits + ConstBlockSet LoopDivBlocks; +}; + +struct ModifiedPO { + std::vector<const BasicBlock *> LoopPO; + std::unordered_map<const BasicBlock *, unsigned> POIndex; + void appendBlock(const BasicBlock &BB) { + POIndex[&BB] = LoopPO.size(); + LoopPO.push_back(&BB); + } + unsigned getIndexOf(const BasicBlock &BB) const { + return POIndex.find(&BB)->second; + } + unsigned size() const { return LoopPO.size(); } + const BasicBlock *getBlockAt(unsigned Idx) const { return LoopPO[Idx]; } +}; /// \brief Relates points of divergent control to join points in /// reducible CFGs. @@ -51,28 +72,19 @@ public: /// header. Those exit blocks are added to the returned set. /// If L is the parent loop of \p Term and an exit of L is in the returned /// set then L is a divergent loop. - const ConstBlockSet &join_blocks(const Instruction &Term); - - /// \brief Computes divergent join points and loop exits (in the surrounding - /// loop) caused by the divergent loop exits of\p Loop. - /// - /// The set of blocks which are reachable by disjoint paths from the - /// loop exits of \p Loop. - /// This treats the loop as a single node in \p Loop's parent loop. - /// The returned set has the same properties as for join_blocks(TermInst&). 
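getJoinBlocks, declared just below, folds the per-branch and per-loop queries into a single ControlDivergenceDesc result. A consuming sketch (not part of the imported diff), assuming SDA is the SyncDependenceAnalysis instance owned by the divergence analysis and DivTerm is a divergent terminator:

#include "llvm/Analysis/SyncDependenceAnalysis.h"

using namespace llvm;

static void propagateControlDivergence(SyncDependenceAnalysis &SDA,
                                       const Instruction &DivTerm) {
  const ControlDivergenceDesc &Desc = SDA.getJoinBlocks(DivTerm);
  for (const BasicBlock *JoinBB : Desc.JoinDivBlocks) {
    // PHI nodes in JoinBB become divergent; a real client enqueues them here.
    (void)JoinBB;
  }
  for (const BasicBlock *ExitBB : Desc.LoopDivBlocks) {
    // Divergent exits of the enclosing loop; values live across them are
    // handled here.
    (void)ExitBB;
  }
}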
- const ConstBlockSet &join_blocks(const Loop &Loop); + const ControlDivergenceDesc &getJoinBlocks(const Instruction &Term); private: - static ConstBlockSet EmptyBlockSet; + static ControlDivergenceDesc EmptyDivergenceDesc; + + ModifiedPO LoopPO; - ReversePostOrderTraversal<const Function *> FuncRPOT; const DominatorTree &DT; const PostDominatorTree &PDT; const LoopInfo &LI; - std::map<const Loop *, std::unique_ptr<ConstBlockSet>> CachedLoopExitJoins; - std::map<const Instruction *, std::unique_ptr<ConstBlockSet>> - CachedBranchJoins; + std::map<const Instruction *, std::unique_ptr<ControlDivergenceDesc>> + CachedControlDivDescs; }; } // namespace llvm diff --git a/llvm/include/llvm/Analysis/TargetLibraryInfo.def b/llvm/include/llvm/Analysis/TargetLibraryInfo.def index 3864d4955104..defc95d0062a 100644 --- a/llvm/include/llvm/Analysis/TargetLibraryInfo.def +++ b/llvm/include/llvm/Analysis/TargetLibraryInfo.def @@ -262,6 +262,12 @@ TLI_DEFINE_STRING_INTERNAL("__atanhf_finite") /// long double __atanhl_finite(long double x); TLI_DEFINE_ENUM_INTERNAL(atanhl_finite) TLI_DEFINE_STRING_INTERNAL("__atanhl_finite") +/// void __atomic_load(size_t size, void *mptr, void *vptr, int smodel); +TLI_DEFINE_ENUM_INTERNAL(atomic_load) +TLI_DEFINE_STRING_INTERNAL("__atomic_load") +/// void __atomic_store(size_t size, void *mptr, void *vptr, int smodel); +TLI_DEFINE_ENUM_INTERNAL(atomic_store) +TLI_DEFINE_STRING_INTERNAL("__atomic_store") /// double __cosh_finite(double x); TLI_DEFINE_ENUM_INTERNAL(cosh_finite) TLI_DEFINE_STRING_INTERNAL("__cosh_finite") @@ -360,6 +366,9 @@ TLI_DEFINE_STRING_INTERNAL("__memcpy_chk") /// void *__memmove_chk(void *s1, const void *s2, size_t n, size_t s1size); TLI_DEFINE_ENUM_INTERNAL(memmove_chk) TLI_DEFINE_STRING_INTERNAL("__memmove_chk") +/// void *__mempcpy_chk(void *s1, const void *s2, size_t n, size_t s1size); +TLI_DEFINE_ENUM_INTERNAL(mempcpy_chk) +TLI_DEFINE_STRING_INTERNAL("__mempcpy_chk") /// void *__memset_chk(void *s, char v, size_t n, size_t s1size); TLI_DEFINE_ENUM_INTERNAL(memset_chk) TLI_DEFINE_STRING_INTERNAL("__memset_chk") @@ -1411,6 +1420,18 @@ TLI_DEFINE_STRING_INTERNAL("utimes") /// void *valloc(size_t size); TLI_DEFINE_ENUM_INTERNAL(valloc) TLI_DEFINE_STRING_INTERNAL("valloc") +/// void *vec_calloc(size_t count, size_t size); +TLI_DEFINE_ENUM_INTERNAL(vec_calloc) +TLI_DEFINE_STRING_INTERNAL("vec_calloc") +/// void vec_free(void *ptr); +TLI_DEFINE_ENUM_INTERNAL(vec_free) +TLI_DEFINE_STRING_INTERNAL("vec_free") +/// void *vec_malloc(size_t size); +TLI_DEFINE_ENUM_INTERNAL(vec_malloc) +TLI_DEFINE_STRING_INTERNAL("vec_malloc") +/// void *vec_realloc(void *ptr, size_t size); +TLI_DEFINE_ENUM_INTERNAL(vec_realloc) +TLI_DEFINE_STRING_INTERNAL("vec_realloc") /// int vfprintf(FILE *stream, const char *format, va_list ap); TLI_DEFINE_ENUM_INTERNAL(vfprintf) TLI_DEFINE_STRING_INTERNAL("vfprintf") diff --git a/llvm/include/llvm/Analysis/TargetLibraryInfo.h b/llvm/include/llvm/Analysis/TargetLibraryInfo.h index 3a7c26e1463b..34a8a1e3407c 100644 --- a/llvm/include/llvm/Analysis/TargetLibraryInfo.h +++ b/llvm/include/llvm/Analysis/TargetLibraryInfo.h @@ -88,6 +88,7 @@ public: enum VectorLibrary { NoLibrary, // Don't use any vector library. Accelerate, // Use Accelerate framework. + LIBMVEC_X86,// GLIBC Vector Math library. MASSV, // IBM MASS vector library. SVML // Intel short vector math library. 
}; diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h index b6698eefdb01..cdfb04424e56 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -21,11 +21,13 @@ #ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFO_H #define LLVM_ANALYSIS_TARGETTRANSFORMINFO_H +#include "llvm/IR/InstrTypes.h" #include "llvm/IR/Operator.h" #include "llvm/IR/PassManager.h" #include "llvm/Pass.h" #include "llvm/Support/AtomicOrdering.h" #include "llvm/Support/DataTypes.h" +#include "llvm/Support/InstructionCost.h" #include <functional> namespace llvm { @@ -42,6 +44,7 @@ class CallBase; class ExtractElementInst; class Function; class GlobalValue; +class InstCombiner; class IntrinsicInst; class LoadInst; class LoopAccessInfo; @@ -56,6 +59,7 @@ class TargetLibraryInfo; class Type; class User; class Value; +struct KnownBits; template <typename T> class Optional; /// Information about a load/store intrinsic defined by the target. @@ -90,7 +94,7 @@ struct HardwareLoopInfo { Loop *L = nullptr; BasicBlock *ExitBlock = nullptr; BranchInst *ExitBranch = nullptr; - const SCEV *ExitCount = nullptr; + const SCEV *TripCount = nullptr; IntegerType *CountType = nullptr; Value *LoopDecrement = nullptr; // Decrement the loop counter by this // value in every iteration. @@ -114,7 +118,7 @@ class IntrinsicCostAttributes { SmallVector<Type *, 4> ParamTys; SmallVector<const Value *, 4> Arguments; FastMathFlags FMF; - unsigned VF = 1; + ElementCount VF = ElementCount::getFixed(1); // If ScalarizationCost is UINT_MAX, the cost of scalarizing the // arguments and the return value will be computed based on types. unsigned ScalarizationCost = std::numeric_limits<unsigned>::max(); @@ -125,10 +129,10 @@ public: IntrinsicCostAttributes(Intrinsic::ID Id, const CallBase &CI); IntrinsicCostAttributes(Intrinsic::ID Id, const CallBase &CI, - unsigned Factor); + ElementCount Factor); IntrinsicCostAttributes(Intrinsic::ID Id, const CallBase &CI, - unsigned Factor, unsigned ScalarCost); + ElementCount Factor, unsigned ScalarCost); IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy, ArrayRef<Type *> Tys, FastMathFlags Flags); @@ -151,7 +155,7 @@ public: Intrinsic::ID getID() const { return IID; } const IntrinsicInst *getInst() const { return II; } Type *getReturnType() const { return RetTy; } - unsigned getVectorFactor() const { return VF; } + ElementCount getVectorFactor() const { return VF; } FastMathFlags getFlags() const { return FMF; } unsigned getScalarizationCost() const { return ScalarizationCost; } const SmallVectorImpl<const Value *> &getArgs() const { return Arguments; } @@ -228,19 +232,24 @@ public: /// /// Note, this method does not cache the cost calculation and it /// can be expensive in some cases. - int getInstructionCost(const Instruction *I, enum TargetCostKind kind) const { + InstructionCost getInstructionCost(const Instruction *I, + enum TargetCostKind kind) const { + InstructionCost Cost; switch (kind) { case TCK_RecipThroughput: - return getInstructionThroughput(I); - + Cost = getInstructionThroughput(I); + break; case TCK_Latency: - return getInstructionLatency(I); - + Cost = getInstructionLatency(I); + break; case TCK_CodeSize: case TCK_SizeAndLatency: - return getUserCost(I, kind); + Cost = getUserCost(I, kind); + break; } - llvm_unreachable("Unknown instruction cost kind"); + if (Cost == -1) + Cost.setInvalid(); + return Cost; } /// Underlying constants for 'cost' values in this interface. 
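With the InstructionCost return type, an unknown cost is reported as an invalid value rather than the magic -1 handled in the wrapper above. A client-side sketch (not part of the imported diff), assuming TTI and I come from the caller and the usual InstructionCost::isValid() helper:

#include "llvm/Analysis/TargetTransformInfo.h"

using namespace llvm;

static bool hasKnownThroughputCost(const TargetTransformInfo &TTI,
                                   const Instruction *I) {
  InstructionCost Cost =
      TTI.getInstructionCost(I, TargetTransformInfo::TCK_RecipThroughput);
  return Cost.isValid(); // invalid corresponds to the old "-1" answer
}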
@@ -280,6 +289,9 @@ public: /// individual classes of instructions would be better. unsigned getInliningThresholdMultiplier() const; + /// \returns A value to be added to the inlining threshold. + unsigned adjustInliningThreshold(const CallBase *CB) const; + /// \returns Vector bonus in percent. /// /// Vector bonuses: We want to more aggressively inline vector-dense kernels @@ -323,8 +335,7 @@ public: /// This is a helper function which calls the two-argument getUserCost /// with \p Operands which are the current operands U has. int getUserCost(const User *U, TargetCostKind CostKind) const { - SmallVector<const Value *, 4> Operands(U->value_op_begin(), - U->value_op_end()); + SmallVector<const Value *, 4> Operands(U->operand_values()); return getUserCost(U, Operands, CostKind); } @@ -379,6 +390,8 @@ public: bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const; + unsigned getAssumedAddrSpace(const Value *V) const; + /// Rewrite intrinsic call \p II such that \p OldV will be replaced with \p /// NewV, which has a different address space. This should happen for every /// operand index that collectFlatAddressOperands returned for the intrinsic. @@ -542,6 +555,29 @@ public: /// target-independent defaults with information from \p L and \p SE. void getPeelingPreferences(Loop *L, ScalarEvolution &SE, PeelingPreferences &PP) const; + + /// Targets can implement their own combinations for target-specific + /// intrinsics. This function will be called from the InstCombine pass every + /// time a target-specific intrinsic is encountered. + /// + /// \returns None to not do anything target specific or a value that will be + /// returned from the InstCombiner. It is possible to return null and stop + /// further processing of the intrinsic by returning nullptr. + Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC, + IntrinsicInst &II) const; + /// Can be used to implement target-specific instruction combining. + /// \see instCombineIntrinsic + Optional<Value *> + simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II, + APInt DemandedMask, KnownBits &Known, + bool &KnownBitsComputed) const; + /// Can be used to implement target-specific instruction combining. + /// \see instCombineIntrinsic + Optional<Value *> simplifyDemandedVectorEltsIntrinsic( + InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, + APInt &UndefElts2, APInt &UndefElts3, + std::function<void(Instruction *, unsigned, APInt, APInt &)> + SimplifyAndSetOp) const; /// @} /// \name Scalar Target Information @@ -583,6 +619,11 @@ public: bool isLSRCostLess(TargetTransformInfo::LSRCost &C1, TargetTransformInfo::LSRCost &C2) const; + /// Return true if LSR major cost is number of registers. Targets which + /// implement their own isLSRCostLess and unset number of registers as major + /// cost should return false, otherwise return true. + bool isNumRegsMajorCostOfLSR() const; + /// \returns true if LSR should not optimize a chain that includes \p I. bool isProfitableLSRChainElement(Instruction *I) const; @@ -672,6 +713,9 @@ public: /// Return true if this type is legal. bool isTypeLegal(Type *Ty) const; + /// Returns the estimated number of registers required to represent \p Ty. + unsigned getRegUsageForType(Type *Ty) const; + /// Return true if switches should be turned into lookup tables for the /// target. 
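The new getMaximumVF hook gives SLP-style clients a target-specified ceiling on the vectorization factor, with 0 meaning no ceiling. A sketch (not part of the imported diff), assuming the candidate VF, element width in bits, and opcode are taken from the vectorizable tree:

#include "llvm/Analysis/TargetTransformInfo.h"
#include <algorithm>

using namespace llvm;

static unsigned clampVF(const TargetTransformInfo &TTI, unsigned VF,
                        unsigned ElemWidthInBits, unsigned Opcode) {
  if (unsigned MaxVF = TTI.getMaximumVF(ElemWidthInBits, Opcode))
    VF = std::min(VF, MaxVF); // respect the target-imposed maximum, if any
  return VF;
}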
bool shouldBuildLookupTables() const; @@ -780,8 +824,9 @@ public: /// Return the expected cost of materialization for the given integer /// immediate of the specified type for a given instruction. The cost can be /// zero if the immediate can be folded into the specified instruction. - int getIntImmCostInst(unsigned Opc, unsigned Idx, const APInt &Imm, - Type *Ty, TargetCostKind CostKind) const; + int getIntImmCostInst(unsigned Opc, unsigned Idx, const APInt &Imm, Type *Ty, + TargetCostKind CostKind, + Instruction *Inst = nullptr) const; int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TargetCostKind CostKind) const; @@ -845,6 +890,10 @@ public: static ReductionKind matchVectorSplittingReduction( const ExtractElementInst *ReduxRoot, unsigned &Opcode, VectorType *&Ty); + static ReductionKind matchVectorReduction(const ExtractElementInst *ReduxRoot, + unsigned &Opcode, VectorType *&Ty, + bool &IsPairwise); + /// Additional information about an operand's possible values. enum OperandValueKind { OK_AnyValue, // Operand can have any value. @@ -881,6 +930,10 @@ public: /// \return The width of the smallest vector register type. unsigned getMinVectorRegisterBitWidth() const; + /// \return The maximum value of vscale if the target specifies an + /// architectural maximum vector length, and None otherwise. + Optional<unsigned> getMaxVScale() const; + /// \return True if the vectorization factor should be chosen to /// make the vector of the smallest element type match the size of a /// vector register. For wider element types, this could result in @@ -894,6 +947,11 @@ public: /// applies when shouldMaximizeVectorBandwidth returns true. unsigned getMinimumVF(unsigned ElemWidth) const; + /// \return The maximum vectorization factor for types of given element + /// bit width and opcode, or 0 if there is no maximum VF. + /// Currently only used by the SLP vectorizer. + unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const; + /// \return True if it should be considered for address type promotion. /// \p AllowPromotionWithoutCommonHeader Set true if promoting \p I is /// profitable without finding other extensions fed by the same input. @@ -996,10 +1054,47 @@ public: int getShuffleCost(ShuffleKind Kind, VectorType *Tp, int Index = 0, VectorType *SubTp = nullptr) const; + /// Represents a hint about the context in which a cast is used. + /// + /// For zext/sext, the context of the cast is the operand, which must be a + /// load of some kind. For trunc, the context is of the cast is the single + /// user of the instruction, which must be a store of some kind. + /// + /// This enum allows the vectorizer to give getCastInstrCost an idea of the + /// type of cast it's dealing with, as not every cast is equal. For instance, + /// the zext of a load may be free, but the zext of an interleaving load can + //// be (very) expensive! + /// + /// See \c getCastContextHint to compute a CastContextHint from a cast + /// Instruction*. Callers can use it if they don't need to override the + /// context and just want it to be calculated from the instruction. + /// + /// FIXME: This handles the types of load/store that the vectorizer can + /// produce, which are the cases where the context instruction is most + /// likely to be incorrect. There are other situations where that can happen + /// too, which might be handled here but in the long run a more general + /// solution of costing multiple instructions at the same times may be better. 
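The CastContextHint machinery introduced below lets cost-model clients describe how a cast's memory operand is produced or consumed. A sketch of the intended call pattern (not part of the imported diff), assuming CI is an existing cast instruction and no context override is needed:

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/InstrTypes.h"

using namespace llvm;

static int castCost(const TargetTransformInfo &TTI, const CastInst *CI) {
  // Derive the hint from the instruction itself.
  TargetTransformInfo::CastContextHint CCH =
      TargetTransformInfo::getCastContextHint(CI);
  return TTI.getCastInstrCost(CI->getOpcode(), CI->getDestTy(), CI->getSrcTy(),
                              CCH, TargetTransformInfo::TCK_SizeAndLatency, CI);
}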
+ enum class CastContextHint : uint8_t { + None, ///< The cast is not used with a load/store of any kind. + Normal, ///< The cast is used with a normal load/store. + Masked, ///< The cast is used with a masked load/store. + GatherScatter, ///< The cast is used with a gather/scatter. + Interleave, ///< The cast is used with an interleaved load/store. + Reversed, ///< The cast is used with a reversed load/store. + }; + + /// Calculates a CastContextHint from \p I. + /// This should be used by callers of getCastInstrCost if they wish to + /// determine the context from some instruction. + /// \returns the CastContextHint for ZExt/SExt/Trunc, None if \p I is nullptr, + /// or if it's another type of cast. + static CastContextHint getCastContextHint(const Instruction *I); + /// \return The expected cost of cast instructions, such as bitcast, trunc, /// zext, etc. If there is an existing instruction that holds Opcode, it /// may be passed in the 'I' parameter. int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, + TTI::CastContextHint CCH, TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency, const Instruction *I = nullptr) const; @@ -1015,10 +1110,14 @@ public: /// \returns The expected cost of compare and select instructions. If there /// is an existing instruction that holds Opcode, it may be passed in the - /// 'I' parameter. - int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy = nullptr, - TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, - const Instruction *I = nullptr) const; + /// 'I' parameter. The \p VecPred parameter can be used to indicate the select + /// is using a compare with the specified predicate as condition. When vector + /// types are passed, \p VecPred must be used for all lanes. + int getCmpSelInstrCost( + unsigned Opcode, Type *ValTy, Type *CondTy = nullptr, + CmpInst::Predicate VecPred = CmpInst::BAD_ICMP_PREDICATE, + TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, + const Instruction *I = nullptr) const; /// \return The expected cost of vector Insert and Extract. /// Use -1 to indicate that there is no information on the index value. @@ -1086,6 +1185,16 @@ public: VectorType *Ty, VectorType *CondTy, bool IsPairwiseForm, bool IsUnsigned, TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const; + /// Calculate the cost of an extended reduction pattern, similar to + /// getArithmeticReductionCost of an Add reduction with an extension and + /// optional multiply. This is the cost of as: + /// ResTy vecreduce.add(ext(Ty A)), or if IsMLA flag is set then: + /// ResTy vecreduce.add(mul(ext(Ty A), ext(Ty B)). The reduction happens + /// on a VectorType with ResTy elements and Ty lanes. + InstructionCost getExtendedAddReductionCost( + bool IsMLA, bool IsUnsigned, Type *ResTy, VectorType *Ty, + TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const; + /// \returns The cost of Intrinsic instructions. Analyses the real arguments. /// Three cases are handled: 1. scalar instruction 2. vector instruction /// 3. scalar instruction which is to be vectorized. @@ -1221,6 +1330,24 @@ public: bool useReductionIntrinsic(unsigned Opcode, Type *Ty, ReductionFlags Flags) const; + /// \returns True if the target prefers reductions in loop. + bool preferInLoopReduction(unsigned Opcode, Type *Ty, + ReductionFlags Flags) const; + + /// \returns True if the target prefers reductions select kept in the loop + /// when tail folding. i.e. 
+ /// loop: + /// p = phi (0, s) + /// a = add (p, x) + /// s = select (mask, a, p) + /// vecreduce.add(s) + /// + /// As opposed to the normal scheme of p = phi (0, a) which allows the select + /// to be pulled out of the loop. If the select(.., add, ..) can be predicated + /// by the target, this can lead to cleaner code generation. + bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty, + ReductionFlags Flags) const; + /// \returns True if the target wants to expand the given reduction intrinsic /// into a shuffle sequence. bool shouldExpandReduction(const IntrinsicInst *II) const; @@ -1229,6 +1356,9 @@ public: /// to a stack reload. unsigned getGISelRematGlobalCost() const; + /// \returns True if the target supports scalable vectors. + bool supportsScalableVectors() const; + /// \name Vector Predication Information /// @{ /// Whether the target supports the %evl parameter of VP intrinsic efficiently @@ -1268,6 +1398,7 @@ public: ArrayRef<const Value *> Operands, TTI::TargetCostKind CostKind) = 0; virtual unsigned getInliningThresholdMultiplier() = 0; + virtual unsigned adjustInliningThreshold(const CallBase *CB) = 0; virtual int getInlinerVectorBonusPercent() = 0; virtual int getMemcpyCost(const Instruction *I) = 0; virtual unsigned @@ -1284,6 +1415,7 @@ public: virtual bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes, Intrinsic::ID IID) const = 0; virtual bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const = 0; + virtual unsigned getAssumedAddrSpace(const Value *V) const = 0; virtual Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV, Value *NewV) const = 0; @@ -1301,6 +1433,17 @@ public: AssumptionCache &AC, TargetLibraryInfo *TLI, DominatorTree *DT, const LoopAccessInfo *LAI) = 0; virtual bool emitGetActiveLaneMask() = 0; + virtual Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC, + IntrinsicInst &II) = 0; + virtual Optional<Value *> + simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II, + APInt DemandedMask, KnownBits &Known, + bool &KnownBitsComputed) = 0; + virtual Optional<Value *> simplifyDemandedVectorEltsIntrinsic( + InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, + APInt &UndefElts2, APInt &UndefElts3, + std::function<void(Instruction *, unsigned, APInt, APInt &)> + SimplifyAndSetOp) = 0; virtual bool isLegalAddImmediate(int64_t Imm) = 0; virtual bool isLegalICmpImmediate(int64_t Imm) = 0; virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, @@ -1309,6 +1452,7 @@ public: Instruction *I) = 0; virtual bool isLSRCostLess(TargetTransformInfo::LSRCost &C1, TargetTransformInfo::LSRCost &C2) = 0; + virtual bool isNumRegsMajorCostOfLSR() = 0; virtual bool isProfitableLSRChainElement(Instruction *I) = 0; virtual bool canMacroFuseCmp() = 0; virtual bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, @@ -1335,6 +1479,7 @@ public: virtual bool isProfitableToHoist(Instruction *I) = 0; virtual bool useAA() = 0; virtual bool isTypeLegal(Type *Ty) = 0; + virtual unsigned getRegUsageForType(Type *Ty) = 0; virtual bool shouldBuildLookupTables() = 0; virtual bool shouldBuildLookupTablesForConstant(Constant *C) = 0; virtual bool useColdCCForColdCall(Function &F) = 0; @@ -1365,7 +1510,8 @@ public: virtual int getIntImmCost(const APInt &Imm, Type *Ty, TargetCostKind CostKind) = 0; virtual int getIntImmCostInst(unsigned Opc, unsigned Idx, const APInt &Imm, - Type *Ty, TargetCostKind CostKind) = 0; + Type *Ty, TargetCostKind CostKind, + Instruction *Inst = 
nullptr) = 0; virtual int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TargetCostKind CostKind) = 0; @@ -1375,8 +1521,10 @@ public: virtual const char *getRegisterClassName(unsigned ClassID) const = 0; virtual unsigned getRegisterBitWidth(bool Vector) const = 0; virtual unsigned getMinVectorRegisterBitWidth() = 0; + virtual Optional<unsigned> getMaxVScale() const = 0; virtual bool shouldMaximizeVectorBandwidth(bool OptSize) const = 0; virtual unsigned getMinimumVF(unsigned ElemWidth) const = 0; + virtual unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const = 0; virtual bool shouldConsiderAddressTypePromotion( const Instruction &I, bool &AllowPromotionWithoutCommonHeader) = 0; virtual unsigned getCacheLineSize() const = 0; @@ -1418,6 +1566,7 @@ public: virtual int getShuffleCost(ShuffleKind Kind, VectorType *Tp, int Index, VectorType *SubTp) = 0; virtual int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, + CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I) = 0; virtual int getExtractWithExtendCost(unsigned Opcode, Type *Dst, @@ -1425,6 +1574,7 @@ public: virtual int getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind) = 0; virtual int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, + CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, const Instruction *I) = 0; virtual int getVectorInstrCost(unsigned Opcode, Type *Val, @@ -1452,6 +1602,9 @@ public: virtual int getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, bool IsPairwiseForm, bool IsUnsigned, TTI::TargetCostKind CostKind) = 0; + virtual InstructionCost getExtendedAddReductionCost( + bool IsMLA, bool IsUnsigned, Type *ResTy, VectorType *Ty, + TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) = 0; virtual int getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) = 0; virtual int getCallInstrCost(Function *F, Type *RetTy, @@ -1499,8 +1652,13 @@ public: VectorType *VecTy) const = 0; virtual bool useReductionIntrinsic(unsigned Opcode, Type *Ty, ReductionFlags) const = 0; + virtual bool preferInLoopReduction(unsigned Opcode, Type *Ty, + ReductionFlags) const = 0; + virtual bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty, + ReductionFlags) const = 0; virtual bool shouldExpandReduction(const IntrinsicInst *II) const = 0; virtual unsigned getGISelRematGlobalCost() const = 0; + virtual bool supportsScalableVectors() const = 0; virtual bool hasActiveVectorLength() const = 0; virtual int getInstructionLatency(const Instruction *I) = 0; }; @@ -1525,6 +1683,9 @@ public: unsigned getInliningThresholdMultiplier() override { return Impl.getInliningThresholdMultiplier(); } + unsigned adjustInliningThreshold(const CallBase *CB) override { + return Impl.adjustInliningThreshold(CB); + } int getInlinerVectorBonusPercent() override { return Impl.getInlinerVectorBonusPercent(); } @@ -1558,6 +1719,10 @@ public: return Impl.isNoopAddrSpaceCast(FromAS, ToAS); } + unsigned getAssumedAddrSpace(const Value *V) const override { + return Impl.getAssumedAddrSpace(V); + } + Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV, Value *NewV) const override { return Impl.rewriteIntrinsicWithAddressSpace(II, OldV, NewV); @@ -1588,6 +1753,26 @@ public: bool emitGetActiveLaneMask() override { return Impl.emitGetActiveLaneMask(); } + Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC, + IntrinsicInst &II) override { + return Impl.instCombineIntrinsic(IC, II); + } + 
Optional<Value *> + simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II, + APInt DemandedMask, KnownBits &Known, + bool &KnownBitsComputed) override { + return Impl.simplifyDemandedUseBitsIntrinsic(IC, II, DemandedMask, Known, + KnownBitsComputed); + } + Optional<Value *> simplifyDemandedVectorEltsIntrinsic( + InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, + APInt &UndefElts2, APInt &UndefElts3, + std::function<void(Instruction *, unsigned, APInt, APInt &)> + SimplifyAndSetOp) override { + return Impl.simplifyDemandedVectorEltsIntrinsic( + IC, II, DemandedElts, UndefElts, UndefElts2, UndefElts3, + SimplifyAndSetOp); + } bool isLegalAddImmediate(int64_t Imm) override { return Impl.isLegalAddImmediate(Imm); } @@ -1604,6 +1789,9 @@ public: TargetTransformInfo::LSRCost &C2) override { return Impl.isLSRCostLess(C1, C2); } + bool isNumRegsMajorCostOfLSR() override { + return Impl.isNumRegsMajorCostOfLSR(); + } bool isProfitableLSRChainElement(Instruction *I) override { return Impl.isProfitableLSRChainElement(I); } @@ -1665,6 +1853,9 @@ public: } bool useAA() override { return Impl.useAA(); } bool isTypeLegal(Type *Ty) override { return Impl.isTypeLegal(Ty); } + unsigned getRegUsageForType(Type *Ty) override { + return Impl.getRegUsageForType(Ty); + } bool shouldBuildLookupTables() override { return Impl.shouldBuildLookupTables(); } @@ -1729,9 +1920,10 @@ public: TargetCostKind CostKind) override { return Impl.getIntImmCost(Imm, Ty, CostKind); } - int getIntImmCostInst(unsigned Opc, unsigned Idx, const APInt &Imm, - Type *Ty, TargetCostKind CostKind) override { - return Impl.getIntImmCostInst(Opc, Idx, Imm, Ty, CostKind); + int getIntImmCostInst(unsigned Opc, unsigned Idx, const APInt &Imm, Type *Ty, + TargetCostKind CostKind, + Instruction *Inst = nullptr) override { + return Impl.getIntImmCostInst(Opc, Idx, Imm, Ty, CostKind, Inst); } int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TargetCostKind CostKind) override { @@ -1753,12 +1945,18 @@ public: unsigned getMinVectorRegisterBitWidth() override { return Impl.getMinVectorRegisterBitWidth(); } + Optional<unsigned> getMaxVScale() const override { + return Impl.getMaxVScale(); + } bool shouldMaximizeVectorBandwidth(bool OptSize) const override { return Impl.shouldMaximizeVectorBandwidth(OptSize); } unsigned getMinimumVF(unsigned ElemWidth) const override { return Impl.getMinimumVF(ElemWidth); } + unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const override { + return Impl.getMaximumVF(ElemWidth, Opcode); + } bool shouldConsiderAddressTypePromotion( const Instruction &I, bool &AllowPromotionWithoutCommonHeader) override { return Impl.shouldConsiderAddressTypePromotion( @@ -1826,9 +2024,9 @@ public: return Impl.getShuffleCost(Kind, Tp, Index, SubTp); } int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, - TTI::TargetCostKind CostKind, + CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I) override { - return Impl.getCastInstrCost(Opcode, Dst, Src, CostKind, I); + return Impl.getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I); } int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, unsigned Index) override { @@ -1838,9 +2036,10 @@ public: return Impl.getCFInstrCost(Opcode, CostKind); } int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, + CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, const Instruction *I) override { - return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy, 
CostKind, I); + return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I); } int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) override { return Impl.getVectorInstrCost(Opcode, Val, Index); @@ -1886,6 +2085,12 @@ public: return Impl.getMinMaxReductionCost(Ty, CondTy, IsPairwiseForm, IsUnsigned, CostKind); } + InstructionCost getExtendedAddReductionCost( + bool IsMLA, bool IsUnsigned, Type *ResTy, VectorType *Ty, + TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) override { + return Impl.getExtendedAddReductionCost(IsMLA, IsUnsigned, ResTy, Ty, + CostKind); + } int getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) override { return Impl.getIntrinsicInstrCost(ICA, CostKind); @@ -1979,6 +2184,14 @@ public: ReductionFlags Flags) const override { return Impl.useReductionIntrinsic(Opcode, Ty, Flags); } + bool preferInLoopReduction(unsigned Opcode, Type *Ty, + ReductionFlags Flags) const override { + return Impl.preferInLoopReduction(Opcode, Ty, Flags); + } + bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty, + ReductionFlags Flags) const override { + return Impl.preferPredicatedReductionSelect(Opcode, Ty, Flags); + } bool shouldExpandReduction(const IntrinsicInst *II) const override { return Impl.shouldExpandReduction(II); } @@ -1987,6 +2200,10 @@ public: return Impl.getGISelRematGlobalCost(); } + bool supportsScalableVectors() const override { + return Impl.supportsScalableVectors(); + } + bool hasActiveVectorLength() const override { return Impl.hasActiveVectorLength(); } diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h index 0ce975d6d4b5..7e31cb365a87 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -20,7 +20,7 @@ #include "llvm/IR/DataLayout.h" #include "llvm/IR/Function.h" #include "llvm/IR/GetElementPtrTypeIterator.h" -#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Operator.h" #include "llvm/IR/Type.h" @@ -46,7 +46,7 @@ public: int getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef<const Value *> Operands, - TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) { + TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) const { // In the basic model, we just assume that all-constant GEPs will be folded // into their uses via addressing modes. 
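The getExtendedAddReductionCost hook added above lets a target price an extending (optionally multiply-accumulating) add reduction as one operation. A caller-side sketch under assumed types (the helper name, TTI and Ctx are assumptions): cost a reduction that sign-extends <16 x i8> elements and accumulates them into an i32, instead of costing the extend and the reduce separately.

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/Support/InstructionCost.h"
using namespace llvm;

// Hypothetical helper: one extended-add reduction query.
static InstructionCost extAddReduxCostSketch(const TargetTransformInfo &TTI,
                                             LLVMContext &Ctx) {
  return TTI.getExtendedAddReductionCost(
      /*IsMLA=*/false, /*IsUnsigned=*/false, Type::getInt32Ty(Ctx),
      FixedVectorType::get(Type::getInt8Ty(Ctx), 16));
}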
for (unsigned Idx = 0, Size = Operands.size(); Idx != Size; ++Idx) @@ -59,28 +59,31 @@ public: unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JTSize, ProfileSummaryInfo *PSI, - BlockFrequencyInfo *BFI) { + BlockFrequencyInfo *BFI) const { (void)PSI; (void)BFI; JTSize = 0; return SI.getNumCases(); } - unsigned getInliningThresholdMultiplier() { return 1; } + unsigned getInliningThresholdMultiplier() const { return 1; } + unsigned adjustInliningThreshold(const CallBase *CB) const { return 0; } - int getInlinerVectorBonusPercent() { return 150; } + int getInlinerVectorBonusPercent() const { return 150; } - unsigned getMemcpyCost(const Instruction *I) { return TTI::TCC_Expensive; } + unsigned getMemcpyCost(const Instruction *I) const { + return TTI::TCC_Expensive; + } - bool hasBranchDivergence() { return false; } + bool hasBranchDivergence() const { return false; } - bool useGPUDivergenceAnalysis() { return false; } + bool useGPUDivergenceAnalysis() const { return false; } - bool isSourceOfDivergence(const Value *V) { return false; } + bool isSourceOfDivergence(const Value *V) const { return false; } - bool isAlwaysUniform(const Value *V) { return false; } + bool isAlwaysUniform(const Value *V) const { return false; } - unsigned getFlatAddressSpace() { return -1; } + unsigned getFlatAddressSpace() const { return -1; } bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes, Intrinsic::ID IID) const { @@ -89,12 +92,14 @@ public: bool isNoopAddrSpaceCast(unsigned, unsigned) const { return false; } + unsigned getAssumedAddrSpace(const Value *V) const { return -1; } + Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV, Value *NewV) const { return nullptr; } - bool isLoweredToCall(const Function *F) { + bool isLoweredToCall(const Function *F) const { assert(F && "A concrete function must be provided to this routine."); // FIXME: These should almost certainly not be handled here, and instead @@ -132,7 +137,7 @@ public: bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *LibInfo, - HardwareLoopInfo &HWLoopInfo) { + HardwareLoopInfo &HWLoopInfo) const { return false; } @@ -147,38 +152,60 @@ public: return false; } + Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC, + IntrinsicInst &II) const { + return None; + } + + Optional<Value *> + simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II, + APInt DemandedMask, KnownBits &Known, + bool &KnownBitsComputed) const { + return None; + } + + Optional<Value *> simplifyDemandedVectorEltsIntrinsic( + InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, + APInt &UndefElts2, APInt &UndefElts3, + std::function<void(Instruction *, unsigned, APInt, APInt &)> + SimplifyAndSetOp) const { + return None; + } + void getUnrollingPreferences(Loop *, ScalarEvolution &, - TTI::UnrollingPreferences &) {} + TTI::UnrollingPreferences &) const {} void getPeelingPreferences(Loop *, ScalarEvolution &, - TTI::PeelingPreferences &) {} + TTI::PeelingPreferences &) const {} - bool isLegalAddImmediate(int64_t Imm) { return false; } + bool isLegalAddImmediate(int64_t Imm) const { return false; } - bool isLegalICmpImmediate(int64_t Imm) { return false; } + bool isLegalICmpImmediate(int64_t Imm) const { return false; } bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace, - Instruction *I = nullptr) { + Instruction *I = nullptr) const { // Guess that only reg 
and reg+reg addressing is allowed. This heuristic is // taken from the implementation of LSR. return !BaseGV && BaseOffset == 0 && (Scale == 0 || Scale == 1); } - bool isLSRCostLess(TTI::LSRCost &C1, TTI::LSRCost &C2) { + bool isLSRCostLess(TTI::LSRCost &C1, TTI::LSRCost &C2) const { return std::tie(C1.NumRegs, C1.AddRecCost, C1.NumIVMuls, C1.NumBaseAdds, C1.ScaleCost, C1.ImmCost, C1.SetupCost) < std::tie(C2.NumRegs, C2.AddRecCost, C2.NumIVMuls, C2.NumBaseAdds, C2.ScaleCost, C2.ImmCost, C2.SetupCost); } - bool isProfitableLSRChainElement(Instruction *I) { return false; } + bool isNumRegsMajorCostOfLSR() const { return true; } + + bool isProfitableLSRChainElement(Instruction *I) const { return false; } - bool canMacroFuseCmp() { return false; } + bool canMacroFuseCmp() const { return false; } bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC, - TargetLibraryInfo *LibInfo) { + TargetLibraryInfo *LibInfo) const { return false; } @@ -186,40 +213,51 @@ public: bool shouldFavorBackedgeIndex(const Loop *L) const { return false; } - bool isLegalMaskedStore(Type *DataType, Align Alignment) { return false; } + bool isLegalMaskedStore(Type *DataType, Align Alignment) const { + return false; + } - bool isLegalMaskedLoad(Type *DataType, Align Alignment) { return false; } + bool isLegalMaskedLoad(Type *DataType, Align Alignment) const { + return false; + } - bool isLegalNTStore(Type *DataType, Align Alignment) { + bool isLegalNTStore(Type *DataType, Align Alignment) const { // By default, assume nontemporal memory stores are available for stores // that are aligned and have a size that is a power of 2. unsigned DataSize = DL.getTypeStoreSize(DataType); return Alignment >= DataSize && isPowerOf2_32(DataSize); } - bool isLegalNTLoad(Type *DataType, Align Alignment) { + bool isLegalNTLoad(Type *DataType, Align Alignment) const { // By default, assume nontemporal memory loads are available for loads that // are aligned and have a size that is a power of 2. unsigned DataSize = DL.getTypeStoreSize(DataType); return Alignment >= DataSize && isPowerOf2_32(DataSize); } - bool isLegalMaskedScatter(Type *DataType, Align Alignment) { return false; } + bool isLegalMaskedScatter(Type *DataType, Align Alignment) const { + return false; + } - bool isLegalMaskedGather(Type *DataType, Align Alignment) { return false; } + bool isLegalMaskedGather(Type *DataType, Align Alignment) const { + return false; + } - bool isLegalMaskedCompressStore(Type *DataType) { return false; } + bool isLegalMaskedCompressStore(Type *DataType) const { return false; } - bool isLegalMaskedExpandLoad(Type *DataType) { return false; } + bool isLegalMaskedExpandLoad(Type *DataType) const { return false; } - bool hasDivRemOp(Type *DataType, bool IsSigned) { return false; } + bool hasDivRemOp(Type *DataType, bool IsSigned) const { return false; } - bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) { return false; } + bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const { + return false; + } - bool prefersVectorizedAddressing() { return true; } + bool prefersVectorizedAddressing() const { return true; } int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, - bool HasBaseReg, int64_t Scale, unsigned AddrSpace) { + bool HasBaseReg, int64_t Scale, + unsigned AddrSpace) const { // Guess that all legal addressing mode are free. 
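A worked instance of the default nontemporal-store rule above, as a sketch (the helper name, TTI and Ctx are assumptions, and targets may override the hook): the store must be at least as aligned as its size and the size must be a power of two.

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/Support/Alignment.h"
using namespace llvm;

// Hypothetical helper: a 16-byte <4 x float> nontemporal store is legal under the
// default rule only with at least 16-byte alignment.
static void ntStoreRuleSketch(const TargetTransformInfo &TTI, LLVMContext &Ctx) {
  auto *V4F32 = FixedVectorType::get(Type::getFloatTy(Ctx), 4);
  bool Aligned = TTI.isLegalNTStore(V4F32, Align(16));     // true under the default rule
  bool Underaligned = TTI.isLegalNTStore(V4F32, Align(8)); // false: alignment < store size
  (void)Aligned;
  (void)Underaligned;
}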
if (isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, Scale, AddrSpace)) @@ -227,80 +265,87 @@ public: return -1; } - bool LSRWithInstrQueries() { return false; } + bool LSRWithInstrQueries() const { return false; } - bool isTruncateFree(Type *Ty1, Type *Ty2) { return false; } + bool isTruncateFree(Type *Ty1, Type *Ty2) const { return false; } - bool isProfitableToHoist(Instruction *I) { return true; } + bool isProfitableToHoist(Instruction *I) const { return true; } - bool useAA() { return false; } + bool useAA() const { return false; } - bool isTypeLegal(Type *Ty) { return false; } + bool isTypeLegal(Type *Ty) const { return false; } - bool shouldBuildLookupTables() { return true; } - bool shouldBuildLookupTablesForConstant(Constant *C) { return true; } + unsigned getRegUsageForType(Type *Ty) const { return 1; } - bool useColdCCForColdCall(Function &F) { return false; } + bool shouldBuildLookupTables() const { return true; } + bool shouldBuildLookupTablesForConstant(Constant *C) const { return true; } + + bool useColdCCForColdCall(Function &F) const { return false; } unsigned getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, - bool Insert, bool Extract) { + bool Insert, bool Extract) const { return 0; } unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args, - unsigned VF) { + unsigned VF) const { return 0; } - bool supportsEfficientVectorElementLoadStore() { return false; } + bool supportsEfficientVectorElementLoadStore() const { return false; } - bool enableAggressiveInterleaving(bool LoopHasReductions) { return false; } + bool enableAggressiveInterleaving(bool LoopHasReductions) const { + return false; + } TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const { return {}; } - bool enableInterleavedAccessVectorization() { return false; } + bool enableInterleavedAccessVectorization() const { return false; } - bool enableMaskedInterleavedAccessVectorization() { return false; } + bool enableMaskedInterleavedAccessVectorization() const { return false; } - bool isFPVectorizationPotentiallyUnsafe() { return false; } + bool isFPVectorizationPotentiallyUnsafe() const { return false; } bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth, unsigned AddressSpace, unsigned Alignment, - bool *Fast) { + bool *Fast) const { return false; } - TTI::PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) { + TTI::PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const { return TTI::PSK_Software; } - bool haveFastSqrt(Type *Ty) { return false; } + bool haveFastSqrt(Type *Ty) const { return false; } - bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) { return true; } + bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const { return true; } - unsigned getFPOpCost(Type *Ty) { return TargetTransformInfo::TCC_Basic; } + unsigned getFPOpCost(Type *Ty) const { + return TargetTransformInfo::TCC_Basic; + } int getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx, const APInt &Imm, - Type *Ty) { + Type *Ty) const { return 0; } unsigned getIntImmCost(const APInt &Imm, Type *Ty, - TTI::TargetCostKind CostKind) { + TTI::TargetCostKind CostKind) const { return TTI::TCC_Basic; } unsigned getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, - Type *Ty, TTI::TargetCostKind CostKind) { + Type *Ty, TTI::TargetCostKind CostKind, + Instruction *Inst = nullptr) const { return TTI::TCC_Free; } unsigned getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, - TTI::TargetCostKind CostKind) 
{ + TTI::TargetCostKind CostKind) const { return TTI::TCC_Free; } @@ -323,15 +368,18 @@ public: unsigned getRegisterBitWidth(bool Vector) const { return 32; } - unsigned getMinVectorRegisterBitWidth() { return 128; } + unsigned getMinVectorRegisterBitWidth() const { return 128; } + + Optional<unsigned> getMaxVScale() const { return None; } bool shouldMaximizeVectorBandwidth(bool OptSize) const { return false; } unsigned getMinimumVF(unsigned ElemWidth) const { return 0; } - bool - shouldConsiderAddressTypePromotion(const Instruction &I, - bool &AllowPromotionWithoutCommonHeader) { + unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const { return 0; } + + bool shouldConsiderAddressTypePromotion( + const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const { AllowPromotionWithoutCommonHeader = false; return false; } @@ -370,7 +418,7 @@ public: unsigned getMaxPrefetchIterationsAhead() const { return UINT_MAX; } bool enableWritePrefetching() const { return false; } - unsigned getMaxInterleaveFactor(unsigned VF) { return 1; } + unsigned getMaxInterleaveFactor(unsigned VF) const { return 1; } unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, @@ -379,7 +427,7 @@ public: TTI::OperandValueProperties Opd1PropInfo, TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args, - const Instruction *CxtI = nullptr) { + const Instruction *CxtI = nullptr) const { // FIXME: A number of transformation tests seem to require these values // which seems a little odd for how arbitary there are. switch (Opcode) { @@ -398,13 +446,14 @@ public: } unsigned getShuffleCost(TTI::ShuffleKind Kind, VectorType *Ty, int Index, - VectorType *SubTp) { + VectorType *SubTp) const { return 1; } unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, + TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, - const Instruction *I) { + const Instruction *I) const { switch (Opcode) { default: break; @@ -427,23 +476,24 @@ public: // Identity and pointer-to-pointer casts are free. return 0; break; - case Instruction::Trunc: + case Instruction::Trunc: { // trunc to a native type is free (assuming the target has compare and // shift-right of the same width). - if (DL.isLegalInteger(DL.getTypeSizeInBits(Dst))) + TypeSize DstSize = DL.getTypeSizeInBits(Dst); + if (!DstSize.isScalable() && DL.isLegalInteger(DstSize.getFixedSize())) return 0; break; } + } return 1; } unsigned getExtractWithExtendCost(unsigned Opcode, Type *Dst, - VectorType *VecTy, unsigned Index) { + VectorType *VecTy, unsigned Index) const { return 1; } - unsigned getCFInstrCost(unsigned Opcode, - TTI::TargetCostKind CostKind) { + unsigned getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind) const { // A phi would be free, unless we're costing the throughput because it // will require a register. 
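The default getMaxVScale above returns None, meaning the target gives no upper bound for vscale, so callers must handle both cases. A small sketch (the helper name and TTI are assumptions):

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

// Hypothetical helper: report the target's bound on vscale, if any.
static void reportMaxVScaleSketch(const TargetTransformInfo &TTI) {
  if (Optional<unsigned> MaxVScale = TTI.getMaxVScale())
    errs() << "vscale is at most " << *MaxVScale << "\n";
  else
    errs() << "no upper bound on vscale is known\n";
}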
if (Opcode == Instruction::PHI && CostKind != TTI::TCK_RecipThroughput) @@ -452,12 +502,14 @@ public: } unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, + CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, const Instruction *I) const { return 1; } - unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) { + unsigned getVectorInstrCost(unsigned Opcode, Type *Val, + unsigned Index) const { return 1; } @@ -469,32 +521,33 @@ public: unsigned getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, - TTI::TargetCostKind CostKind) { + TTI::TargetCostKind CostKind) const { return 1; } unsigned getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, - const Instruction *I = nullptr) { + const Instruction *I = nullptr) const { return 1; } unsigned getInterleavedMemoryOpCost( unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, - bool UseMaskForCond, bool UseMaskForGaps) { + bool UseMaskForCond, bool UseMaskForGaps) const { return 1; } unsigned getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, - TTI::TargetCostKind CostKind) { + TTI::TargetCostKind CostKind) const { switch (ICA.getID()) { default: break; case Intrinsic::annotation: case Intrinsic::assume: case Intrinsic::sideeffect: + case Intrinsic::pseudoprobe: case Intrinsic::dbg_declare: case Intrinsic::dbg_value: case Intrinsic::dbg_label: @@ -505,6 +558,7 @@ public: case Intrinsic::is_constant: case Intrinsic::lifetime_start: case Intrinsic::lifetime_end: + case Intrinsic::experimental_noalias_scope_decl: case Intrinsic::objectsize: case Intrinsic::ptr_annotation: case Intrinsic::var_annotation: @@ -526,26 +580,38 @@ public: } unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys, - TTI::TargetCostKind CostKind) { + TTI::TargetCostKind CostKind) const { return 1; } - unsigned getNumberOfParts(Type *Tp) { return 0; } + unsigned getNumberOfParts(Type *Tp) const { return 0; } unsigned getAddressComputationCost(Type *Tp, ScalarEvolution *, - const SCEV *) { + const SCEV *) const { return 0; } unsigned getArithmeticReductionCost(unsigned, VectorType *, bool, - TTI::TargetCostKind) { return 1; } + TTI::TargetCostKind) const { + return 1; + } unsigned getMinMaxReductionCost(VectorType *, VectorType *, bool, bool, - TTI::TargetCostKind) { return 1; } + TTI::TargetCostKind) const { + return 1; + } + + InstructionCost getExtendedAddReductionCost( + bool IsMLA, bool IsUnsigned, Type *ResTy, VectorType *Ty, + TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const { + return 1; + } - unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) { return 0; } + unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const { + return 0; + } - bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) { + bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const { return false; } @@ -559,7 +625,7 @@ public: } Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, - Type *ExpectedType) { + Type *ExpectedType) const { return nullptr; } @@ -637,22 +703,34 @@ public: return false; } + bool preferInLoopReduction(unsigned Opcode, Type *Ty, + TTI::ReductionFlags Flags) const { + return false; + } + + bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty, + TTI::ReductionFlags Flags) const { + return false; + } + bool 
shouldExpandReduction(const IntrinsicInst *II) const { return true; } unsigned getGISelRematGlobalCost() const { return 1; } + bool supportsScalableVectors() const { return false; } + bool hasActiveVectorLength() const { return false; } protected: // Obtain the minimum required size to hold the value (without the sign) // In case of a vector it returns the min required size for one element. - unsigned minRequiredElementSize(const Value *Val, bool &isSigned) { + unsigned minRequiredElementSize(const Value *Val, bool &isSigned) const { if (isa<ConstantDataVector>(Val) || isa<ConstantVector>(Val)) { const auto *VectorValue = cast<Constant>(Val); // In case of a vector need to pick the max between the min // required size for each element - auto *VT = cast<VectorType>(Val->getType()); + auto *VT = cast<FixedVectorType>(Val->getType()); // Assume unsigned elements isSigned = false; @@ -700,12 +778,12 @@ protected: return Val->getType()->getScalarSizeInBits(); } - bool isStridedAccess(const SCEV *Ptr) { + bool isStridedAccess(const SCEV *Ptr) const { return Ptr && isa<SCEVAddRecExpr>(Ptr); } const SCEVConstant *getConstantStrideStep(ScalarEvolution *SE, - const SCEV *Ptr) { + const SCEV *Ptr) const { if (!isStridedAccess(Ptr)) return nullptr; const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ptr); @@ -713,7 +791,7 @@ protected: } bool isConstantStridedAccessLessThan(ScalarEvolution *SE, const SCEV *Ptr, - int64_t MergeDistance) { + int64_t MergeDistance) const { const SCEVConstant *Step = getConstantStrideStep(SE, Ptr); if (!Step) return false; @@ -775,7 +853,12 @@ public: uint64_t Field = ConstIdx->getZExtValue(); BaseOffset += DL.getStructLayout(STy)->getElementOffset(Field); } else { - int64_t ElementSize = DL.getTypeAllocSize(GTI.getIndexedType()); + // If this operand is a scalable type, bail out early. + // TODO: handle scalable vectors + if (isa<ScalableVectorType>(TargetType)) + return TTI::TCC_Basic; + int64_t ElementSize = + DL.getTypeAllocSize(GTI.getIndexedType()).getFixedSize(); if (ConstIdx) { BaseOffset += ConstIdx->getValue().sextOrTrunc(PtrSizeBits) * ElementSize; @@ -800,30 +883,17 @@ public: int getUserCost(const User *U, ArrayRef<const Value *> Operands, TTI::TargetCostKind CostKind) { auto *TargetTTI = static_cast<T *>(this); - - // FIXME: We shouldn't have to special-case intrinsics here. - if (CostKind == TTI::TCK_RecipThroughput) { - if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) { - IntrinsicCostAttributes CostAttrs(*II); - return TargetTTI->getIntrinsicInstrCost(CostAttrs, CostKind); - } - } - + // Handle non-intrinsic calls, invokes, and callbr. // FIXME: Unlikely to be true for anything but CodeSize. - if (const auto *CB = dyn_cast<CallBase>(U)) { - const Function *F = CB->getCalledFunction(); - if (F) { - FunctionType *FTy = F->getFunctionType(); - if (Intrinsic::ID IID = F->getIntrinsicID()) { - IntrinsicCostAttributes Attrs(IID, *CB); - return TargetTTI->getIntrinsicInstrCost(Attrs, CostKind); - } - + auto *CB = dyn_cast<CallBase>(U); + if (CB && !isa<IntrinsicInst>(U)) { + if (const Function *F = CB->getCalledFunction()) { if (!TargetTTI->isLoweredToCall(F)) return TTI::TCC_Basic; // Give a basic cost if it will be lowered - return TTI::TCC_Basic * (FTy->getNumParams() + 1); + return TTI::TCC_Basic * (F->getFunctionType()->getNumParams() + 1); } + // For indirect or other calls, scale cost by number of arguments. 
return TTI::TCC_Basic * (CB->arg_size() + 1); } @@ -835,6 +905,12 @@ public: switch (Opcode) { default: break; + case Instruction::Call: { + assert(isa<IntrinsicInst>(U) && "Unexpected non-intrinsic call"); + auto *Intrinsic = cast<IntrinsicInst>(U); + IntrinsicCostAttributes CostAttrs(Intrinsic->getIntrinsicID(), *CB); + return TargetTTI->getIntrinsicInstrCost(CostAttrs, CostKind); + } case Instruction::Br: case Instruction::Ret: case Instruction::PHI: @@ -895,7 +971,8 @@ public: case Instruction::SExt: case Instruction::ZExt: case Instruction::AddrSpaceCast: - return TargetTTI->getCastInstrCost(Opcode, Ty, OpTy, CostKind, I); + return TargetTTI->getCastInstrCost( + Opcode, Ty, OpTy, TTI::getCastContextHint(I), CostKind, I); case Instruction::Store: { auto *SI = cast<StoreInst>(U); Type *ValTy = U->getOperand(0)->getType(); @@ -912,12 +989,16 @@ public: case Instruction::Select: { Type *CondTy = U->getOperand(0)->getType(); return TargetTTI->getCmpSelInstrCost(Opcode, U->getType(), CondTy, + CmpInst::BAD_ICMP_PREDICATE, CostKind, I); } case Instruction::ICmp: case Instruction::FCmp: { Type *ValTy = U->getOperand(0)->getType(); + // TODO: Also handle ICmp/FCmp constant expressions. return TargetTTI->getCmpSelInstrCost(Opcode, ValTy, U->getType(), + I ? cast<CmpInst>(I)->getPredicate() + : CmpInst::BAD_ICMP_PREDICATE, CostKind, I); } case Instruction::InsertElement: { @@ -969,41 +1050,23 @@ public: if (CI) Idx = CI->getZExtValue(); - // Try to match a reduction sequence (series of shufflevector and - // vector adds followed by a extractelement). - unsigned ReduxOpCode; - VectorType *ReduxType; - - switch (TTI::matchVectorSplittingReduction(EEI, ReduxOpCode, - ReduxType)) { - case TTI::RK_Arithmetic: - return TargetTTI->getArithmeticReductionCost(ReduxOpCode, ReduxType, - /*IsPairwiseForm=*/false, - CostKind); - case TTI::RK_MinMax: - return TargetTTI->getMinMaxReductionCost( - ReduxType, cast<VectorType>(CmpInst::makeCmpResultType(ReduxType)), - /*IsPairwiseForm=*/false, /*IsUnsigned=*/false, CostKind); - case TTI::RK_UnsignedMinMax: - return TargetTTI->getMinMaxReductionCost( - ReduxType, cast<VectorType>(CmpInst::makeCmpResultType(ReduxType)), - /*IsPairwiseForm=*/false, /*IsUnsigned=*/true, CostKind); - case TTI::RK_None: - break; - } - - switch (TTI::matchPairwiseReduction(EEI, ReduxOpCode, ReduxType)) { + // Try to match a reduction (a series of shufflevector and vector ops + // followed by an extractelement). 
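As the compare/select hunks here show, the comparison predicate is now threaded into the cost query, with CmpInst::BAD_ICMP_PREDICATE as the "predicate unknown" fallback. A caller-side sketch mirroring that pattern (the helper name and parameters are assumptions):

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// Hypothetical helper: cost an integer compare, forwarding its actual predicate so
// a target can price individual predicates differently.
static int cmpCostSketch(const TargetTransformInfo &TTI, const ICmpInst *Cmp,
                         TargetTransformInfo::TargetCostKind CostKind) {
  return TTI.getCmpSelInstrCost(Cmp->getOpcode(), Cmp->getOperand(0)->getType(),
                                Cmp->getType(), Cmp->getPredicate(), CostKind, Cmp);
}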
+ unsigned RdxOpcode; + VectorType *RdxType; + bool IsPairwise; + switch (TTI::matchVectorReduction(EEI, RdxOpcode, RdxType, IsPairwise)) { case TTI::RK_Arithmetic: - return TargetTTI->getArithmeticReductionCost(ReduxOpCode, ReduxType, - /*IsPairwiseForm=*/true, CostKind); + return TargetTTI->getArithmeticReductionCost(RdxOpcode, RdxType, + IsPairwise, CostKind); case TTI::RK_MinMax: return TargetTTI->getMinMaxReductionCost( - ReduxType, cast<VectorType>(CmpInst::makeCmpResultType(ReduxType)), - /*IsPairwiseForm=*/true, /*IsUnsigned=*/false, CostKind); + RdxType, cast<VectorType>(CmpInst::makeCmpResultType(RdxType)), + IsPairwise, /*IsUnsigned=*/false, CostKind); case TTI::RK_UnsignedMinMax: return TargetTTI->getMinMaxReductionCost( - ReduxType, cast<VectorType>(CmpInst::makeCmpResultType(ReduxType)), - /*IsPairwiseForm=*/true, /*IsUnsigned=*/true, CostKind); + RdxType, cast<VectorType>(CmpInst::makeCmpResultType(RdxType)), + IsPairwise, /*IsUnsigned=*/true, CostKind); case TTI::RK_None: break; } @@ -1016,8 +1079,7 @@ public: } int getInstructionLatency(const Instruction *I) { - SmallVector<const Value *, 4> Operands(I->value_op_begin(), - I->value_op_end()); + SmallVector<const Value *, 4> Operands(I->operand_values()); if (getUserCost(I, Operands, TTI::TCK_Latency) == TTI::TCC_Free) return 0; diff --git a/llvm/include/llvm/Transforms/Utils/ImportedFunctionsInliningStatistics.h b/llvm/include/llvm/Analysis/Utils/ImportedFunctionsInliningStatistics.h index 033ea05b77fa..d02bcd0e335b 100644 --- a/llvm/include/llvm/Transforms/Utils/ImportedFunctionsInliningStatistics.h +++ b/llvm/include/llvm/Analysis/Utils/ImportedFunctionsInliningStatistics.h @@ -101,6 +101,12 @@ private: StringRef ModuleName; }; +enum class InlinerFunctionImportStatsOpts { + No = 0, + Basic = 1, + Verbose = 2, +}; + } // llvm #endif // LLVM_TRANSFORMS_UTILS_IMPORTEDFUNCTIONSINLININGSTATISTICS_H diff --git a/llvm/include/llvm/Analysis/Utils/Local.h b/llvm/include/llvm/Analysis/Utils/Local.h index f31b56345424..bd82b34165d6 100644 --- a/llvm/include/llvm/Analysis/Utils/Local.h +++ b/llvm/include/llvm/Analysis/Utils/Local.h @@ -30,7 +30,7 @@ Value *EmitGEPOffset(IRBuilderTy *Builder, const DataLayout &DL, User *GEP, bool NoAssumptions = false) { GEPOperator *GEPOp = cast<GEPOperator>(GEP); Type *IntIdxTy = DL.getIndexType(GEP->getType()); - Value *Result = Constant::getNullValue(IntIdxTy); + Value *Result = nullptr; // If the GEP is inbounds, we know that none of the addressing operations will // overflow in a signed sense. @@ -46,6 +46,7 @@ Value *EmitGEPOffset(IRBuilderTy *Builder, const DataLayout &DL, User *GEP, ++i, ++GTI) { Value *Op = *i; uint64_t Size = DL.getTypeAllocSize(GTI.getIndexedType()) & PtrSizeMask; + Value *Offset; if (Constant *OpC = dyn_cast<Constant>(Op)) { if (OpC->isZeroValue()) continue; @@ -54,46 +55,47 @@ Value *EmitGEPOffset(IRBuilderTy *Builder, const DataLayout &DL, User *GEP, if (StructType *STy = GTI.getStructTypeOrNull()) { uint64_t OpValue = OpC->getUniqueInteger().getZExtValue(); Size = DL.getStructLayout(STy)->getElementOffset(OpValue); - - if (Size) - Result = Builder->CreateAdd(Result, ConstantInt::get(IntIdxTy, Size), - GEP->getName().str()+".offs"); - continue; + if (!Size) + continue; + + Offset = ConstantInt::get(IntIdxTy, Size); + } else { + // Splat the constant if needed. 
+ if (IntIdxTy->isVectorTy() && !OpC->getType()->isVectorTy()) + OpC = ConstantVector::getSplat( + cast<VectorType>(IntIdxTy)->getElementCount(), OpC); + + Constant *Scale = ConstantInt::get(IntIdxTy, Size); + Constant *OC = + ConstantExpr::getIntegerCast(OpC, IntIdxTy, true /*SExt*/); + Offset = + ConstantExpr::getMul(OC, Scale, false /*NUW*/, isInBounds /*NSW*/); } - - // Splat the constant if needed. - if (IntIdxTy->isVectorTy() && !OpC->getType()->isVectorTy()) - OpC = ConstantVector::getSplat( - cast<VectorType>(IntIdxTy)->getElementCount(), OpC); - - Constant *Scale = ConstantInt::get(IntIdxTy, Size); - Constant *OC = ConstantExpr::getIntegerCast(OpC, IntIdxTy, true /*SExt*/); - Scale = - ConstantExpr::getMul(OC, Scale, false /*NUW*/, isInBounds /*NSW*/); - // Emit an add instruction. - Result = Builder->CreateAdd(Result, Scale, GEP->getName().str()+".offs"); - continue; - } - - // Splat the index if needed. - if (IntIdxTy->isVectorTy() && !Op->getType()->isVectorTy()) - Op = Builder->CreateVectorSplat( - cast<VectorType>(IntIdxTy)->getNumElements(), Op); - - // Convert to correct type. - if (Op->getType() != IntIdxTy) - Op = Builder->CreateIntCast(Op, IntIdxTy, true, Op->getName().str()+".c"); - if (Size != 1) { - // We'll let instcombine(mul) convert this to a shl if possible. - Op = Builder->CreateMul(Op, ConstantInt::get(IntIdxTy, Size), - GEP->getName().str() + ".idx", false /*NUW*/, - isInBounds /*NSW*/); + } else { + // Splat the index if needed. + if (IntIdxTy->isVectorTy() && !Op->getType()->isVectorTy()) + Op = Builder->CreateVectorSplat( + cast<FixedVectorType>(IntIdxTy)->getNumElements(), Op); + + // Convert to correct type. + if (Op->getType() != IntIdxTy) + Op = Builder->CreateIntCast(Op, IntIdxTy, true, Op->getName().str()+".c"); + if (Size != 1) { + // We'll let instcombine(mul) convert this to a shl if possible. + Op = Builder->CreateMul(Op, ConstantInt::get(IntIdxTy, Size), + GEP->getName().str() + ".idx", false /*NUW*/, + isInBounds /*NSW*/); + } + Offset = Op; } - // Emit an add instruction. - Result = Builder->CreateAdd(Op, Result, GEP->getName().str()+".offs"); + if (Result) + Result = Builder->CreateAdd(Result, Offset, GEP->getName().str()+".offs", + false /*NUW*/, isInBounds /*NSW*/); + else + Result = Offset; } - return Result; + return Result ? Result : Constant::getNullValue(IntIdxTy); } } diff --git a/llvm/include/llvm/Analysis/Utils/TFUtils.h b/llvm/include/llvm/Analysis/Utils/TFUtils.h index 2ab2c7a57d94..ea6bc2cf19ee 100644 --- a/llvm/include/llvm/Analysis/Utils/TFUtils.h +++ b/llvm/include/llvm/Analysis/Utils/TFUtils.h @@ -9,10 +9,11 @@ #ifndef LLVM_ANALYSIS_UTILS_TFUTILS_H #define LLVM_ANALYSIS_UTILS_TFUTILS_H -#include "llvm/Config/config.h" +#include "llvm/Config/llvm-config.h" #ifdef LLVM_HAVE_TF_API #include "llvm/IR/LLVMContext.h" +#include "llvm/Support/JSON.h" #include <memory> #include <vector> @@ -36,6 +37,141 @@ namespace llvm { class TFModelEvaluatorImpl; class EvaluationResultImpl; +/// TensorSpec encapsulates the specification of a tensor: its dimensions, or +/// "shape" (row-major), its type (see TensorSpec::getDataType specializations +/// for supported types), its name and port (see "TensorFlow: Large-Scale +/// Machine Learning on Heterogeneous Distributed Systems", section 4.2, para 2: +/// https://static.googleusercontent.com/media/research.google.com/en//pubs/archive/45166.pdf) +/// +/// TensorSpec is used to set up a TFModelEvaluator by describing the expected +/// inputs and outputs. 
+class TensorSpec final { +public: + template <typename T> + static TensorSpec createSpec(const std::string &Name, + const std::vector<int64_t> &Shape, + int Port = 0) { + return TensorSpec(Name, Port, getDataType<T>(), Shape); + } + + const std::string &name() const { return Name; } + int port() const { return Port; } + int typeIndex() const { return TypeIndex; } + const std::vector<int64_t> &shape() const { return Shape; } + + bool operator==(const TensorSpec &Other) const { + return Name == Other.Name && Port == Other.Port && + TypeIndex == Other.TypeIndex && Shape == Other.Shape; + } + + bool operator!=(const TensorSpec &Other) const { return !(*this == Other); } + + /// Get the number of elements in a tensor with this shape. + size_t getElementCount() const { return ElementCount; } + /// Get the size, in bytes, of one element. + size_t getElementByteSize() const; + + template <typename T> bool isElementType() const { + return getDataType<T>() == TypeIndex; + } + +private: + TensorSpec(const std::string &Name, int Port, int TypeIndex, + const std::vector<int64_t> &Shape); + + template <typename T> static int getDataType() { + llvm_unreachable("Undefined tensor type"); + } + + std::string Name; + int Port = 0; + int TypeIndex = 0; + std::vector<int64_t> Shape; + size_t ElementCount = 0; +}; + +/// Construct a TensorSpec from a JSON dictionary of the form: +/// { "name": <string>, +/// "port": <int>, +/// "type": <string. Use LLVM's types, e.g. float, double, int64_t>, +/// "shape": <array of ints> } +/// For the "type" field, see the C++ primitive types used in +/// TFUTILS_SUPPORTED_TYPES. +Optional<TensorSpec> getTensorSpecFromJSON(LLVMContext &Ctx, + const json::Value &Value); + +struct LoggedFeatureSpec { + TensorSpec Spec; + Optional<std::string> LoggingName; +}; + +/// Load the output specs. If SpecFileOverride is not empty, that path is used. +/// Otherwise, the file is assumed to be called 'output_spec.json' and be found +/// under ModelPath (the model directory). +/// The first output tensor name must match ExpectedDecisionName. +/// In case of error, the return is None and the error is logged. +Optional<std::vector<LoggedFeatureSpec>> +loadOutputSpecs(LLVMContext &Ctx, StringRef ExpectedDecisionName, + StringRef ModelPath, StringRef SpecFileOverride = StringRef()); + +/// Logging utility - given an ordered specification of features, and assuming +/// a scalar reward, allow logging feature values and rewards, and then print +/// as tf.train.SequenceExample text protobuf. +/// The assumption is that, for an event to be logged (i.e. a set of feature +/// values and a reward), the user calls the log* API for each feature exactly +/// once, providing the index matching the position in the feature spec list +/// provided at construction: +/// event 0: +/// logTensorValue(0, ...) +/// logTensorValue(1, ...) +/// ... +/// logReward(...) +/// event 1: +/// logTensorValue(0, ...) +/// logTensorValue(1, ...) +/// ... +/// logReward(...) +/// +/// At the end, call print to generate the protobuf. +class Logger final { +public: + /// Construct a Logger. If IncludeReward is false, then logReward shouldn't + /// be called, and the reward feature won't be printed out. 
+ Logger(const std::vector<LoggedFeatureSpec> &FeatureSpecs, + const TensorSpec &RewardSpec, bool IncludeReward) + : FeatureSpecs(FeatureSpecs), RewardSpec(RewardSpec), + RawLogData(FeatureSpecs.size() + IncludeReward), + IncludeReward(IncludeReward) {} + + template <typename T> void logReward(T Value) { + assert(IncludeReward); + logTensorValue(RawLogData.size() - 1, &Value); + } + + template <typename T> void logFinalReward(T Value) { + assert(RawLogData.back().empty()); + logReward(Value); + } + + template <typename T> + void logTensorValue(size_t FeatureID, const T *Value, size_t Size = 1) { + const char *Start = reinterpret_cast<const char *>(Value); + const char *End = Start + sizeof(T) * Size; + RawLogData[FeatureID].insert(RawLogData[FeatureID].end(), Start, End); + } + + void print(raw_ostream &OS); + +private: + std::vector<LoggedFeatureSpec> FeatureSpecs; + TensorSpec RewardSpec; + /// RawData has one entry per feature, plus one more for the reward. + /// Each feature's values are then stored in a vector, in succession. + /// This means the ith event is stored at [*][i] + std::vector<std::vector<char>> RawLogData; + const bool IncludeReward; +}; + class TFModelEvaluator final { public: /// The result of a model evaluation. Handles the lifetime of the output @@ -44,25 +180,41 @@ public: class EvaluationResult { public: EvaluationResult(const EvaluationResult &) = delete; + EvaluationResult &operator=(const EvaluationResult &Other) = delete; + EvaluationResult(EvaluationResult &&Other); + EvaluationResult &operator=(EvaluationResult &&Other); + ~EvaluationResult(); - /// Get a pointer to the first element of the tensor at Index. + /// Get a (const) pointer to the first element of the tensor at Index. template <typename T> T *getTensorValue(size_t Index) { return static_cast<T *>(getUntypedTensorValue(Index)); } + template <typename T> const T *getTensorValue(size_t Index) const { + return static_cast<T *>(getUntypedTensorValue(Index)); + } + + /// Get a (const) pointer to the untyped data of the tensor. + void *getUntypedTensorValue(size_t Index); + const void *getUntypedTensorValue(size_t Index) const; + private: friend class TFModelEvaluator; EvaluationResult(std::unique_ptr<EvaluationResultImpl> Impl); - void *getUntypedTensorValue(size_t Index); std::unique_ptr<EvaluationResultImpl> Impl; }; TFModelEvaluator(StringRef SavedModelPath, - const std::vector<std::string> &InputNames, - const std::vector<std::string> &OutputNames, + const std::vector<TensorSpec> &InputSpecs, + const std::vector<TensorSpec> &OutputSpecs, const char *Tags = "serve"); + TFModelEvaluator(StringRef SavedModelPath, + const std::vector<TensorSpec> &InputSpecs, + function_ref<TensorSpec(size_t)> GetOutputSpecs, + size_t OutputSpecsSize, const char *Tags = "serve"); + ~TFModelEvaluator(); TFModelEvaluator(const TFModelEvaluator &) = delete; TFModelEvaluator(TFModelEvaluator &&) = delete; @@ -82,33 +234,32 @@ public: /// otherwise. bool isValid() const { return !!Impl; } - /// Initialize the input at Index as a tensor of the given type and - /// dimensions. 
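A usage sketch assembled from the TensorSpec and Logger declarations above (feature names, shapes and values are assumptions, and this only builds when LLVM_HAVE_TF_API is defined): describe one scalar float feature plus a float reward, log a single event following the documented one-call-per-feature pattern, then print the tf.train.SequenceExample text protobuf.

#include "llvm/Analysis/Utils/TFUtils.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

// Hypothetical helper: log one event with one feature and one reward.
static void loggerSketch() {
  std::vector<LoggedFeatureSpec> Features{
      {TensorSpec::createSpec<float>("feature0", {1}), None}};
  TensorSpec Reward = TensorSpec::createSpec<float>("reward", {1});
  Logger Log(Features, Reward, /*IncludeReward=*/true);

  float F0 = 42.0f;
  Log.logTensorValue(0, &F0); // index 0 matches feature0's position in Features
  float R = 1.0f;
  Log.logReward(R);
  Log.print(errs());
}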
- template <typename T> - void initInput(size_t Index, const std::vector<int64_t> &Dimensions) { - return initInput(Index, getModelTypeIndex<T>(), Dimensions); - } - private: void *getUntypedInput(size_t Index); - template <typename T> int getModelTypeIndex(); - void initInput(size_t Index, int TypeIndex, - const std::vector<int64_t> &Dimensions); - std::unique_ptr<TFModelEvaluatorImpl> Impl; }; -template <> int TFModelEvaluator::getModelTypeIndex<float>(); -template <> int TFModelEvaluator::getModelTypeIndex<double>(); -template <> int TFModelEvaluator::getModelTypeIndex<int8_t>(); -template <> int TFModelEvaluator::getModelTypeIndex<uint8_t>(); -template <> int TFModelEvaluator::getModelTypeIndex<int16_t>(); -template <> int TFModelEvaluator::getModelTypeIndex<uint16_t>(); -template <> int TFModelEvaluator::getModelTypeIndex<int32_t>(); -template <> int TFModelEvaluator::getModelTypeIndex<uint32_t>(); -template <> int TFModelEvaluator::getModelTypeIndex<int64_t>(); -template <> int TFModelEvaluator::getModelTypeIndex<uint64_t>(); +/// List of supported types, as a pair: +/// - C++ type +/// - enum name (implementation-specific) +#define TFUTILS_SUPPORTED_TYPES(M) \ + M(float, TF_FLOAT) \ + M(double, TF_DOUBLE) \ + M(int8_t, TF_INT8) \ + M(uint8_t, TF_UINT8) \ + M(int16_t, TF_INT16) \ + M(uint16_t, TF_UINT16) \ + M(int32_t, TF_INT32) \ + M(uint32_t, TF_UINT32) \ + M(int64_t, TF_INT64) \ + M(uint64_t, TF_UINT64) + +#define TFUTILS_GETDATATYPE_DEF(T, E) \ + template <> int TensorSpec::getDataType<T>(); + +TFUTILS_SUPPORTED_TYPES(TFUTILS_GETDATATYPE_DEF) +#undef TFUTILS_GETDATATYPE_DEF } // namespace llvm #endif // LLVM_HAVE_TF_API diff --git a/llvm/include/llvm/Analysis/ValueLattice.h b/llvm/include/llvm/Analysis/ValueLattice.h index bf5bab9ced22..108d08033ac3 100644 --- a/llvm/include/llvm/Analysis/ValueLattice.h +++ b/llvm/include/llvm/Analysis/ValueLattice.h @@ -11,6 +11,7 @@ #include "llvm/IR/ConstantRange.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/Instructions.h" // //===----------------------------------------------------------------------===// // ValueLatticeElement @@ -456,6 +457,16 @@ public: if (isConstant() && Other.isConstant()) return ConstantExpr::getCompare(Pred, getConstant(), Other.getConstant()); + if (ICmpInst::isEquality(Pred)) { + // not(C) != C => true, not(C) == C => false. + if ((isNotConstant() && Other.isConstant() && + getNotConstant() == Other.getConstant()) || + (isConstant() && Other.isNotConstant() && + getConstant() == Other.getNotConstant())) + return Pred == ICmpInst::ICMP_NE + ? ConstantInt::getTrue(Ty) : ConstantInt::getFalse(Ty); + } + // Integer constants are represented as ConstantRanges with single // elements. if (!isConstantRange() || !Other.isConstantRange()) diff --git a/llvm/include/llvm/Analysis/ValueTracking.h b/llvm/include/llvm/Analysis/ValueTracking.h index 9510739ef5ab..86c0991451c5 100644 --- a/llvm/include/llvm/Analysis/ValueTracking.h +++ b/llvm/include/llvm/Analysis/ValueTracking.h @@ -21,12 +21,14 @@ #include "llvm/IR/DataLayout.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Operator.h" #include <cassert> #include <cstdint> namespace llvm { class AddOperator; +class AllocaInst; class APInt; class AssumptionCache; class DominatorTree; @@ -43,6 +45,8 @@ class StringRef; class TargetLibraryInfo; class Value; +constexpr unsigned MaxAnalysisRecursionDepth = 6; + /// Determine which bits of V are known to be either zero or one and return /// them in the KnownZero/KnownOne bit sets. 
/// @@ -366,14 +370,13 @@ class Value; /// that the returned value has pointer type if the specified value does. If /// the MaxLookup value is non-zero, it limits the number of instructions to /// be stripped off. - Value *GetUnderlyingObject(Value *V, const DataLayout &DL, - unsigned MaxLookup = 6); - inline const Value *GetUnderlyingObject(const Value *V, const DataLayout &DL, + Value *getUnderlyingObject(Value *V, unsigned MaxLookup = 6); + inline const Value *getUnderlyingObject(const Value *V, unsigned MaxLookup = 6) { - return GetUnderlyingObject(const_cast<Value *>(V), DL, MaxLookup); + return getUnderlyingObject(const_cast<Value *>(V), MaxLookup); } - /// This method is similar to GetUnderlyingObject except that it can + /// This method is similar to getUnderlyingObject except that it can /// look through phi and select instructions and return multiple objects. /// /// If LoopInfo is passed, loop phis are further analyzed. If a pointer @@ -401,20 +404,30 @@ class Value; /// Since A[i] and A[i-1] are independent pointers, getUnderlyingObjects /// should not assume that Curr and Prev share the same underlying object thus /// it shouldn't look through the phi above. - void GetUnderlyingObjects(const Value *V, + void getUnderlyingObjects(const Value *V, SmallVectorImpl<const Value *> &Objects, - const DataLayout &DL, LoopInfo *LI = nullptr, - unsigned MaxLookup = 6); + LoopInfo *LI = nullptr, unsigned MaxLookup = 6); - /// This is a wrapper around GetUnderlyingObjects and adds support for basic + /// This is a wrapper around getUnderlyingObjects and adds support for basic /// ptrtoint+arithmetic+inttoptr sequences. bool getUnderlyingObjectsForCodeGen(const Value *V, - SmallVectorImpl<Value *> &Objects, - const DataLayout &DL); + SmallVectorImpl<Value *> &Objects); + + /// Returns unique alloca where the value comes from, or nullptr. + /// If OffsetZero is true check that V points to the begining of the alloca. + AllocaInst *findAllocaForValue(Value *V, bool OffsetZero = false); + inline const AllocaInst *findAllocaForValue(const Value *V, + bool OffsetZero = false) { + return findAllocaForValue(const_cast<Value *>(V), OffsetZero); + } /// Return true if the only users of this pointer are lifetime markers. bool onlyUsedByLifetimeMarkers(const Value *V); + /// Return true if the only users of this pointer are lifetime markers or + /// droppable instructions. + bool onlyUsedByLifetimeMarkersOrDroppableInsts(const Value *V); + /// Return true if speculation of the given load must be suppressed to avoid /// ordering or interfering with an active sanitizer. If not suppressed, /// dereferenceability and alignment must be proven separately. Note: This @@ -571,45 +584,65 @@ class Value; /// if, for all i, r is evaluated to poison or op raises UB if vi = poison. /// To filter out operands that raise UB on poison, you can use /// getGuaranteedNonPoisonOp. - bool propagatesPoison(const Instruction *I); + bool propagatesPoison(const Operator *I); - /// Return either nullptr or an operand of I such that I will trigger - /// undefined behavior if I is executed and that operand has a poison - /// value. - const Value *getGuaranteedNonPoisonOp(const Instruction *I); + /// Insert operands of I into Ops such that I will trigger undefined behavior + /// if I is executed and that operand has a poison value. + void getGuaranteedNonPoisonOps(const Instruction *I, + SmallPtrSetImpl<const Value *> &Ops); - /// Return true if the given instruction must trigger undefined behavior. 
+ /// Return true if the given instruction must trigger undefined behavior /// when I is executed with any operands which appear in KnownPoison holding /// a poison value at the point of execution. bool mustTriggerUB(const Instruction *I, const SmallSet<const Value *, 16>& KnownPoison); - /// Return true if this function can prove that if PoisonI is executed - /// and yields a poison value, then that will trigger undefined behavior. + /// Return true if this function can prove that if Inst is executed + /// and yields a poison value or undef bits, then that will trigger + /// undefined behavior. /// /// Note that this currently only considers the basic block that is - /// the parent of I. - bool programUndefinedIfPoison(const Instruction *PoisonI); - - /// Return true if I can create poison from non-poison operands. - /// For vectors, canCreatePoison returns true if there is potential poison in - /// any element of the result when vectors without poison are given as + /// the parent of Inst. + bool programUndefinedIfUndefOrPoison(const Instruction *Inst); + bool programUndefinedIfPoison(const Instruction *Inst); + + /// canCreateUndefOrPoison returns true if Op can create undef or poison from + /// non-undef & non-poison operands. + /// For vectors, canCreateUndefOrPoison returns true if there is potential + /// poison or undef in any element of the result when vectors without + /// undef/poison poison are given as operands. + /// For example, given `Op = shl <2 x i32> %x, <0, 32>`, this function returns + /// true. If Op raises immediate UB but never creates poison or undef + /// (e.g. sdiv I, 0), canCreatePoison returns false. + /// + /// canCreatePoison returns true if Op can create poison from non-poison /// operands. - /// For example, given `I = shl <2 x i32> %x, <0, 32>`, this function returns - /// true. If I raises immediate UB but never creates poison (e.g. sdiv I, 0), - /// canCreatePoison returns false. - bool canCreatePoison(const Instruction *I); - - /// Return true if this function can prove that V is never undef value - /// or poison value. - // + bool canCreateUndefOrPoison(const Operator *Op); + bool canCreatePoison(const Operator *Op); + + /// Return true if V is poison given that ValAssumedPoison is already poison. + /// For example, if ValAssumedPoison is `icmp X, 10` and V is `icmp X, 5`, + /// impliesPoison returns true. + bool impliesPoison(const Value *ValAssumedPoison, const Value *V); + + /// Return true if this function can prove that V does not have undef bits + /// and is never poison. If V is an aggregate value or vector, check whether + /// all elements (except padding) are not undef or poison. + /// Note that this is different from canCreateUndefOrPoison because the + /// function assumes Op's operands are not poison/undef. + /// /// If CtxI and DT are specified this method performs flow-sensitive analysis /// and returns true if it is guaranteed to be never undef or poison /// immediately before the CtxI. bool isGuaranteedNotToBeUndefOrPoison(const Value *V, + AssumptionCache *AC = nullptr, const Instruction *CtxI = nullptr, const DominatorTree *DT = nullptr, unsigned Depth = 0); + bool isGuaranteedNotToBePoison(const Value *V, AssumptionCache *AC = nullptr, + const Instruction *CtxI = nullptr, + const DominatorTree *DT = nullptr, + unsigned Depth = 0); /// Specific patterns of select instructions we can match. enum SelectPatternFlavor { @@ -700,6 +733,14 @@ class Value; /// minimum/maximum flavor. 
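The undef/poison queries above now take an AssumptionCache and come in two strengths. A small caller sketch (the helper name and parameters are assumptions): "not undef or poison" is the stronger property, since a value can carry undef bits without ever being poison.

#include "llvm/Analysis/ValueTracking.h"
using namespace llvm;

// Hypothetical helper: evaluate both flavors of the guarantee at a context point.
static void poisonQuerySketch(const Value *V, AssumptionCache &AC,
                              const Instruction *CtxI, const DominatorTree &DT) {
  bool NoUndefOrPoison = isGuaranteedNotToBeUndefOrPoison(V, &AC, CtxI, &DT);
  bool NoPoison = isGuaranteedNotToBePoison(V, &AC, CtxI, &DT);
  (void)NoUndefOrPoison;
  (void)NoPoison; // semantically, the first property implies the second
}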
CmpInst::Predicate getInverseMinMaxPred(SelectPatternFlavor SPF); + /// Check if the values in \p VL are select instructions that can be converted + /// to a min or max (vector) intrinsic. Returns the intrinsic ID, if such a + /// conversion is possible, together with a bool indicating whether all select + /// conditions are only used by the selects. Otherwise return + /// Intrinsic::not_intrinsic. + std::pair<Intrinsic::ID, bool> + canConvertToMinOrMaxIntrinsic(ArrayRef<Value *> VL); + /// Return true if RHS is known to be implied true by LHS. Return false if /// RHS is known to be implied false by LHS. Otherwise, return None if no /// implication can be made. diff --git a/llvm/include/llvm/Analysis/VecFuncs.def b/llvm/include/llvm/Analysis/VecFuncs.def index 2f64b0fedc7a..cfc3d6115866 100644 --- a/llvm/include/llvm/Analysis/VecFuncs.def +++ b/llvm/include/llvm/Analysis/VecFuncs.def @@ -62,6 +62,87 @@ TLI_DEFINE_VECFUNC("acoshf", "vacoshf", 4) TLI_DEFINE_VECFUNC("atanhf", "vatanhf", 4) +#elif defined(TLI_DEFINE_LIBMVEC_X86_VECFUNCS) +// GLIBC Vector math Functions + +TLI_DEFINE_VECFUNC("sin", "_ZGVbN2v_sin", 2) +TLI_DEFINE_VECFUNC("sin", "_ZGVdN4v_sin", 4) + +TLI_DEFINE_VECFUNC("sinf", "_ZGVbN4v_sinf", 4) +TLI_DEFINE_VECFUNC("sinf", "_ZGVdN8v_sinf", 8) + +TLI_DEFINE_VECFUNC("llvm.sin.f64", "_ZGVbN2v_sin", 2) +TLI_DEFINE_VECFUNC("llvm.sin.f64", "_ZGVdN4v_sin", 4) + +TLI_DEFINE_VECFUNC("llvm.sin.f32", "_ZGVbN4v_sinf", 4) +TLI_DEFINE_VECFUNC("llvm.sin.f32", "_ZGVdN8v_sinf", 8) + +TLI_DEFINE_VECFUNC("cos", "_ZGVbN2v_cos", 2) +TLI_DEFINE_VECFUNC("cos", "_ZGVdN4v_cos", 4) + +TLI_DEFINE_VECFUNC("cosf", "_ZGVbN4v_cosf", 4) +TLI_DEFINE_VECFUNC("cosf", "_ZGVdN8v_cosf", 8) + +TLI_DEFINE_VECFUNC("llvm.cos.f64", "_ZGVbN2v_cos", 2) +TLI_DEFINE_VECFUNC("llvm.cos.f64", "_ZGVdN4v_cos", 4) + +TLI_DEFINE_VECFUNC("llvm.cos.f32", "_ZGVbN4v_cosf", 4) +TLI_DEFINE_VECFUNC("llvm.cos.f32", "_ZGVdN8v_cosf", 8) + +TLI_DEFINE_VECFUNC("pow", "_ZGVbN2vv_pow", 2) +TLI_DEFINE_VECFUNC("pow", "_ZGVdN4vv_pow", 4) + +TLI_DEFINE_VECFUNC("powf", "_ZGVbN4vv_powf", 4) +TLI_DEFINE_VECFUNC("powf", "_ZGVdN8vv_powf", 8) + +TLI_DEFINE_VECFUNC("__pow_finite", "_ZGVbN2vv___pow_finite", 2) +TLI_DEFINE_VECFUNC("__pow_finite", "_ZGVdN4vv___pow_finite", 4) + +TLI_DEFINE_VECFUNC("__powf_finite", "_ZGVbN4vv___powf_finite", 4) +TLI_DEFINE_VECFUNC("__powf_finite", "_ZGVdN8vv___powf_finite", 8) + +TLI_DEFINE_VECFUNC("llvm.pow.f64", "_ZGVbN2vv_pow", 2) +TLI_DEFINE_VECFUNC("llvm.pow.f64", "_ZGVdN4vv_pow", 4) + +TLI_DEFINE_VECFUNC("llvm.pow.f32", "_ZGVbN4vv_powf", 4) +TLI_DEFINE_VECFUNC("llvm.pow.f32", "_ZGVdN8vv_powf", 8) + +TLI_DEFINE_VECFUNC("exp", "_ZGVbN2v_exp", 2) +TLI_DEFINE_VECFUNC("exp", "_ZGVdN4v_exp", 4) + +TLI_DEFINE_VECFUNC("expf", "_ZGVbN4v_expf", 4) +TLI_DEFINE_VECFUNC("expf", "_ZGVdN8v_expf", 8) + +TLI_DEFINE_VECFUNC("__exp_finite", "_ZGVbN2v___exp_finite", 2) +TLI_DEFINE_VECFUNC("__exp_finite", "_ZGVdN4v___exp_finite", 4) + +TLI_DEFINE_VECFUNC("__expf_finite", "_ZGVbN4v___expf_finite", 4) +TLI_DEFINE_VECFUNC("__expf_finite", "_ZGVdN8v___expf_finite", 8) + +TLI_DEFINE_VECFUNC("llvm.exp.f64", "_ZGVbN2v_exp", 2) +TLI_DEFINE_VECFUNC("llvm.exp.f64", "_ZGVdN4v_exp", 4) + +TLI_DEFINE_VECFUNC("llvm.exp.f32", "_ZGVbN4v_expf", 4) +TLI_DEFINE_VECFUNC("llvm.exp.f32", "_ZGVdN8v_expf", 8) + +TLI_DEFINE_VECFUNC("log", "_ZGVbN2v_log", 2) +TLI_DEFINE_VECFUNC("log", "_ZGVdN4v_log", 4) + +TLI_DEFINE_VECFUNC("logf", "_ZGVbN4v_logf", 4) +TLI_DEFINE_VECFUNC("logf", "_ZGVdN8v_logf", 8) + +TLI_DEFINE_VECFUNC("__log_finite", "_ZGVbN2v___log_finite", 2) 
+TLI_DEFINE_VECFUNC("__log_finite", "_ZGVdN4v___log_finite", 4) + +TLI_DEFINE_VECFUNC("__logf_finite", "_ZGVbN4v___logf_finite", 4) +TLI_DEFINE_VECFUNC("__logf_finite", "_ZGVdN8v___logf_finite", 8) + +TLI_DEFINE_VECFUNC("llvm.log.f64", "_ZGVbN2v_log", 2) +TLI_DEFINE_VECFUNC("llvm.log.f64", "_ZGVdN4v_log", 4) + +TLI_DEFINE_VECFUNC("llvm.log.f32", "_ZGVbN4v_logf", 4) +TLI_DEFINE_VECFUNC("llvm.log.f32", "_ZGVdN8v_logf", 8) + #elif defined(TLI_DEFINE_MASSV_VECFUNCS) // IBM MASS library's vector Functions @@ -245,6 +326,70 @@ TLI_DEFINE_VECFUNC("llvm.log.f32", "__svml_logf4", 4) TLI_DEFINE_VECFUNC("llvm.log.f32", "__svml_logf8", 8) TLI_DEFINE_VECFUNC("llvm.log.f32", "__svml_logf16", 16) +TLI_DEFINE_VECFUNC("log2", "__svml_log22", 2) +TLI_DEFINE_VECFUNC("log2", "__svml_log24", 4) +TLI_DEFINE_VECFUNC("log2", "__svml_log28", 8) + +TLI_DEFINE_VECFUNC("log2f", "__svml_log2f4", 4) +TLI_DEFINE_VECFUNC("log2f", "__svml_log2f8", 8) +TLI_DEFINE_VECFUNC("log2f", "__svml_log2f16", 16) + +TLI_DEFINE_VECFUNC("__log2_finite", "__svml_log22", 2) +TLI_DEFINE_VECFUNC("__log2_finite", "__svml_log24", 4) +TLI_DEFINE_VECFUNC("__log2_finite", "__svml_log28", 8) + +TLI_DEFINE_VECFUNC("__log2f_finite", "__svml_log2f4", 4) +TLI_DEFINE_VECFUNC("__log2f_finite", "__svml_log2f8", 8) +TLI_DEFINE_VECFUNC("__log2f_finite", "__svml_log2f16", 16) + +TLI_DEFINE_VECFUNC("llvm.log2.f64", "__svml_log22", 2) +TLI_DEFINE_VECFUNC("llvm.log2.f64", "__svml_log24", 4) +TLI_DEFINE_VECFUNC("llvm.log2.f64", "__svml_log28", 8) + +TLI_DEFINE_VECFUNC("llvm.log2.f32", "__svml_log2f4", 4) +TLI_DEFINE_VECFUNC("llvm.log2.f32", "__svml_log2f8", 8) +TLI_DEFINE_VECFUNC("llvm.log2.f32", "__svml_log2f16", 16) + +TLI_DEFINE_VECFUNC("log10", "__svml_log102", 2) +TLI_DEFINE_VECFUNC("log10", "__svml_log104", 4) +TLI_DEFINE_VECFUNC("log10", "__svml_log108", 8) + +TLI_DEFINE_VECFUNC("log10f", "__svml_log10f4", 4) +TLI_DEFINE_VECFUNC("log10f", "__svml_log10f8", 8) +TLI_DEFINE_VECFUNC("log10f", "__svml_log10f16", 16) + +TLI_DEFINE_VECFUNC("__log10_finite", "__svml_log102", 2) +TLI_DEFINE_VECFUNC("__log10_finite", "__svml_log104", 4) +TLI_DEFINE_VECFUNC("__log10_finite", "__svml_log108", 8) + +TLI_DEFINE_VECFUNC("__log10f_finite", "__svml_log10f4", 4) +TLI_DEFINE_VECFUNC("__log10f_finite", "__svml_log10f8", 8) +TLI_DEFINE_VECFUNC("__log10f_finite", "__svml_log10f16", 16) + +TLI_DEFINE_VECFUNC("llvm.log10.f64", "__svml_log102", 2) +TLI_DEFINE_VECFUNC("llvm.log10.f64", "__svml_log104", 4) +TLI_DEFINE_VECFUNC("llvm.log10.f64", "__svml_log108", 8) + +TLI_DEFINE_VECFUNC("llvm.log10.f32", "__svml_log10f4", 4) +TLI_DEFINE_VECFUNC("llvm.log10.f32", "__svml_log10f8", 8) +TLI_DEFINE_VECFUNC("llvm.log10.f32", "__svml_log10f16", 16) + +TLI_DEFINE_VECFUNC("sqrt", "__svml_sqrt2", 2) +TLI_DEFINE_VECFUNC("sqrt", "__svml_sqrt4", 4) +TLI_DEFINE_VECFUNC("sqrt", "__svml_sqrt8", 8) + +TLI_DEFINE_VECFUNC("sqrtf", "__svml_sqrtf4", 4) +TLI_DEFINE_VECFUNC("sqrtf", "__svml_sqrtf8", 8) +TLI_DEFINE_VECFUNC("sqrtf", "__svml_sqrtf16", 16) + +TLI_DEFINE_VECFUNC("__sqrt_finite", "__svml_sqrt2", 2) +TLI_DEFINE_VECFUNC("__sqrt_finite", "__svml_sqrt4", 4) +TLI_DEFINE_VECFUNC("__sqrt_finite", "__svml_sqrt8", 8) + +TLI_DEFINE_VECFUNC("__sqrtf_finite", "__svml_sqrtf4", 4) +TLI_DEFINE_VECFUNC("__sqrtf_finite", "__svml_sqrtf8", 8) +TLI_DEFINE_VECFUNC("__sqrtf_finite", "__svml_sqrtf16", 16) + TLI_DEFINE_VECFUNC("exp2", "__svml_exp22", 2) TLI_DEFINE_VECFUNC("exp2", "__svml_exp24", 4) TLI_DEFINE_VECFUNC("exp2", "__svml_exp28", 8) @@ -275,6 +420,7 @@ TLI_DEFINE_VECFUNC("__exp2f_finite", 
"__svml_exp2f16", 16) #undef TLI_DEFINE_VECFUNC #undef TLI_DEFINE_ACCELERATE_VECFUNCS +#undef TLI_DEFINE_LIBMVEC_X86_VECFUNCS #undef TLI_DEFINE_MASSV_VECFUNCS #undef TLI_DEFINE_SVML_VECFUNCS #undef TLI_DEFINE_MASSV_VECFUNCS_NAMES diff --git a/llvm/include/llvm/Analysis/VectorUtils.h b/llvm/include/llvm/Analysis/VectorUtils.h index b1d7850442fb..26cb0e456ed4 100644 --- a/llvm/include/llvm/Analysis/VectorUtils.h +++ b/llvm/include/llvm/Analysis/VectorUtils.h @@ -14,12 +14,12 @@ #define LLVM_ANALYSIS_VECTORUTILS_H #include "llvm/ADT/MapVector.h" -#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/LoopAccessAnalysis.h" -#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Support/CheckedArithmetic.h" namespace llvm { +class TargetLibraryInfo; /// Describes the type of Parameters enum class VFParamKind { @@ -99,7 +99,8 @@ struct VFShape { // Retrieve the VFShape that can be used to map a (scalar) function to itself, // with VF = 1. static VFShape getScalarShape(const CallInst &CI) { - return VFShape::get(CI, /*EC*/ {1, false}, /*HasGlobalPredicate*/ false); + return VFShape::get(CI, ElementCount::getFixed(1), + /*HasGlobalPredicate*/ false); } // Retrieve the basic vectorization shape of the function, where all @@ -114,7 +115,7 @@ struct VFShape { Parameters.push_back( VFParameter({CI.arg_size(), VFParamKind::GlobalPredicate})); - return {EC.Min, EC.Scalable, Parameters}; + return {EC.getKnownMinValue(), EC.isScalable(), Parameters}; } /// Sanity check on the Parameters in the VFShape. bool hasValidParameterList() const; @@ -299,13 +300,17 @@ namespace Intrinsic { typedef unsigned ID; } -/// A helper function for converting Scalar types to vector types. -/// If the incoming type is void, we return void. If the VF is 1, we return -/// the scalar type. -inline Type *ToVectorTy(Type *Scalar, unsigned VF, bool isScalable = false) { - if (Scalar->isVoidTy() || VF == 1) +/// A helper function for converting Scalar types to vector types. If +/// the incoming type is void, we return void. If the EC represents a +/// scalar, we return the scalar type. +inline Type *ToVectorTy(Type *Scalar, ElementCount EC) { + if (Scalar->isVoidTy() || Scalar->isMetadataTy() || EC.isScalar()) return Scalar; - return VectorType::get(Scalar, {VF, isScalable}); + return VectorType::get(Scalar, EC); +} + +inline Type *ToVectorTy(Type *Scalar, unsigned VF) { + return ToVectorTy(Scalar, ElementCount::getFixed(VF)); } /// Identify if the intrinsic is trivially vectorizable. @@ -353,7 +358,7 @@ int getSplatIndex(ArrayRef<int> Mask); /// Get splat value if the input is a splat vector or return nullptr. /// The value may be extracted from a splat constants vector or from /// a sequence of instructions that broadcast a single value into a vector. -const Value *getSplatValue(const Value *V); +Value *getSplatValue(const Value *V); /// Return true if each element of the vector value \p V is poisoned or equal to /// every other non-poisoned element. If an index element is specified, either @@ -539,20 +544,20 @@ createSequentialMask(unsigned Start, unsigned NumInts, unsigned NumUndefs); /// elements, it will be padded with undefs. Value *concatenateVectors(IRBuilderBase &Builder, ArrayRef<Value *> Vecs); -/// Given a mask vector of the form <Y x i1>, Return true if all of the -/// elements of this predicate mask are false or undef. That is, return true -/// if all lanes can be assumed inactive. 
+/// Given a mask vector of i1, Return true if all of the elements of this +/// predicate mask are known to be false or undef. That is, return true if all +/// lanes can be assumed inactive. bool maskIsAllZeroOrUndef(Value *Mask); -/// Given a mask vector of the form <Y x i1>, Return true if all of the -/// elements of this predicate mask are true or undef. That is, return true -/// if all lanes can be assumed active. +/// Given a mask vector of i1, Return true if all of the elements of this +/// predicate mask are known to be true or undef. That is, return true if all +/// lanes can be assumed active. bool maskIsAllOneOrUndef(Value *Mask); /// Given a mask vector of the form <Y x i1>, return an APInt (of bitwidth Y) /// for each lane which may be active. APInt possiblyDemandedEltsInMask(Value *Mask); - + /// The group of interleaved loads/stores sharing the same stride and /// close to each other. /// @@ -615,6 +620,11 @@ public: return false; int32_t Key = *MaybeKey; + // Skip if the key is used for either the tombstone or empty special values. + if (DenseMapInfo<int32_t>::getTombstoneKey() == Key || + DenseMapInfo<int32_t>::getEmptyKey() == Key) + return false; + // Skip if there is already a member with the same index. if (Members.find(Key) != Members.end()) return false; @@ -650,11 +660,7 @@ public: /// \returns nullptr if contains no such member. InstTy *getMember(uint32_t Index) const { int32_t Key = SmallestKey + Index; - auto Member = Members.find(Key); - if (Member == Members.end()) - return nullptr; - - return Member->second; + return Members.lookup(Key); } /// Get the index for the given member. Unlike the key in the member @@ -772,9 +778,7 @@ public: /// \returns nullptr if doesn't have such group. InterleaveGroup<Instruction> * getInterleaveGroup(const Instruction *Instr) const { - if (InterleaveGroupMap.count(Instr)) - return InterleaveGroupMap.find(Instr)->second; - return nullptr; + return InterleaveGroupMap.lookup(Instr); } iterator_range<SmallPtrSetIterator<llvm::InterleaveGroup<Instruction> *>> diff --git a/llvm/include/llvm/BinaryFormat/COFF.h b/llvm/include/llvm/BinaryFormat/COFF.h index 1919d7f0dece..716d649f7c51 100644 --- a/llvm/include/llvm/BinaryFormat/COFF.h +++ b/llvm/include/llvm/BinaryFormat/COFF.h @@ -311,6 +311,7 @@ enum SectionCharacteristics : uint32_t { IMAGE_SCN_ALIGN_2048BYTES = 0x00C00000, IMAGE_SCN_ALIGN_4096BYTES = 0x00D00000, IMAGE_SCN_ALIGN_8192BYTES = 0x00E00000, + IMAGE_SCN_ALIGN_MASK = 0x00F00000, IMAGE_SCN_LNK_NRELOC_OVFL = 0x01000000, IMAGE_SCN_MEM_DISCARDABLE = 0x02000000, IMAGE_SCN_MEM_NOT_CACHED = 0x04000000, diff --git a/llvm/include/llvm/BinaryFormat/Dwarf.def b/llvm/include/llvm/BinaryFormat/Dwarf.def index f0337ef4fb54..f69877bb50df 100644 --- a/llvm/include/llvm/BinaryFormat/Dwarf.def +++ b/llvm/include/llvm/BinaryFormat/Dwarf.def @@ -17,7 +17,7 @@ defined HANDLE_DW_VIRTUALITY || defined HANDLE_DW_DEFAULTED || \ defined HANDLE_DW_CC || defined HANDLE_DW_LNS || defined HANDLE_DW_LNE || \ defined HANDLE_DW_LNCT || defined HANDLE_DW_MACRO || \ - defined HANDLE_MACRO_FLAG || \ + defined HANDLE_DW_MACRO_GNU || defined HANDLE_MACRO_FLAG || \ defined HANDLE_DW_RLE || defined HANDLE_DW_LLE || \ (defined HANDLE_DW_CFA && defined HANDLE_DW_CFA_PRED) || \ defined HANDLE_DW_APPLE_PROPERTY || defined HANDLE_DW_UT || \ @@ -88,6 +88,10 @@ #define HANDLE_DW_MACRO(ID, NAME) #endif +#ifndef HANDLE_DW_MACRO_GNU +#define HANDLE_DW_MACRO_GNU(ID, NAME) +#endif + #ifndef HANDLE_MACRO_FLAG #define HANDLE_MACRO_FLAG(ID, NAME) #endif @@ -837,6 +841,18 @@ 
HANDLE_DW_MACRO(0x0a, import_sup) HANDLE_DW_MACRO(0x0b, define_strx) HANDLE_DW_MACRO(0x0c, undef_strx) +// GNU .debug_macro extension. +HANDLE_DW_MACRO_GNU(0x01, define) +HANDLE_DW_MACRO_GNU(0x02, undef) +HANDLE_DW_MACRO_GNU(0x03, start_file) +HANDLE_DW_MACRO_GNU(0x04, end_file) +HANDLE_DW_MACRO_GNU(0x05, define_indirect) +HANDLE_DW_MACRO_GNU(0x06, undef_indirect) +HANDLE_DW_MACRO_GNU(0x07, transparent_include) +HANDLE_DW_MACRO_GNU(0x08, define_indirect_alt) +HANDLE_DW_MACRO_GNU(0x09, undef_indirect_alt) +HANDLE_DW_MACRO_GNU(0x0a, transparent_include_alt) + // DWARF v5 Macro header flags. HANDLE_MACRO_FLAG(0x01, OFFSET_SIZE) HANDLE_MACRO_FLAG(0x02, DEBUG_LINE_OFFSET) @@ -986,6 +1002,7 @@ HANDLE_DW_SECT(8, RNGLISTS) #undef HANDLE_DW_LNE #undef HANDLE_DW_LNCT #undef HANDLE_DW_MACRO +#undef HANDLE_DW_MACRO_GNU #undef HANDLE_MACRO_FLAG #undef HANDLE_DW_RLE #undef HANDLE_DW_LLE diff --git a/llvm/include/llvm/BinaryFormat/Dwarf.h b/llvm/include/llvm/BinaryFormat/Dwarf.h index 4e8b708f39bb..cafc5be686bc 100644 --- a/llvm/include/llvm/BinaryFormat/Dwarf.h +++ b/llvm/include/llvm/BinaryFormat/Dwarf.h @@ -27,6 +27,8 @@ #include "llvm/Support/FormatVariadicDetails.h" #include "llvm/ADT/Triple.h" +#include <limits> + namespace llvm { class StringRef; @@ -118,10 +120,11 @@ enum LocationAtom { #include "llvm/BinaryFormat/Dwarf.def" DW_OP_lo_user = 0xe0, DW_OP_hi_user = 0xff, - DW_OP_LLVM_fragment = 0x1000, ///< Only used in LLVM metadata. - DW_OP_LLVM_convert = 0x1001, ///< Only used in LLVM metadata. - DW_OP_LLVM_tag_offset = 0x1002, ///< Only used in LLVM metadata. - DW_OP_LLVM_entry_value = 0x1003, ///< Only used in LLVM metadata. + DW_OP_LLVM_fragment = 0x1000, ///< Only used in LLVM metadata. + DW_OP_LLVM_convert = 0x1001, ///< Only used in LLVM metadata. + DW_OP_LLVM_tag_offset = 0x1002, ///< Only used in LLVM metadata. + DW_OP_LLVM_entry_value = 0x1003, ///< Only used in LLVM metadata. + DW_OP_LLVM_implicit_pointer = 0x1004, ///< Only used in LLVM metadata. }; enum TypeKind : uint8_t { @@ -183,6 +186,7 @@ enum SourceLanguage { }; inline bool isCPlusPlus(SourceLanguage S) { + bool result = false; // Deliberately enumerate all the language options so we get a warning when // new language options are added (-Wswitch) that'll hopefully help keep this // switch up-to-date when new C++ versions are added. @@ -191,7 +195,8 @@ inline bool isCPlusPlus(SourceLanguage S) { case DW_LANG_C_plus_plus_03: case DW_LANG_C_plus_plus_11: case DW_LANG_C_plus_plus_14: - return true; + result = true; + break; case DW_LANG_C89: case DW_LANG_C: case DW_LANG_Ada83: @@ -230,9 +235,68 @@ inline bool isCPlusPlus(SourceLanguage S) { case DW_LANG_BORLAND_Delphi: case DW_LANG_lo_user: case DW_LANG_hi_user: - return false; + result = false; + break; } - llvm_unreachable("Invalid source language"); + + return result; +} + +inline bool isFortran(SourceLanguage S) { + bool result = false; + // Deliberately enumerate all the language options so we get a warning when + // new language options are added (-Wswitch) that'll hopefully help keep this + // switch up-to-date when new Fortran versions are added. 
+ switch (S) { + case DW_LANG_Fortran77: + case DW_LANG_Fortran90: + case DW_LANG_Fortran95: + case DW_LANG_Fortran03: + case DW_LANG_Fortran08: + result = true; + break; + case DW_LANG_C89: + case DW_LANG_C: + case DW_LANG_Ada83: + case DW_LANG_C_plus_plus: + case DW_LANG_Cobol74: + case DW_LANG_Cobol85: + case DW_LANG_Pascal83: + case DW_LANG_Modula2: + case DW_LANG_Java: + case DW_LANG_C99: + case DW_LANG_Ada95: + case DW_LANG_PLI: + case DW_LANG_ObjC: + case DW_LANG_ObjC_plus_plus: + case DW_LANG_UPC: + case DW_LANG_D: + case DW_LANG_Python: + case DW_LANG_OpenCL: + case DW_LANG_Go: + case DW_LANG_Modula3: + case DW_LANG_Haskell: + case DW_LANG_C_plus_plus_03: + case DW_LANG_C_plus_plus_11: + case DW_LANG_OCaml: + case DW_LANG_Rust: + case DW_LANG_C11: + case DW_LANG_Swift: + case DW_LANG_Julia: + case DW_LANG_Dylan: + case DW_LANG_C_plus_plus_14: + case DW_LANG_RenderScript: + case DW_LANG_BLISS: + case DW_LANG_Mips_Assembler: + case DW_LANG_GOOGLE_RenderScript: + case DW_LANG_BORLAND_Delphi: + case DW_LANG_lo_user: + case DW_LANG_hi_user: + result = false; + break; + } + + return result; } enum CaseSensitivity { @@ -309,6 +373,14 @@ enum MacroEntryType { DW_MACRO_hi_user = 0xff }; +/// GNU .debug_macro macro information entry type encodings. +enum GnuMacroEntryType { +#define HANDLE_DW_MACRO_GNU(ID, NAME) DW_MACRO_GNU_##NAME = ID, +#include "llvm/BinaryFormat/Dwarf.def" + DW_MACRO_GNU_lo_user = 0xe0, + DW_MACRO_GNU_hi_user = 0xff +}; + /// DWARF v5 range list entry encoding values. enum RnglistEntries { #define HANDLE_DW_RLE(ID, NAME) DW_RLE_##NAME = ID, @@ -472,6 +544,7 @@ StringRef LNStandardString(unsigned Standard); StringRef LNExtendedString(unsigned Encoding); StringRef MacinfoString(unsigned Encoding); StringRef MacroString(unsigned Encoding); +StringRef GnuMacroString(unsigned Encoding); StringRef RangeListEncodingString(unsigned Encoding); StringRef LocListEncodingString(unsigned Encoding); StringRef CallFrameString(unsigned Encoding, Triple::ArchType Arch); @@ -483,6 +556,7 @@ StringRef GDBIndexEntryLinkageString(GDBIndexEntryLinkage Linkage); StringRef IndexString(unsigned Idx); StringRef FormatString(DwarfFormat Format); StringRef FormatString(bool IsDWARF64); +StringRef RLEString(unsigned RLE); /// @} /// \defgroup DwarfConstantsParsing Dwarf constants parsing functions @@ -674,6 +748,11 @@ template <> struct EnumTraits<LocationAtom> : public std::true_type { static constexpr char Type[3] = "OP"; static constexpr StringRef (*StringFn)(unsigned) = &OperationEncodingString; }; + +inline uint64_t computeTombstoneAddress(uint8_t AddressByteSize) { + return std::numeric_limits<uint64_t>::max() >> (8 - AddressByteSize) * 8; +} + } // End of namespace dwarf /// Dwarf constants format_provider diff --git a/llvm/include/llvm/BinaryFormat/DynamicTags.def b/llvm/include/llvm/BinaryFormat/DynamicTags.def index aec408bd2d72..c08f8a53bdb5 100644 --- a/llvm/include/llvm/BinaryFormat/DynamicTags.def +++ b/llvm/include/llvm/BinaryFormat/DynamicTags.def @@ -120,6 +120,7 @@ DYNAMIC_TAG(VERNEEDNUM, 0X6FFFFFFF) // The number of entries in DT_VERNEED. 
// AArch64 specific dynamic table entries AARCH64_DYNAMIC_TAG(AARCH64_BTI_PLT, 0x70000001) AARCH64_DYNAMIC_TAG(AARCH64_PAC_PLT, 0x70000003) +AARCH64_DYNAMIC_TAG(AARCH64_VARIANT_PCS, 0x70000005) // Hexagon specific dynamic table entries HEXAGON_DYNAMIC_TAG(HEXAGON_SYMSZ, 0x70000000) diff --git a/llvm/include/llvm/BinaryFormat/ELF.h b/llvm/include/llvm/BinaryFormat/ELF.h index bdcf10fd1640..1552303b610c 100644 --- a/llvm/include/llvm/BinaryFormat/ELF.h +++ b/llvm/include/llvm/BinaryFormat/ELF.h @@ -107,13 +107,17 @@ struct Elf64_Ehdr { unsigned char getDataEncoding() const { return e_ident[EI_DATA]; } }; -// File types +// File types. +// See current registered ELF types at: +// http://www.sco.com/developers/gabi/latest/ch4.eheader.html enum { ET_NONE = 0, // No file type ET_REL = 1, // Relocatable file ET_EXEC = 2, // Executable file ET_DYN = 3, // Shared object file ET_CORE = 4, // Core file + ET_LOOS = 0xfe00, // Beginning of operating system-specific codes + ET_HIOS = 0xfeff, // Operating system-specific ET_LOPROC = 0xff00, // Beginning of processor-specific codes ET_HIPROC = 0xffff // Processor-specific }; @@ -312,6 +316,7 @@ enum { EM_LANAI = 244, // Lanai 32-bit processor EM_BPF = 247, // Linux kernel bpf virtual machine EM_VE = 251, // NEC SX-Aurora VE + EM_CSKY = 252, // C-SKY 32-bit processor }; // Object file classes. @@ -359,6 +364,14 @@ enum { ELFOSABI_LAST_ARCH = 255 // Last Architecture-specific OS ABI }; +// AMDGPU OS ABI Version identification. +enum { + // ELFABIVERSION_AMDGPU_HSA_V1 does not exist because OS ABI identification + // was never defined for V1. + ELFABIVERSION_AMDGPU_HSA_V2 = 0, + ELFABIVERSION_AMDGPU_HSA_V3 = 1, +}; + #define ELF_RELOC(name, value) name = value, // X86_64 relocations. @@ -405,6 +418,12 @@ enum { #include "ELFRelocs/AArch64.def" }; +// Special values for the st_other field in the symbol table entry for AArch64. +enum { + // Symbol may follow different calling convention than base PCS. + STO_AARCH64_VARIANT_PCS = 0x80 +}; + // ARM Specific e_flags enum : unsigned { EF_ARM_SOFT_FLOAT = 0x00000200U, // Legacy pre EABI_VER5 @@ -680,41 +699,39 @@ enum : unsigned { EF_AMDGPU_MACH_R600_LAST = EF_AMDGPU_MACH_R600_TURKS, // AMDGCN-based processors. - - // AMDGCN GFX6. - EF_AMDGPU_MACH_AMDGCN_GFX600 = 0x020, - EF_AMDGPU_MACH_AMDGCN_GFX601 = 0x021, - // AMDGCN GFX7. - EF_AMDGPU_MACH_AMDGCN_GFX700 = 0x022, - EF_AMDGPU_MACH_AMDGCN_GFX701 = 0x023, - EF_AMDGPU_MACH_AMDGCN_GFX702 = 0x024, - EF_AMDGPU_MACH_AMDGCN_GFX703 = 0x025, - EF_AMDGPU_MACH_AMDGCN_GFX704 = 0x026, - // AMDGCN GFX8. - EF_AMDGPU_MACH_AMDGCN_GFX801 = 0x028, - EF_AMDGPU_MACH_AMDGCN_GFX802 = 0x029, - EF_AMDGPU_MACH_AMDGCN_GFX803 = 0x02a, - EF_AMDGPU_MACH_AMDGCN_GFX810 = 0x02b, - // AMDGCN GFX9. - EF_AMDGPU_MACH_AMDGCN_GFX900 = 0x02c, - EF_AMDGPU_MACH_AMDGCN_GFX902 = 0x02d, - EF_AMDGPU_MACH_AMDGCN_GFX904 = 0x02e, - EF_AMDGPU_MACH_AMDGCN_GFX906 = 0x02f, - EF_AMDGPU_MACH_AMDGCN_GFX908 = 0x030, - EF_AMDGPU_MACH_AMDGCN_GFX909 = 0x031, - // AMDGCN GFX10. - EF_AMDGPU_MACH_AMDGCN_GFX1010 = 0x033, - EF_AMDGPU_MACH_AMDGCN_GFX1011 = 0x034, - EF_AMDGPU_MACH_AMDGCN_GFX1012 = 0x035, - EF_AMDGPU_MACH_AMDGCN_GFX1030 = 0x036, - - // Reserved for AMDGCN-based processors. 
- EF_AMDGPU_MACH_AMDGCN_RESERVED0 = 0x027, - EF_AMDGPU_MACH_AMDGCN_RESERVED1 = 0x032, + EF_AMDGPU_MACH_AMDGCN_GFX600 = 0x020, + EF_AMDGPU_MACH_AMDGCN_GFX601 = 0x021, + EF_AMDGPU_MACH_AMDGCN_GFX700 = 0x022, + EF_AMDGPU_MACH_AMDGCN_GFX701 = 0x023, + EF_AMDGPU_MACH_AMDGCN_GFX702 = 0x024, + EF_AMDGPU_MACH_AMDGCN_GFX703 = 0x025, + EF_AMDGPU_MACH_AMDGCN_GFX704 = 0x026, + EF_AMDGPU_MACH_AMDGCN_RESERVED_0X27 = 0x027, + EF_AMDGPU_MACH_AMDGCN_GFX801 = 0x028, + EF_AMDGPU_MACH_AMDGCN_GFX802 = 0x029, + EF_AMDGPU_MACH_AMDGCN_GFX803 = 0x02a, + EF_AMDGPU_MACH_AMDGCN_GFX810 = 0x02b, + EF_AMDGPU_MACH_AMDGCN_GFX900 = 0x02c, + EF_AMDGPU_MACH_AMDGCN_GFX902 = 0x02d, + EF_AMDGPU_MACH_AMDGCN_GFX904 = 0x02e, + EF_AMDGPU_MACH_AMDGCN_GFX906 = 0x02f, + EF_AMDGPU_MACH_AMDGCN_GFX908 = 0x030, + EF_AMDGPU_MACH_AMDGCN_GFX909 = 0x031, + EF_AMDGPU_MACH_AMDGCN_GFX90C = 0x032, + EF_AMDGPU_MACH_AMDGCN_GFX1010 = 0x033, + EF_AMDGPU_MACH_AMDGCN_GFX1011 = 0x034, + EF_AMDGPU_MACH_AMDGCN_GFX1012 = 0x035, + EF_AMDGPU_MACH_AMDGCN_GFX1030 = 0x036, + EF_AMDGPU_MACH_AMDGCN_GFX1031 = 0x037, + EF_AMDGPU_MACH_AMDGCN_GFX1032 = 0x038, + EF_AMDGPU_MACH_AMDGCN_GFX1033 = 0x039, + EF_AMDGPU_MACH_AMDGCN_GFX602 = 0x03a, + EF_AMDGPU_MACH_AMDGCN_GFX705 = 0x03b, + EF_AMDGPU_MACH_AMDGCN_GFX805 = 0x03c, // First/last AMDGCN-based processors. EF_AMDGPU_MACH_AMDGCN_FIRST = EF_AMDGPU_MACH_AMDGCN_GFX600, - EF_AMDGPU_MACH_AMDGCN_LAST = EF_AMDGPU_MACH_AMDGCN_GFX1030, + EF_AMDGPU_MACH_AMDGCN_LAST = EF_AMDGPU_MACH_AMDGCN_GFX805, // Indicates if the "xnack" target feature is enabled for all code contained // in the object. @@ -771,6 +788,12 @@ enum { #include "ELFRelocs/VE.def" }; + +// ELF Relocation types for CSKY +enum { +#include "ELFRelocs/CSKY.def" +}; + #undef ELF_RELOC // Section header. @@ -848,10 +871,11 @@ enum : unsigned { SHT_LLVM_ADDRSIG = 0x6fff4c03, // List of address-significant symbols // for safe ICF. SHT_LLVM_DEPENDENT_LIBRARIES = - 0x6fff4c04, // LLVM Dependent Library Specifiers. - SHT_LLVM_SYMPART = 0x6fff4c05, // Symbol partition specification. - SHT_LLVM_PART_EHDR = 0x6fff4c06, // ELF header for loadable partition. - SHT_LLVM_PART_PHDR = 0x6fff4c07, // Phdrs for loadable partition. + 0x6fff4c04, // LLVM Dependent Library Specifiers. + SHT_LLVM_SYMPART = 0x6fff4c05, // Symbol partition specification. + SHT_LLVM_PART_EHDR = 0x6fff4c06, // ELF header for loadable partition. + SHT_LLVM_PART_PHDR = 0x6fff4c07, // Phdrs for loadable partition. + SHT_LLVM_BB_ADDR_MAP = 0x6fff4c08, // LLVM Basic Block Address Map. // Android's experimental support for SHT_RELR sections. // https://android.googlesource.com/platform/bionic/+/b7feec74547f84559a1467aca02708ff61346d2a/libc/include/elf.h#512 SHT_ANDROID_RELR = 0x6fffff00, // Relocation entries; only offsets. 
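
The AArch64 additions in the ELF changes above (DT_AARCH64_VARIANT_PCS in DynamicTags.def and STO_AARCH64_VARIANT_PCS in ELF.h) flag symbols that may follow a calling convention other than the base procedure call standard. A minimal sketch, not part of the imported diff, of how a consumer might test the new st_other bit; it assumes the usual llvm::ELF::Elf64_Sym layout, and the helper name is hypothetical.

#include "llvm/BinaryFormat/ELF.h"

// Sketch only, not part of the imported headers: check whether an AArch64
// ELF symbol is marked as following a variant procedure call standard.
static bool usesVariantPCS(const llvm::ELF::Elf64_Sym &Sym) {
  // STO_AARCH64_VARIANT_PCS (0x80) occupies a processor-specific high bit of
  // st_other; the low bits still carry symbol visibility.
  return (Sym.st_other & llvm::ELF::STO_AARCH64_VARIANT_PCS) != 0;
}

The DT_AARCH64_VARIANT_PCS dynamic tag added above advertises the same property at the object level, so a linker or loader can detect it without scanning every symbol.
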
diff --git a/llvm/include/llvm/BinaryFormat/ELFRelocs/CSKY.def b/llvm/include/llvm/BinaryFormat/ELFRelocs/CSKY.def new file mode 100644 index 000000000000..c5f2dbae8033 --- /dev/null +++ b/llvm/include/llvm/BinaryFormat/ELFRelocs/CSKY.def @@ -0,0 +1,74 @@ + +#ifndef ELF_RELOC +#error "ELF_RELOC must be defined" +#endif + +ELF_RELOC(R_CKCORE_NONE, 0) +ELF_RELOC(R_CKCORE_ADDR32, 1) +ELF_RELOC(R_CKCORE_PCREL_IMM8_4, 2) +ELF_RELOC(R_CKCORE_PCREL_IMM11_2, 3) +ELF_RELOC(R_CKCORE_PCREL_IMM4_2, 4) +ELF_RELOC(R_CKCORE_PCREL32, 5) +ELF_RELOC(R_CKCORE_PCREL_JSR_IMM11_2, 6) +ELF_RELOC(R_CKCORE_GNU_VTINHERIT, 7) +ELF_RELOC(R_CKCORE_GNU_VTENTRY, 8) +ELF_RELOC(R_CKCORE_RELATIVE, 9) +ELF_RELOC(R_CKCORE_COPY, 10) +ELF_RELOC(R_CKCORE_GLOB_DAT, 11) +ELF_RELOC(R_CKCORE_JUMP_SLOT, 12) +ELF_RELOC(R_CKCORE_GOTOFF, 13) +ELF_RELOC(R_CKCORE_GOTPC, 14) +ELF_RELOC(R_CKCORE_GOT32, 15) +ELF_RELOC(R_CKCORE_PLT32, 16) +ELF_RELOC(R_CKCORE_ADDRGOT, 17) +ELF_RELOC(R_CKCORE_ADDRPLT, 18) +ELF_RELOC(R_CKCORE_PCREL_IMM26_2, 19) +ELF_RELOC(R_CKCORE_PCREL_IMM16_2, 20) +ELF_RELOC(R_CKCORE_PCREL_IMM16_4, 21) +ELF_RELOC(R_CKCORE_PCREL_IMM10_2, 22) +ELF_RELOC(R_CKCORE_PCREL_IMM10_4, 23) +ELF_RELOC(R_CKCORE_ADDR_HI16, 24) +ELF_RELOC(R_CKCORE_ADDR_LO16, 25) +ELF_RELOC(R_CKCORE_GOTPC_HI16, 26) +ELF_RELOC(R_CKCORE_GOTPC_LO16, 27) +ELF_RELOC(R_CKCORE_GOTOFF_HI16, 28) +ELF_RELOC(R_CKCORE_GOTOFF_LO16, 29) +ELF_RELOC(R_CKCORE_GOT12, 30) +ELF_RELOC(R_CKCORE_GOT_HI16, 31) +ELF_RELOC(R_CKCORE_GOT_LO16, 32) +ELF_RELOC(R_CKCORE_PLT12, 33) +ELF_RELOC(R_CKCORE_PLT_HI16, 34) +ELF_RELOC(R_CKCORE_PLT_LO16, 35) +ELF_RELOC(R_CKCORE_ADDRGOT_HI16, 36) +ELF_RELOC(R_CKCORE_ADDRGOT_LO16, 37) +ELF_RELOC(R_CKCORE_ADDRPLT_HI16, 38) +ELF_RELOC(R_CKCORE_ADDRPLT_LO16, 39) +ELF_RELOC(R_CKCORE_PCREL_JSR_IMM26_2, 40) +ELF_RELOC(R_CKCORE_TOFFSET_LO16, 41) +ELF_RELOC(R_CKCORE_DOFFSET_LO16, 42) +ELF_RELOC(R_CKCORE_PCREL_IMM18_2, 43) +ELF_RELOC(R_CKCORE_DOFFSET_IMM18, 44) +ELF_RELOC(R_CKCORE_DOFFSET_IMM18_2, 45) +ELF_RELOC(R_CKCORE_DOFFSET_IMM18_4, 46) +ELF_RELOC(R_CKCORE_GOTOFF_IMM18, 47) +ELF_RELOC(R_CKCORE_GOT_IMM18_4, 48) +ELF_RELOC(R_CKCORE_PLT_IMM18_4, 49) +ELF_RELOC(R_CKCORE_PCREL_IMM7_4, 50) +ELF_RELOC(R_CKCORE_TLS_LE32, 51) +ELF_RELOC(R_CKCORE_TLS_IE32, 52) +ELF_RELOC(R_CKCORE_TLS_GD32, 53) +ELF_RELOC(R_CKCORE_TLS_LDM32, 54) +ELF_RELOC(R_CKCORE_TLS_LDO32, 55) +ELF_RELOC(R_CKCORE_TLS_DTPMOD32, 56) +ELF_RELOC(R_CKCORE_TLS_DTPOFF32, 57) +ELF_RELOC(R_CKCORE_TLS_TPOFF32, 58) +ELF_RELOC(R_CKCORE_PCREL_FLRW_IMM8_4, 59) +ELF_RELOC(R_CKCORE_NOJSRI, 60) +ELF_RELOC(R_CKCORE_CALLGRAPH, 61) +ELF_RELOC(R_CKCORE_IRELATIVE, 62) +ELF_RELOC(R_CKCORE_PCREL_BLOOP_IMM4_4, 63) +ELF_RELOC(R_CKCORE_PCREL_BLOOP_IMM12_4, 64) +ELF_RELOC(R_CKCORE_PCREL_VLRW_IMM12_1, 65) +ELF_RELOC(R_CKCORE_PCREL_VLRW_IMM12_2, 66) +ELF_RELOC(R_CKCORE_PCREL_VLRW_IMM12_4, 67) +ELF_RELOC(R_CKCORE_PCREL_VLRW_IMM12_8, 68) diff --git a/llvm/include/llvm/BinaryFormat/ELFRelocs/PowerPC64.def b/llvm/include/llvm/BinaryFormat/ELFRelocs/PowerPC64.def index e28c9caaefaf..0422aa0606d7 100644 --- a/llvm/include/llvm/BinaryFormat/ELFRelocs/PowerPC64.def +++ b/llvm/include/llvm/BinaryFormat/ELFRelocs/PowerPC64.def @@ -97,8 +97,14 @@ #undef R_PPC64_DTPREL16_HIGH #undef R_PPC64_DTPREL16_HIGHA #undef R_PPC64_REL24_NOTOC +#undef R_PPC64_PCREL_OPT #undef R_PPC64_PCREL34 #undef R_PPC64_GOT_PCREL34 +#undef R_PPC64_TPREL34 +#undef R_PPC64_DTPREL34 +#undef R_PPC64_GOT_TLSGD_PCREL34 +#undef R_PPC64_GOT_TLSLD_PCREL34 +#undef R_PPC64_GOT_TPREL_PCREL34 #undef R_PPC64_IRELATIVE #undef R_PPC64_REL16 #undef R_PPC64_REL16_LO @@ -194,8 
+200,14 @@ ELF_RELOC(R_PPC64_TPREL16_HIGHA, 113) ELF_RELOC(R_PPC64_DTPREL16_HIGH, 114) ELF_RELOC(R_PPC64_DTPREL16_HIGHA, 115) ELF_RELOC(R_PPC64_REL24_NOTOC, 116) +ELF_RELOC(R_PPC64_PCREL_OPT, 123) ELF_RELOC(R_PPC64_PCREL34, 132) ELF_RELOC(R_PPC64_GOT_PCREL34, 133) +ELF_RELOC(R_PPC64_TPREL34, 146) +ELF_RELOC(R_PPC64_DTPREL34, 147) +ELF_RELOC(R_PPC64_GOT_TLSGD_PCREL34, 148) +ELF_RELOC(R_PPC64_GOT_TLSLD_PCREL34, 149) +ELF_RELOC(R_PPC64_GOT_TPREL_PCREL34, 150) ELF_RELOC(R_PPC64_IRELATIVE, 248) ELF_RELOC(R_PPC64_REL16, 249) ELF_RELOC(R_PPC64_REL16_LO, 250) diff --git a/llvm/include/llvm/BinaryFormat/MachO.h b/llvm/include/llvm/BinaryFormat/MachO.h index e43fea0a2465..f5d5ec328b5e 100644 --- a/llvm/include/llvm/BinaryFormat/MachO.h +++ b/llvm/include/llvm/BinaryFormat/MachO.h @@ -83,6 +83,7 @@ enum { MH_NO_HEAP_EXECUTION = 0x01000000u, MH_APP_EXTENSION_SAFE = 0x02000000u, MH_NLIST_OUTOFSYNC_WITH_DYLDINFO = 0x04000000u, + MH_SIM_SUPPORT = 0x08000000u, MH_DYLIB_IN_CACHE = 0x80000000u, }; @@ -495,7 +496,8 @@ enum PlatformType { PLATFORM_MACCATALYST = 6, PLATFORM_IOSSIMULATOR = 7, PLATFORM_TVOSSIMULATOR = 8, - PLATFORM_WATCHOSSIMULATOR = 9 + PLATFORM_WATCHOSSIMULATOR = 9, + PLATFORM_DRIVERKIT = 10, }; // Values for tools enum in build_tool_version. @@ -1492,6 +1494,7 @@ enum CPUSubTypeARM { enum CPUSubTypeARM64 { CPU_SUBTYPE_ARM64_ALL = 0, + CPU_SUBTYPE_ARM64_V8 = 1, CPU_SUBTYPE_ARM64E = 2, }; diff --git a/llvm/include/llvm/BinaryFormat/Wasm.h b/llvm/include/llvm/BinaryFormat/Wasm.h index 1aca692e30a7..063c6a3f9449 100644 --- a/llvm/include/llvm/BinaryFormat/Wasm.h +++ b/llvm/include/llvm/BinaryFormat/Wasm.h @@ -41,7 +41,7 @@ struct WasmDylinkInfo { uint32_t MemoryAlignment; // P2 alignment of memory uint32_t TableSize; // Table size in elements uint32_t TableAlignment; // P2 alignment of table - std::vector<StringRef> Needed; // Shared library depenedencies + std::vector<StringRef> Needed; // Shared library dependencies }; struct WasmProducerInfo { @@ -67,11 +67,17 @@ struct WasmLimits { uint64_t Maximum; }; -struct WasmTable { +struct WasmTableType { uint8_t ElemType; WasmLimits Limits; }; +struct WasmTable { + uint32_t Index; + WasmTableType Type; + StringRef SymbolName; // from the "linking" section +}; + struct WasmInitExpr { uint8_t Opcode; union { @@ -114,7 +120,7 @@ struct WasmImport { union { uint32_t SigIndex; WasmGlobalType Global; - WasmTable Table; + WasmTableType Table; WasmLimits Memory; WasmEventType Event; }; @@ -140,8 +146,11 @@ struct WasmFunction { struct WasmDataSegment { uint32_t InitFlags; - uint32_t MemoryIndex; // present if InitFlags & WASM_SEGMENT_HAS_MEMINDEX - WasmInitExpr Offset; // present if InitFlags & WASM_SEGMENT_IS_PASSIVE == 0 + // Present if InitFlags & WASM_DATA_SEGMENT_HAS_MEMINDEX. + uint32_t MemoryIndex; + // Present if InitFlags & WASM_DATA_SEGMENT_IS_PASSIVE == 0. + WasmInitExpr Offset; + ArrayRef<uint8_t> Content; StringRef Name; // from the "segment info" section uint32_t Alignment; @@ -186,15 +195,22 @@ struct WasmSymbolInfo { // For symbols to be exported from the final module Optional<StringRef> ExportName; union { - // For function or global symbols, the index in function or global index - // space. + // For function, table, or global symbols, the index in function, table, or + // global index space. uint32_t ElementIndex; // For a data symbols, the address of the data relative to segment. 
WasmDataReference DataRef; }; }; -struct WasmFunctionName { +enum class NameType { + FUNCTION, + GLOBAL, + DATA_SEGMENT, +}; + +struct WasmDebugName { + NameType Type; uint32_t Index; StringRef Name; }; @@ -231,7 +247,6 @@ enum : unsigned { WASM_TYPE_F64 = 0x7C, WASM_TYPE_V128 = 0x7B, WASM_TYPE_FUNCREF = 0x70, - WASM_TYPE_EXNREF = 0x68, WASM_TYPE_EXTERNREF = 0x6F, WASM_TYPE_FUNC = 0x60, WASM_TYPE_NORESULT = 0x40, // for blocks with no result values @@ -251,6 +266,7 @@ enum : unsigned { WASM_OPCODE_END = 0x0b, WASM_OPCODE_CALL = 0x10, WASM_OPCODE_LOCAL_GET = 0x20, + WASM_OPCODE_LOCAL_SET = 0x21, WASM_OPCODE_GLOBAL_GET = 0x23, WASM_OPCODE_GLOBAL_SET = 0x24, WASM_OPCODE_I32_STORE = 0x36, @@ -287,8 +303,8 @@ enum : unsigned { }; enum : unsigned { - WASM_SEGMENT_IS_PASSIVE = 0x01, - WASM_SEGMENT_HAS_MEMINDEX = 0x02, + WASM_DATA_SEGMENT_IS_PASSIVE = 0x01, + WASM_DATA_SEGMENT_HAS_MEMINDEX = 0x02, }; // Feature policy prefixes used in the custom "target_features" section @@ -300,8 +316,10 @@ enum : uint8_t { // Kind codes used in the custom "name" section enum : unsigned { - WASM_NAMES_FUNCTION = 0x1, - WASM_NAMES_LOCAL = 0x2, + WASM_NAMES_FUNCTION = 1, + WASM_NAMES_LOCAL = 2, + WASM_NAMES_GLOBAL = 7, + WASM_NAMES_DATA_SEGMENT = 9, }; // Kind codes used in the custom "linking" section @@ -316,6 +334,8 @@ enum : unsigned { enum : unsigned { WASM_COMDAT_DATA = 0x0, WASM_COMDAT_FUNCTION = 0x1, + // GLOBAL, EVENT, and TABLE are in here but LLVM doesn't use them yet. + WASM_COMDAT_SECTION = 0x5, }; // Kind codes used in the custom "linking" section in the WASM_SYMBOL_TABLE @@ -325,6 +345,7 @@ enum WasmSymbolType : unsigned { WASM_SYMBOL_TYPE_GLOBAL = 0x2, WASM_SYMBOL_TYPE_SECTION = 0x3, WASM_SYMBOL_TYPE_EVENT = 0x4, + WASM_SYMBOL_TYPE_TABLE = 0x5, }; // Kinds of event attributes. 
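
The wasm "name" section changes above extend the subsection kinds (WASM_NAMES_GLOBAL, WASM_NAMES_DATA_SEGMENT) and replace WasmFunctionName with the typed WasmDebugName. A small illustrative sketch, not part of the imported diff, of how a reader might map a subsection kind code onto the new wasm::NameType; the helper name and the llvm::Optional return type are assumptions for the example.

#include "llvm/ADT/Optional.h"
#include "llvm/BinaryFormat/Wasm.h"

// Sketch only, not part of the imported headers: translate a "name" section
// subsection kind code into the NameType carried by WasmDebugName.
static llvm::Optional<llvm::wasm::NameType> classifyNameSubsection(unsigned Kind) {
  switch (Kind) {
  case llvm::wasm::WASM_NAMES_FUNCTION:
    return llvm::wasm::NameType::FUNCTION;
  case llvm::wasm::WASM_NAMES_GLOBAL:
    return llvm::wasm::NameType::GLOBAL;
  case llvm::wasm::WASM_NAMES_DATA_SEGMENT:
    return llvm::wasm::NameType::DATA_SEGMENT;
  default:
    // e.g. WASM_NAMES_LOCAL (2), which has no WasmDebugName counterpart here.
    return llvm::None;
  }
}
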
@@ -360,7 +381,7 @@ enum class ValType { F32 = WASM_TYPE_F32, F64 = WASM_TYPE_F64, V128 = WASM_TYPE_V128, - EXNREF = WASM_TYPE_EXNREF, + FUNCREF = WASM_TYPE_FUNCREF, EXTERNREF = WASM_TYPE_EXTERNREF, }; diff --git a/llvm/include/llvm/BinaryFormat/WasmRelocs.def b/llvm/include/llvm/BinaryFormat/WasmRelocs.def index 05c5147e6314..dca63eca9455 100644 --- a/llvm/include/llvm/BinaryFormat/WasmRelocs.def +++ b/llvm/include/llvm/BinaryFormat/WasmRelocs.def @@ -20,3 +20,8 @@ WASM_RELOC(R_WASM_MEMORY_ADDR_LEB64, 14) WASM_RELOC(R_WASM_MEMORY_ADDR_SLEB64, 15) WASM_RELOC(R_WASM_MEMORY_ADDR_I64, 16) WASM_RELOC(R_WASM_MEMORY_ADDR_REL_SLEB64, 17) +WASM_RELOC(R_WASM_TABLE_INDEX_SLEB64, 18) +WASM_RELOC(R_WASM_TABLE_INDEX_I64, 19) +WASM_RELOC(R_WASM_TABLE_NUMBER_LEB, 20) +WASM_RELOC(R_WASM_MEMORY_ADDR_TLS_SLEB, 21) +WASM_RELOC(R_WASM_FUNCTION_OFFSET_I64, 22) diff --git a/llvm/include/llvm/Object/WasmTraits.h b/llvm/include/llvm/BinaryFormat/WasmTraits.h index 3eee8e71b187..e34182499187 100644 --- a/llvm/include/llvm/Object/WasmTraits.h +++ b/llvm/include/llvm/BinaryFormat/WasmTraits.h @@ -10,8 +10,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_OBJECT_WASMTRAITS_H -#define LLVM_OBJECT_WASMTRAITS_H +#ifndef LLVM_BINARYFORMAT_WASMTRAITS_H +#define LLVM_BINARYFORMAT_WASMTRAITS_H #include "llvm/ADT/Hashing.h" #include "llvm/BinaryFormat/Wasm.h" @@ -65,4 +65,4 @@ template <> struct DenseMapInfo<wasm::WasmGlobalType> { } // end namespace llvm -#endif // LLVM_OBJECT_WASMTRAITS_H +#endif // LLVM_BINARYFORMAT_WASMTRAITS_H diff --git a/llvm/include/llvm/BinaryFormat/XCOFF.h b/llvm/include/llvm/BinaryFormat/XCOFF.h index 5a7ce80a2f62..48e1baf72689 100644 --- a/llvm/include/llvm/BinaryFormat/XCOFF.h +++ b/llvm/include/llvm/BinaryFormat/XCOFF.h @@ -18,6 +18,7 @@ namespace llvm { class StringRef; +template <unsigned> class SmallString; namespace XCOFF { @@ -28,6 +29,7 @@ constexpr size_t NameSize = 8; constexpr size_t SymbolTableEntrySize = 18; constexpr size_t RelocationSerializationSize32 = 10; constexpr uint16_t RelocOverflow = 65535; +constexpr uint8_t AllocRegNo = 31; enum ReservedSectionNum : int16_t { N_DEBUG = -2, N_ABS = -1, N_UNDEF = 0 }; @@ -294,6 +296,115 @@ enum CFileCpuId : uint8_t { StringRef getMappingClassString(XCOFF::StorageMappingClass SMC); StringRef getRelocationTypeString(XCOFF::RelocationType Type); +SmallString<32> parseParmsType(uint32_t Value, unsigned ParmsNum); + +struct TracebackTable { + enum LanguageID : uint8_t { + C, + Fortran, + Pascal, + Ada, + PL1, + Basic, + Lisp, + Cobol, + Modula2, + CPlusPlus, + Rpg, + PL8, + PLIX = PL8, + Assembly, + Java, + ObjectiveC + }; + // Byte 1 + static constexpr uint32_t VersionMask = 0xFF00'0000; + static constexpr uint8_t VersionShift = 24; + + // Byte 2 + static constexpr uint32_t LanguageIdMask = 0x00FF'0000; + static constexpr uint8_t LanguageIdShift = 16; + + // Byte 3 + static constexpr uint32_t IsGlobaLinkageMask = 0x0000'8000; + static constexpr uint32_t IsOutOfLineEpilogOrPrologueMask = 0x0000'4000; + static constexpr uint32_t HasTraceBackTableOffsetMask = 0x0000'2000; + static constexpr uint32_t IsInternalProcedureMask = 0x0000'1000; + static constexpr uint32_t HasControlledStorageMask = 0x0000'0800; + static constexpr uint32_t IsTOClessMask = 0x0000'0400; + static constexpr uint32_t IsFloatingPointPresentMask = 0x0000'0200; + static constexpr uint32_t IsFloatingPointOperationLogOrAbortEnabledMask = + 0x0000'0100; + + // Byte 4 + static constexpr uint32_t IsInterruptHandlerMask = 
0x0000'0080; + static constexpr uint32_t IsFunctionNamePresentMask = 0x0000'0040; + static constexpr uint32_t IsAllocaUsedMask = 0x0000'0020; + static constexpr uint32_t OnConditionDirectiveMask = 0x0000'001C; + static constexpr uint32_t IsCRSavedMask = 0x0000'0002; + static constexpr uint32_t IsLRSavedMask = 0x0000'0001; + static constexpr uint8_t OnConditionDirectiveShift = 2; + + // Byte 5 + static constexpr uint32_t IsBackChainStoredMask = 0x8000'0000; + static constexpr uint32_t IsFixupMask = 0x4000'0000; + static constexpr uint32_t FPRSavedMask = 0x3F00'0000; + static constexpr uint32_t FPRSavedShift = 24; + + // Byte 6 + static constexpr uint32_t HasVectorInfoMask = 0x0080'0000; + static constexpr uint32_t HasExtensionTableMask = 0x0040'0000; + static constexpr uint32_t GPRSavedMask = 0x003F'0000; + static constexpr uint32_t GPRSavedShift = 16; + + // Byte 7 + static constexpr uint32_t NumberOfFixedParmsMask = 0x0000'FF00; + static constexpr uint8_t NumberOfFixedParmsShift = 8; + + // Byte 8 + static constexpr uint32_t NumberOfFloatingPointParmsMask = 0x0000'00FE; + static constexpr uint32_t HasParmsOnStackMask = 0x0000'0001; + static constexpr uint8_t NumberOfFloatingPointParmsShift = 1; + + // Masks to select leftmost bits for decoding parameter type information. + // Bit to use when vector info is not presented. + static constexpr uint32_t ParmTypeIsFloatingBit = 0x8000'0000; + static constexpr uint32_t ParmTypeFloatingIsDoubleBit = 0x4000'0000; + // Bits to use when vector info is presented. + static constexpr uint32_t ParmTypeIsFixedBits = 0x0000'0000; + static constexpr uint32_t ParmTypeIsVectorBits = 0x4000'0000; + static constexpr uint32_t ParmTypeIsFloatingBits = 0x8000'0000; + static constexpr uint32_t ParmTypeIsDoubleBits = 0xC000'0000; + static constexpr uint32_t ParmTypeMask = 0xC000'0000; + + // Vector extension + static constexpr uint16_t NumberOfVRSavedMask = 0xFC00; + static constexpr uint16_t IsVRSavedOnStackMask = 0x0200; + static constexpr uint16_t HasVarArgsMask = 0x0100; + static constexpr uint8_t NumberOfVRSavedShift = 10; + + static constexpr uint16_t NumberOfVectorParmsMask = 0x00FE; + static constexpr uint16_t HasVMXInstructionMask = 0x0001; + static constexpr uint8_t NumberOfVectorParmsShift = 1; + + static constexpr uint32_t ParmTypeIsVectorCharBit = 0x0000'0000; + static constexpr uint32_t ParmTypeIsVectorShortBit = 0x4000'0000; + static constexpr uint32_t ParmTypeIsVectorIntBit = 0x8000'0000; + static constexpr uint32_t ParmTypeIsVectorFloatBit = 0xC000'0000; +}; + +// Extended Traceback table flags. +enum ExtendedTBTableFlag : uint8_t { + TB_OS1 = 0x80, ///< Reserved for OS use. + TB_RESERVED = 0x40, ///< Reserved for compiler. + TB_SSP_CANARY = 0x20, ///< stack smasher canary present on stack. + TB_OS2 = 0x10, ///< Reserved for OS use. + TB_EH_INFO = 0x08, ///< Exception handling info present. + TB_LONGTBTABLE2 = 0x01 ///< Additional tbtable extension exists. +}; + +StringRef getNameForTracebackTableLanguageId(TracebackTable::LanguageID LangId); +SmallString<32> getExtendedTBTableFlagString(uint8_t Flag); } // end namespace XCOFF } // end namespace llvm diff --git a/llvm/include/llvm/Bitcode/BitcodeCommon.h b/llvm/include/llvm/Bitcode/BitcodeCommon.h new file mode 100644 index 000000000000..6a3e74550bc4 --- /dev/null +++ b/llvm/include/llvm/Bitcode/BitcodeCommon.h @@ -0,0 +1,30 @@ +//===- BitcodeCommon.h - Common code for encode/decode --------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This header defines common code to be used by BitcodeWriter and +// BitcodeReader. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_BITCODE_BITCODECOMMON_H +#define LLVM_BITCODE_BITCODECOMMON_H + +#include "llvm/ADT/Bitfields.h" + +namespace llvm { + +struct AllocaPackedValues { + using Align = Bitfield::Element<unsigned, 0, 5>; + using UsedWithInAlloca = Bitfield::Element<bool, Align::NextBit, 1>; + using ExplicitType = Bitfield::Element<bool, UsedWithInAlloca::NextBit, 1>; + using SwiftError = Bitfield::Element<bool, ExplicitType::NextBit, 1>; +}; + +} // namespace llvm + +#endif // LLVM_BITCODE_BITCODECOMMON_H diff --git a/llvm/include/llvm/Bitcode/BitcodeConvenience.h b/llvm/include/llvm/Bitcode/BitcodeConvenience.h new file mode 100644 index 000000000000..0060d014ba82 --- /dev/null +++ b/llvm/include/llvm/Bitcode/BitcodeConvenience.h @@ -0,0 +1,486 @@ +//===- llvm/Bitcode/BitcodeConvenience.h - Convenience Wrappers -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file Convenience wrappers for the LLVM bitcode format and bitstream APIs. +/// +/// This allows you to use a sort of DSL to declare and use bitcode +/// abbreviations and records. Example: +/// +/// \code +/// using Metadata = BCRecordLayout< +/// METADATA_ID, // ID +/// BCFixed<16>, // Module format major version +/// BCFixed<16>, // Module format minor version +/// BCBlob // misc. version information +/// >; +/// Metadata metadata(Out); +/// metadata.emit(ScratchRecord, VERSION_MAJOR, VERSION_MINOR, Data); +/// \endcode +/// +/// For details on the bitcode format, see +/// http://llvm.org/docs/BitCodeFormat.html +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_BITCODE_BITCODECONVENIENCE_H +#define LLVM_BITCODE_BITCODECONVENIENCE_H + +#include "llvm/Bitstream/BitCodes.h" +#include "llvm/Bitstream/BitstreamWriter.h" +#include <cstdint> + +namespace llvm { +namespace detail { +/// Convenience base for all kinds of bitcode abbreviation fields. +/// +/// This just defines common properties queried by the metaprogramming. +template <bool Compound = false> class BCField { +public: + static const bool IsCompound = Compound; + + /// Asserts that the given data is a valid value for this field. + template <typename T> static void assertValid(const T &data) {} + + /// Converts a raw numeric representation of this value to its preferred + /// type. + template <typename T> static T convert(T rawValue) { return rawValue; } +}; +} // namespace detail + +/// Represents a literal operand in a bitcode record. +/// +/// The value of a literal operand is the same for all instances of the record, +/// so it is only emitted in the abbreviation definition. +/// +/// Note that because this uses a compile-time template, you cannot have a +/// literal operand that is fixed at run-time without dropping down to the +/// raw LLVM APIs. 
+template <uint64_t Value> class BCLiteral : public detail::BCField<> { +public: + static void emitOp(llvm::BitCodeAbbrev &abbrev) { + abbrev.Add(llvm::BitCodeAbbrevOp(Value)); + } + + template <typename T> static void assertValid(const T &data) { + assert(data == Value && "data value does not match declared literal value"); + } +}; + +/// Represents a fixed-width value in a bitcode record. +/// +/// Note that the LLVM bitcode format only supports unsigned values. +template <unsigned Width> class BCFixed : public detail::BCField<> { +public: + static_assert(Width <= 64, "fixed-width field is too large"); + + static void emitOp(llvm::BitCodeAbbrev &abbrev) { + abbrev.Add(llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::Fixed, Width)); + } + + static void assertValid(const bool &data) { + assert(llvm::isUInt<Width>(data) && + "data value does not fit in the given bit width"); + } + + template <typename T> static void assertValid(const T &data) { + assert(data >= 0 && "cannot encode signed integers"); + assert(llvm::isUInt<Width>(data) && + "data value does not fit in the given bit width"); + } +}; + +/// Represents a variable-width value in a bitcode record. +/// +/// The \p Width parameter should include the continuation bit. +/// +/// Note that the LLVM bitcode format only supports unsigned values. +template <unsigned Width> class BCVBR : public detail::BCField<> { + static_assert(Width >= 2, "width does not have room for continuation bit"); + +public: + static void emitOp(llvm::BitCodeAbbrev &abbrev) { + abbrev.Add(llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::VBR, Width)); + } + + template <typename T> static void assertValid(const T &data) { + assert(data >= 0 && "cannot encode signed integers"); + } +}; + +/// Represents a character encoded in LLVM's Char6 encoding. +/// +/// This format is suitable for encoding decimal numbers (without signs or +/// exponents) and C identifiers (without dollar signs), but not much else. +/// +/// \sa http://llvm.org/docs/BitCodeFormat.html#char6-encoded-value +class BCChar6 : public detail::BCField<> { +public: + static void emitOp(llvm::BitCodeAbbrev &abbrev) { + abbrev.Add(llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::Char6)); + } + + template <typename T> static void assertValid(const T &data) { + assert(llvm::BitCodeAbbrevOp::isChar6(data) && "invalid Char6 data"); + } + + template <typename T> char convert(T rawValue) { + return static_cast<char>(rawValue); + } +}; + +/// Represents an untyped blob of bytes. +/// +/// If present, this must be the last field in a record. +class BCBlob : public detail::BCField<true> { +public: + static void emitOp(llvm::BitCodeAbbrev &abbrev) { + abbrev.Add(llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::Blob)); + } +}; + +/// Represents an array of some other type. +/// +/// If present, this must be the last field in a record. +template <typename ElementTy> class BCArray : public detail::BCField<true> { + static_assert(!ElementTy::IsCompound, "arrays can only contain scalar types"); + +public: + static void emitOp(llvm::BitCodeAbbrev &abbrev) { + abbrev.Add(llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::Array)); + ElementTy::emitOp(abbrev); + } +}; + +namespace detail { +/// Attaches the last field to an abbreviation. +/// +/// This is the base case for \c emitOps. +/// +/// \sa BCRecordLayout::emitAbbrev +template <typename FieldTy> static void emitOps(llvm::BitCodeAbbrev &abbrev) { + FieldTy::emitOp(abbrev); +} + +/// Attaches fields to an abbreviation. +/// +/// This is the recursive case for \c emitOps. 
+/// +/// \sa BCRecordLayout::emitAbbrev +template <typename FieldTy, typename Next, typename... Rest> +static void emitOps(llvm::BitCodeAbbrev &abbrev) { + static_assert(!FieldTy::IsCompound, + "arrays and blobs may not appear in the middle of a record"); + FieldTy::emitOp(abbrev); + emitOps<Next, Rest...>(abbrev); +} + +/// Helper class for dealing with a scalar element in the middle of a record. +/// +/// \sa BCRecordLayout +template <typename ElementTy, typename... Fields> class BCRecordCoding { +public: + template <typename BufferTy, typename ElementDataTy, typename... DataTy> + static void emit(llvm::BitstreamWriter &Stream, BufferTy &buffer, + unsigned code, ElementDataTy element, DataTy &&...data) { + static_assert(!ElementTy::IsCompound, + "arrays and blobs may not appear in the middle of a record"); + ElementTy::assertValid(element); + buffer.push_back(element); + BCRecordCoding<Fields...>::emit(Stream, buffer, code, + std::forward<DataTy>(data)...); + } + + template <typename T, typename ElementDataTy, typename... DataTy> + static void read(ArrayRef<T> buffer, ElementDataTy &element, + DataTy &&...data) { + assert(!buffer.empty() && "too few elements in buffer"); + element = ElementTy::convert(buffer.front()); + BCRecordCoding<Fields...>::read(buffer.slice(1), + std::forward<DataTy>(data)...); + } + + template <typename T, typename... DataTy> + static void read(ArrayRef<T> buffer, NoneType, DataTy &&...data) { + assert(!buffer.empty() && "too few elements in buffer"); + BCRecordCoding<Fields...>::read(buffer.slice(1), + std::forward<DataTy>(data)...); + } +}; + +/// Helper class for dealing with a scalar element at the end of a record. +/// +/// This has a separate implementation because up until now we've only been +/// \em building the record (into a data buffer), and now we need to hand it +/// off to the BitstreamWriter to be emitted. +/// +/// \sa BCRecordLayout +template <typename ElementTy> class BCRecordCoding<ElementTy> { +public: + template <typename BufferTy, typename DataTy> + static void emit(llvm::BitstreamWriter &Stream, BufferTy &buffer, + unsigned code, const DataTy &data) { + static_assert(!ElementTy::IsCompound, + "arrays and blobs need special handling"); + ElementTy::assertValid(data); + buffer.push_back(data); + Stream.EmitRecordWithAbbrev(code, buffer); + } + + template <typename T, typename DataTy> + static void read(ArrayRef<T> buffer, DataTy &data) { + assert(buffer.size() == 1 && "record data does not match layout"); + data = ElementTy::convert(buffer.front()); + } + + template <typename T> static void read(ArrayRef<T> buffer, NoneType) { + assert(buffer.size() == 1 && "record data does not match layout"); + (void)buffer; + } + + template <typename T> static void read(ArrayRef<T> buffer) = delete; +}; + +/// Helper class for dealing with an array at the end of a record. +/// +/// \sa BCRecordLayout::emitRecord +template <typename ElementTy> class BCRecordCoding<BCArray<ElementTy>> { +public: + template <typename BufferTy> + static void emit(llvm::BitstreamWriter &Stream, BufferTy &buffer, + unsigned code, StringRef data) { + // TODO: validate array data. 
+ Stream.EmitRecordWithArray(code, buffer, data); + } + + template <typename BufferTy, typename ArrayTy> + static void emit(llvm::BitstreamWriter &Stream, BufferTy &buffer, + unsigned code, const ArrayTy &array) { +#ifndef NDEBUG + for (auto &element : array) + ElementTy::assertValid(element); +#endif + buffer.reserve(buffer.size() + std::distance(array.begin(), array.end())); + std::copy(array.begin(), array.end(), std::back_inserter(buffer)); + Stream.EmitRecordWithAbbrev(code, buffer); + } + + template <typename BufferTy, typename ElementDataTy, typename... DataTy> + static void emit(llvm::BitstreamWriter &Stream, BufferTy &buffer, + unsigned code, ElementDataTy element, DataTy... data) { + std::array<ElementDataTy, 1 + sizeof...(data)> array{{element, data...}}; + emit(Stream, buffer, code, array); + } + + template <typename BufferTy> + static void emit(llvm::BitstreamWriter &Stream, BufferTy &Buffer, + unsigned code, NoneType) { + Stream.EmitRecordWithAbbrev(code, Buffer); + } + + template <typename T> + static void read(ArrayRef<T> Buffer, ArrayRef<T> &rawData) { + rawData = Buffer; + } + + template <typename T, typename ArrayTy> + static void read(ArrayRef<T> buffer, ArrayTy &array) { + array.append(llvm::map_iterator(buffer.begin(), T::convert), + llvm::map_iterator(buffer.end(), T::convert)); + } + + template <typename T> static void read(ArrayRef<T> buffer, NoneType) { + (void)buffer; + } + + template <typename T> static void read(ArrayRef<T> buffer) = delete; +}; + +/// Helper class for dealing with a blob at the end of a record. +/// +/// \sa BCRecordLayout +template <> class BCRecordCoding<BCBlob> { +public: + template <typename BufferTy> + static void emit(llvm::BitstreamWriter &Stream, BufferTy &buffer, + unsigned code, StringRef data) { + Stream.EmitRecordWithBlob(code, buffer, data); + } + + template <typename T> static void read(ArrayRef<T> buffer) { (void)buffer; } + + /// Blob data is not stored in the buffer if you are using the correct + /// accessor; this method should not be used. + template <typename T, typename DataTy> + static void read(ArrayRef<T> buffer, DataTy &data) = delete; +}; + +/// A type trait whose \c type field is the last of its template parameters. +template <typename Head, typename... Tail> struct last_type { + using type = typename last_type<Tail...>::type; +}; + +template <typename Head> struct last_type<Head> { using type = Head; }; + +/// A type trait whose \c value field is \c true if the last type is BCBlob. +template <typename... Types> +using has_blob = std::is_same<BCBlob, typename last_type<int, Types...>::type>; + +/// A type trait whose \c value field is \c true if the given type is a +/// BCArray (of any element kind). +template <typename T> struct is_array { +private: + template <typename E> static bool check(BCArray<E> *); + static int check(...); + +public: + typedef bool value_type; + static constexpr bool value = !std::is_same<decltype(check((T *)nullptr)), + decltype(check(false))>::value; +}; + +/// A type trait whose \c value field is \c true if the last type is a +/// BCArray (of any element kind). +template <typename... Types> +using has_array = is_array<typename last_type<int, Types...>::type>; +} // namespace detail + +/// Represents a single bitcode record type. +/// +/// This class template is meant to be instantiated and then given a name, +/// so that from then on that name can be used. +template <typename IDField, typename... 
Fields> class BCGenericRecordLayout { + llvm::BitstreamWriter &Stream; + +public: + /// The abbreviation code used for this record in the current block. + /// + /// Note that this is not the same as the semantic record code, which is the + /// first field of the record. + const unsigned AbbrevCode; + + /// Create a layout and register it with the given bitstream writer. + explicit BCGenericRecordLayout(llvm::BitstreamWriter &Stream) + : Stream(Stream), AbbrevCode(emitAbbrev(Stream)) {} + + /// Emit a record to the bitstream writer, using the given buffer for scratch + /// space. + /// + /// Note that even fixed arguments must be specified here. + template <typename BufferTy, typename... Data> + void emit(BufferTy &buffer, unsigned id, Data &&...data) const { + emitRecord(Stream, buffer, AbbrevCode, id, std::forward<Data>(data)...); + } + + /// Registers this record's layout with the bitstream reader. + /// + /// eturns The abbreviation code for the newly-registered record type. + static unsigned emitAbbrev(llvm::BitstreamWriter &Stream) { + auto Abbrev = std::make_shared<llvm::BitCodeAbbrev>(); + detail::emitOps<IDField, Fields...>(*Abbrev); + return Stream.EmitAbbrev(std::move(Abbrev)); + } + + /// Emit a record identified by \p abbrCode to bitstream reader \p Stream, + /// using \p buffer for scratch space. + /// + /// Note that even fixed arguments must be specified here. Blobs are passed + /// as StringRefs, while arrays can be passed inline, as aggregates, or as + /// pre-encoded StringRef data. Skipped values and empty arrays should use + /// the special Nothing value. + template <typename BufferTy, typename... Data> + static void emitRecord(llvm::BitstreamWriter &Stream, BufferTy &buffer, + unsigned abbrCode, unsigned recordID, Data &&...data) { + static_assert(sizeof...(data) <= sizeof...(Fields) || + detail::has_array<Fields...>::value, + "Too many record elements"); + static_assert(sizeof...(data) >= sizeof...(Fields), + "Too few record elements"); + buffer.clear(); + detail::BCRecordCoding<IDField, Fields...>::emit( + Stream, buffer, abbrCode, recordID, std::forward<Data>(data)...); + } + + /// Extract record data from \p buffer into the given data fields. + /// + /// Note that even fixed arguments must be specified here. Pass \c Nothing + /// if you don't care about a particular parameter. Blob data is not included + /// in the buffer and should be handled separately by the caller. + template <typename ElementTy, typename... Data> + static void readRecord(ArrayRef<ElementTy> buffer, Data &&...data) { + static_assert(sizeof...(data) <= sizeof...(Fields), + "Too many record elements"); + static_assert(sizeof...(Fields) <= + sizeof...(data) + detail::has_blob<Fields...>::value, + "Too few record elements"); + return detail::BCRecordCoding<Fields...>::read(buffer, + std::forward<Data>(data)...); + } + + /// Extract record data from \p buffer into the given data fields. + /// + /// Note that even fixed arguments must be specified here. Pass \c Nothing + /// if you don't care about a particular parameter. Blob data is not included + /// in the buffer and should be handled separately by the caller. + template <typename BufferTy, typename... Data> + static void readRecord(BufferTy &buffer, Data &&...data) { + return readRecord(llvm::makeArrayRef(buffer), std::forward<Data>(data)...); + } +}; + +/// A record with a fixed record code. +template <unsigned RecordCode, typename... 
Fields> +class BCRecordLayout + : public BCGenericRecordLayout<BCLiteral<RecordCode>, Fields...> { + using Base = BCGenericRecordLayout<BCLiteral<RecordCode>, Fields...>; + +public: + enum : unsigned { + /// The record code associated with this layout. + Code = RecordCode + }; + + /// Create a layout and register it with the given bitstream writer. + explicit BCRecordLayout(llvm::BitstreamWriter &Stream) : Base(Stream) {} + + /// Emit a record to the bitstream writer, using the given buffer for scratch + /// space. + /// + /// Note that even fixed arguments must be specified here. + template <typename BufferTy, typename... Data> + void emit(BufferTy &buffer, Data &&...data) const { + Base::emit(buffer, RecordCode, std::forward<Data>(data)...); + } + + /// Emit a record identified by \p abbrCode to bitstream reader \p Stream, + /// using \p buffer for scratch space. + /// + /// Note that even fixed arguments must be specified here. Currently, arrays + /// and blobs can only be passed as StringRefs. + template <typename BufferTy, typename... Data> + static void emitRecord(llvm::BitstreamWriter &Stream, BufferTy &buffer, + unsigned abbrCode, Data &&...data) { + Base::emitRecord(Stream, buffer, abbrCode, RecordCode, + std::forward<Data>(data)...); + } +}; + +/// RAII object to pair entering and exiting a sub-block. +class BCBlockRAII { + llvm::BitstreamWriter &Stream; + +public: + BCBlockRAII(llvm::BitstreamWriter &Stream, unsigned block, unsigned abbrev) + : Stream(Stream) { + Stream.EnterSubblock(block, abbrev); + } + + ~BCBlockRAII() { Stream.ExitBlock(); } +}; +} // namespace llvm + +#endif diff --git a/llvm/include/llvm/Bitcode/BitcodeWriter.h b/llvm/include/llvm/Bitcode/BitcodeWriter.h index 4beb89d30e00..7ad2d37a2a35 100644 --- a/llvm/include/llvm/Bitcode/BitcodeWriter.h +++ b/llvm/include/llvm/Bitcode/BitcodeWriter.h @@ -47,7 +47,7 @@ class raw_ostream; public: /// Create a BitcodeWriter that writes to Buffer. - BitcodeWriter(SmallVectorImpl<char> &Buffer); + BitcodeWriter(SmallVectorImpl<char> &Buffer, raw_fd_stream *FS = nullptr); ~BitcodeWriter(); @@ -152,10 +152,18 @@ class raw_ostream; const std::map<std::string, GVSummaryMapTy> *ModuleToSummariesForIndex = nullptr); - /// Save a copy of the llvm IR as data in the __LLVM,__bitcode section. + /// If EmbedBitcode is set, save a copy of the llvm IR as data in the + /// __LLVM,__bitcode section (.llvmbc on non-MacOS). + /// If available, pass the serialized module via the Buf parameter. If not, + /// pass an empty (default-initialized) MemoryBufferRef, and the serialization + /// will be handled by this API. The same behavior happens if the provided Buf + /// is not bitcode (i.e. if it's invalid data or even textual LLVM assembly). + /// If EmbedCmdline is set, the command line is also exported in + /// the corresponding section (__LLVM,_cmdline / .llvmcmd) - even if CmdArgs + /// were empty. 
void EmbedBitcodeInModule(Module &M, MemoryBufferRef Buf, bool EmbedBitcode, - bool EmbedMarker, - const std::vector<uint8_t> *CmdArgs); + bool EmbedCmdline, + const std::vector<uint8_t> &CmdArgs); } // end namespace llvm diff --git a/llvm/include/llvm/Bitcode/LLVMBitCodes.h b/llvm/include/llvm/Bitcode/LLVMBitCodes.h index de4fe6630324..5b4854d6c95e 100644 --- a/llvm/include/llvm/Bitcode/LLVMBitCodes.h +++ b/llvm/include/llvm/Bitcode/LLVMBitCodes.h @@ -168,7 +168,8 @@ enum TypeCodes { TYPE_CODE_TOKEN = 22, // TOKEN - TYPE_CODE_BFLOAT = 23 // BRAIN FLOATING POINT + TYPE_CODE_BFLOAT = 23, // BRAIN FLOATING POINT + TYPE_CODE_X86_AMX = 24 // X86 AMX }; enum OperandBundleTagCode { @@ -338,7 +339,11 @@ enum MetadataCodes { METADATA_INDEX_OFFSET = 38, // [offset] METADATA_INDEX = 39, // [bitpos] METADATA_LABEL = 40, // [distinct, scope, name, file, line] - METADATA_COMMON_BLOCK = 44, // [distinct, scope, name, variable,...] + METADATA_STRING_TYPE = 41, // [distinct, name, size, align,...] + // Codes 42 and 43 are reserved for support for Fortran array specific debug + // info. + METADATA_COMMON_BLOCK = 44, // [distinct, scope, name, variable,...] + METADATA_GENERIC_SUBRANGE = 45 // [distinct, count, lo, up, stride] }; // The constants block (CONSTANTS_BLOCK_ID) describes emission for each @@ -371,6 +376,7 @@ enum ConstantsCodes { // asmdialect,asmstr,conststr] CST_CODE_CE_GEP_WITH_INRANGE_INDEX = 24, // [opty, flags, n x operands] CST_CODE_CE_UNOP = 25, // CE_UNOP: [opcode, opval] + CST_CODE_POISON = 26, // POISON }; /// CastOpcodes - These are values used in the bitcode files to encode which @@ -536,8 +542,9 @@ enum FunctionCodes { FUNC_CODE_DEBUG_LOC = 35, // DEBUG_LOC: [Line,Col,ScopeVal, IAVal] FUNC_CODE_INST_FENCE = 36, // FENCE: [ordering, synchscope] - FUNC_CODE_INST_CMPXCHG_OLD = 37, // CMPXCHG: [ptrty,ptr,cmp,new, align, vol, - // ordering, synchscope] + FUNC_CODE_INST_CMPXCHG_OLD = 37, // CMPXCHG: [ptrty, ptr, cmp, val, vol, + // ordering, synchscope, + // failure_ordering?, weak?] FUNC_CODE_INST_ATOMICRMW = 38, // ATOMICRMW: [ptrty,ptr,val, operation, // align, vol, // ordering, synchscope] @@ -551,8 +558,9 @@ enum FunctionCodes { FUNC_CODE_INST_GEP = 43, // GEP: [inbounds, n x operands] FUNC_CODE_INST_STORE = 44, // STORE: [ptrty,ptr,valty,val, align, vol] FUNC_CODE_INST_STOREATOMIC = 45, // STORE: [ptrty,ptr,val, align, vol - FUNC_CODE_INST_CMPXCHG = 46, // CMPXCHG: [ptrty,ptr,valty,cmp,new, align, - // vol,ordering,synchscope] + FUNC_CODE_INST_CMPXCHG = 46, // CMPXCHG: [ptrty, ptr, cmp, val, vol, + // success_ordering, synchscope, + // failure_ordering, weak] FUNC_CODE_INST_LANDINGPAD = 47, // LANDINGPAD: [ty,val,num,id0,val0...] 
FUNC_CODE_INST_CLEANUPRET = 48, // CLEANUPRET: [val] or [val,bb#] FUNC_CODE_INST_CATCHRET = 49, // CATCHRET: [val,bb#] @@ -644,6 +652,11 @@ enum AttributeKindCodes { ATTR_KIND_NO_MERGE = 66, ATTR_KIND_NULL_POINTER_IS_VALID = 67, ATTR_KIND_NOUNDEF = 68, + ATTR_KIND_BYREF = 69, + ATTR_KIND_MUSTPROGRESS = 70, + ATTR_KIND_NO_CALLBACK = 71, + ATTR_KIND_HOT = 72, + ATTR_KIND_NO_PROFILE = 73, }; enum ComdatSelectionKindCodes { diff --git a/llvm/include/llvm/Bitstream/BitCodes.h b/llvm/include/llvm/Bitstream/BitCodes.h index 41a3de3b20ef..9cd4e535a470 100644 --- a/llvm/include/llvm/Bitstream/BitCodes.h +++ b/llvm/include/llvm/Bitstream/BitCodes.h @@ -18,6 +18,7 @@ #define LLVM_BITSTREAM_BITCODES_H #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/Support/DataTypes.h" #include "llvm/Support/ErrorHandling.h" #include <cassert> @@ -137,13 +138,7 @@ public: } /// isChar6 - Return true if this character is legal in the Char6 encoding. - static bool isChar6(char C) { - if (C >= 'a' && C <= 'z') return true; - if (C >= 'A' && C <= 'Z') return true; - if (C >= '0' && C <= '9') return true; - if (C == '.' || C == '_') return true; - return false; - } + static bool isChar6(char C) { return isAlnum(C) || C == '.' || C == '_'; } static unsigned EncodeChar6(char C) { if (C >= 'a' && C <= 'z') return C-'a'; if (C >= 'A' && C <= 'Z') return C-'A'+26; diff --git a/llvm/include/llvm/Bitstream/BitstreamWriter.h b/llvm/include/llvm/Bitstream/BitstreamWriter.h index c0ead19dc71d..3954df4897ae 100644 --- a/llvm/include/llvm/Bitstream/BitstreamWriter.h +++ b/llvm/include/llvm/Bitstream/BitstreamWriter.h @@ -20,17 +20,28 @@ #include "llvm/ADT/StringRef.h" #include "llvm/Bitstream/BitCodes.h" #include "llvm/Support/Endian.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> #include <vector> namespace llvm { class BitstreamWriter { + /// Out - The buffer that keeps unflushed bytes. SmallVectorImpl<char> &Out; + /// FS - The file stream that Out flushes to. If FS is nullptr, it does not + /// support read or seek, Out cannot be flushed until all data are written. + raw_fd_stream *FS; + + /// FlushThreshold - If FS is valid, this is the threshold (unit B) to flush + /// FS. + const uint64_t FlushThreshold; + /// CurBit - Always between 0 and 31 inclusive, specifies the next bit to use. unsigned CurBit; - /// CurValue - The current value. Only bits < CurBit are valid. + /// CurValue - The current value. Only bits < CurBit are valid. uint32_t CurValue; /// CurCodeSize - This is the declared size of code values used for the @@ -64,15 +75,19 @@ class BitstreamWriter { void WriteByte(unsigned char Value) { Out.push_back(Value); + FlushToFile(); } void WriteWord(unsigned Value) { Value = support::endian::byte_swap<uint32_t, support::little>(Value); Out.append(reinterpret_cast<const char *>(&Value), reinterpret_cast<const char *>(&Value + 1)); + FlushToFile(); } - size_t GetBufferOffset() const { return Out.size(); } + uint64_t GetNumOfFlushedBytes() const { return FS ? FS->tell() : 0; } + + size_t GetBufferOffset() const { return Out.size() + GetNumOfFlushedBytes(); } size_t GetWordIndex() const { size_t Offset = GetBufferOffset(); @@ -80,9 +95,29 @@ class BitstreamWriter { return Offset / 4; } + /// If the related file stream supports reading, seeking and writing, flush + /// the buffer if its size is above a threshold. 
+ void FlushToFile() { + if (!FS) + return; + if (Out.size() < FlushThreshold) + return; + FS->write((char *)&Out.front(), Out.size()); + Out.clear(); + } + public: - explicit BitstreamWriter(SmallVectorImpl<char> &O) - : Out(O), CurBit(0), CurValue(0), CurCodeSize(2) {} + /// Create a BitstreamWriter that writes to Buffer \p O. + /// + /// \p FS is the file stream that \p O flushes to incrementally. If \p FS is + /// null, \p O is not flushed incrementally, but is written to disk at the end. + /// + /// \p FlushThreshold is the threshold (in MiB) to flush \p O if \p FS is + /// valid. + BitstreamWriter(SmallVectorImpl<char> &O, raw_fd_stream *FS = nullptr, + uint32_t FlushThreshold = 512) + : Out(O), FS(FS), FlushThreshold(FlushThreshold << 20), CurBit(0), + CurValue(0), CurCodeSize(2) {} ~BitstreamWriter() { assert(CurBit == 0 && "Unflushed data remaining"); @@ -103,12 +138,60 @@ public: /// with the specified value. void BackpatchWord(uint64_t BitNo, unsigned NewWord) { using namespace llvm::support; - unsigned ByteNo = BitNo / 8; - assert((!endian::readAtBitAlignment<uint32_t, little, unaligned>( - &Out[ByteNo], BitNo & 7)) && - "Expected to be patching over 0-value placeholders"); - endian::writeAtBitAlignment<uint32_t, little, unaligned>( - &Out[ByteNo], NewWord, BitNo & 7); + uint64_t ByteNo = BitNo / 8; + uint64_t StartBit = BitNo & 7; + uint64_t NumOfFlushedBytes = GetNumOfFlushedBytes(); + + if (ByteNo >= NumOfFlushedBytes) { + assert((!endian::readAtBitAlignment<uint32_t, little, unaligned>( + &Out[ByteNo - NumOfFlushedBytes], StartBit)) && + "Expected to be patching over 0-value placeholders"); + endian::writeAtBitAlignment<uint32_t, little, unaligned>( + &Out[ByteNo - NumOfFlushedBytes], NewWord, StartBit); + return; + } + + // If the byte offset to backpatch is flushed, use seek to backfill data. + // First, save the file position to restore later. + uint64_t CurPos = FS->tell(); + + // Copy data to update into Bytes from the file FS and the buffer Out. + char Bytes[9]; // Use one more byte to silence a warning from Visual C++. + size_t BytesNum = StartBit ? 8 : 4; + size_t BytesFromDisk = std::min(static_cast<uint64_t>(BytesNum), NumOfFlushedBytes - ByteNo); + size_t BytesFromBuffer = BytesNum - BytesFromDisk; + + // When unaligned, copy existing data into Bytes from the file FS and the + // buffer Out so that it can be updated before writing. For debug builds + // read bytes unconditionally in order to check that the existing value is 0 + // as expected. +#ifdef NDEBUG + if (StartBit) +#endif + { + FS->seek(ByteNo); + ssize_t BytesRead = FS->read(Bytes, BytesFromDisk); + (void)BytesRead; // silence warning + assert(BytesRead >= 0 && static_cast<size_t>(BytesRead) == BytesFromDisk); + for (size_t i = 0; i < BytesFromBuffer; ++i) + Bytes[BytesFromDisk + i] = Out[i]; + assert((!endian::readAtBitAlignment<uint32_t, little, unaligned>( + Bytes, StartBit)) && + "Expected to be patching over 0-value placeholders"); + } + + // Update Bytes in terms of bit offset and value. + endian::writeAtBitAlignment<uint32_t, little, unaligned>(Bytes, NewWord, + StartBit); + + // Copy updated data back to the file FS and the buffer Out. + FS->seek(ByteNo); + FS->write(Bytes, BytesFromDisk); + for (size_t i = 0; i < BytesFromBuffer; ++i) + Out[i] = Bytes[BytesFromDisk + i]; + + // Restore the file position.
+ FS->seek(CurPos); } void BackpatchWord64(uint64_t BitNo, uint64_t Val) { diff --git a/llvm/include/llvm/CodeGen/Analysis.h b/llvm/include/llvm/CodeGen/Analysis.h index fe610b5bdc8d..bdfb416d9bd9 100644 --- a/llvm/include/llvm/CodeGen/Analysis.h +++ b/llvm/include/llvm/CodeGen/Analysis.h @@ -92,11 +92,6 @@ void computeValueLLTs(const DataLayout &DL, Type &Ty, /// ExtractTypeInfo - Returns the type info, possibly bitcast, encoded in V. GlobalValue *ExtractTypeInfo(Value *V); -/// hasInlineAsmMemConstraint - Return true if the inline asm instruction being -/// processed uses a memory 'm' constraint. -bool hasInlineAsmMemConstraint(InlineAsm::ConstraintInfoVector &CInfos, - const TargetLowering &TLI); - /// getFCmpCondCode - Return the ISD condition code corresponding to /// the given LLVM IR floating-point condition code. This includes /// consideration of global floating-point math flags. diff --git a/llvm/include/llvm/CodeGen/AntiDepBreaker.h b/llvm/include/llvm/CodeGen/AntiDepBreaker.h index d75c13e2dd75..0553d7d452a4 100644 --- a/llvm/include/llvm/CodeGen/AntiDepBreaker.h +++ b/llvm/include/llvm/CodeGen/AntiDepBreaker.h @@ -17,7 +17,6 @@ #include "llvm/ADT/iterator_range.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineInstr.h" -#include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/Support/Compiler.h" #include <cassert> diff --git a/llvm/include/llvm/CodeGen/AsmPrinter.h b/llvm/include/llvm/CodeGen/AsmPrinter.h index 0eb950861af6..76486b0b48ce 100644 --- a/llvm/include/llvm/CodeGen/AsmPrinter.h +++ b/llvm/include/llvm/CodeGen/AsmPrinter.h @@ -66,6 +66,7 @@ class MCSymbol; class MCTargetOptions; class MDNode; class Module; +class PseudoProbeHandler; class raw_ostream; class StackMaps; class StringRef; @@ -139,9 +140,30 @@ public: using GOTEquivUsePair = std::pair<const GlobalVariable *, unsigned>; MapVector<const MCSymbol *, GOTEquivUsePair> GlobalGOTEquivs; + /// struct HandlerInfo and Handlers permit users or target extended + /// AsmPrinter to add their own handlers. + struct HandlerInfo { + std::unique_ptr<AsmPrinterHandler> Handler; + const char *TimerName; + const char *TimerDescription; + const char *TimerGroupName; + const char *TimerGroupDescription; + + HandlerInfo(std::unique_ptr<AsmPrinterHandler> Handler, + const char *TimerName, const char *TimerDescription, + const char *TimerGroupName, const char *TimerGroupDescription) + : Handler(std::move(Handler)), TimerName(TimerName), + TimerDescription(TimerDescription), TimerGroupName(TimerGroupName), + TimerGroupDescription(TimerGroupDescription) {} + }; + private: MCSymbol *CurrentFnEnd = nullptr; - MCSymbol *CurExceptionSym = nullptr; + + /// Map a basic block section ID to the exception symbol associated with that + /// section. Map entries are assigned and looked up via + /// AsmPrinter::getMBBExceptionSym. + DenseMap<unsigned, MCSymbol *> MBBSectionExceptionSyms; // The symbol used to represent the start of the current BB section of the // function. This is used to calculate the size of the BB section. @@ -158,26 +180,10 @@ private: protected: MCSymbol *CurrentFnBegin = nullptr; - /// Protected struct HandlerInfo and Handlers permit target extended - /// AsmPrinter adds their own handlers. 
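An editor's note on the BitstreamWriter.h hunk above: the new constructor takes an optional raw_fd_stream plus a FlushThreshold given in MiB (hence the FlushThreshold << 20), so the in-memory buffer is spilled to the file once it grows past that size, and BackpatchWord seeks back into the already-flushed portion when a placeholder has left the buffer. A minimal usage sketch follows; it is an illustration only, not part of this diff, and the function name and output path are made up.

#include "llvm/ADT/SmallVector.h"
#include "llvm/Bitstream/BitstreamWriter.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

static void writeWithIncrementalFlush() {
  std::error_code EC;
  raw_fd_stream OutFile("example.bc", EC); // seekable stream backing the writer
  if (EC)
    return;

  SmallVector<char, 0> Buffer;
  // Spill Buffer into OutFile whenever it exceeds 16 MiB (the default is 512).
  BitstreamWriter Writer(Buffer, &OutFile, /*FlushThreshold=*/16);

  Writer.Emit('B', 8); // bits are emitted exactly as before
  Writer.Emit('C', 8);
  Writer.FlushToWord();

  // Whatever is still sitting in Buffer has not been flushed and must be
  // written out explicitly at the end.
  OutFile.write(Buffer.data(), Buffer.size());
}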
- struct HandlerInfo { - std::unique_ptr<AsmPrinterHandler> Handler; - const char *TimerName; - const char *TimerDescription; - const char *TimerGroupName; - const char *TimerGroupDescription; - - HandlerInfo(std::unique_ptr<AsmPrinterHandler> Handler, - const char *TimerName, const char *TimerDescription, - const char *TimerGroupName, const char *TimerGroupDescription) - : Handler(std::move(Handler)), TimerName(TimerName), - TimerDescription(TimerDescription), TimerGroupName(TimerGroupName), - TimerGroupDescription(TimerGroupDescription) {} - }; - /// A vector of all debug/EH info emitters we should use. This vector /// maintains ownership of the emitters. - SmallVector<HandlerInfo, 1> Handlers; + std::vector<HandlerInfo> Handlers; + size_t NumUserHandlers = 0; public: struct SrcMgrDiagInfo { @@ -201,6 +207,10 @@ private: /// If the target supports dwarf debug info, this pointer is non-null. DwarfDebug *DD = nullptr; + /// A handler that supports pseudo probe emission with embedded inline + /// context. + PseudoProbeHandler *PP = nullptr; + /// If the current module uses dwarf CFI annotations strictly for debugging. bool isCFIMoveForDebugging = false; @@ -216,6 +226,14 @@ public: uint16_t getDwarfVersion() const; void setDwarfVersion(uint16_t Version); + bool isDwarf64() const; + + /// Returns 4 for DWARF32 and 8 for DWARF64. + unsigned int getDwarfOffsetByteSize() const; + + /// Returns 4 for DWARF32 and 12 for DWARF64. + unsigned int getUnitLengthFieldByteSize() const; + bool isPositionIndependent() const; /// Return true if assembly output should contain comments. @@ -230,7 +248,10 @@ public: MCSymbol *getFunctionBegin() const { return CurrentFnBegin; } MCSymbol *getFunctionEnd() const { return CurrentFnEnd; } - MCSymbol *getCurExceptionSym(); + + // Return the exception symbol associated with the MBB section containing a + // given basic block. + MCSymbol *getMBBExceptionSym(const MachineBasicBlock &MBB); /// Return information about object file lowering. const TargetLoweringObjectFile &getObjFileLowering() const; @@ -342,6 +363,10 @@ public: void emitStackSizeSection(const MachineFunction &MF); + void emitBBAddrMapSection(const MachineFunction &MF); + + void emitPseudoProbe(const MachineInstr &MI); + void emitRemarksSection(remarks::RemarkStreamer &RS); enum CFIMoveType { CFI_M_None, CFI_M_EH, CFI_M_Debug }; @@ -369,6 +394,32 @@ public: /// so, emit it and return true, otherwise do nothing and return false. bool emitSpecialLLVMGlobal(const GlobalVariable *GV); + /// `llvm.global_ctors` and `llvm.global_dtors` are arrays of Structor + /// structs. + /// + /// Priority - init priority + /// Func - global initialization or global clean-up function + /// ComdatKey - associated data + struct Structor { + int Priority = 0; + Constant *Func = nullptr; + GlobalValue *ComdatKey = nullptr; + + Structor() = default; + }; + + /// This method gathers an array of Structors and then sorts them out by + /// Priority. + /// @param List The initializer of `llvm.global_ctors` or `llvm.global_dtors` + /// array. + /// @param[out] Structors Sorted Structor structs by Priority. + void preprocessXXStructorList(const DataLayout &DL, const Constant *List, + SmallVector<Structor, 8> &Structors); + + /// This method emits `llvm.global_ctors` or `llvm.global_dtors` list. + virtual void emitXXStructorList(const DataLayout &DL, const Constant *List, + bool IsCtor); + /// Emit an alignment directive to the specified power of two boundary. 
If a /// global value is specified, and if that global has an explicit alignment /// requested, it will override the alignment request if required for @@ -403,6 +454,11 @@ public: // Overridable Hooks //===------------------------------------------------------------------===// + void addAsmPrinterHandler(HandlerInfo Handler) { + Handlers.insert(Handlers.begin(), std::move(Handler)); + NumUserHandlers++; + } + // Targets can, or in the case of EmitInstruction, must implement these to // customize output. @@ -534,9 +590,6 @@ public: emitLabelPlusOffset(Label, 0, Size, IsSectionRelative); } - /// Emit something like ".long Label + Offset". - void emitDwarfOffset(const MCSymbol *Label, uint64_t Offset) const; - //===------------------------------------------------------------------===// // Dwarf Emission Helper Routines //===------------------------------------------------------------------===// @@ -557,7 +610,7 @@ public: unsigned GetSizeOfEncodedValue(unsigned Encoding) const; /// Emit reference to a ttype global with a specified encoding. - void emitTTypeReference(const GlobalValue *GV, unsigned Encoding) const; + virtual void emitTTypeReference(const GlobalValue *GV, unsigned Encoding); /// Emit a reference to a symbol for use in dwarf. Different object formats /// represent this in different ways. Some use a relocation others encode @@ -565,18 +618,39 @@ public: void emitDwarfSymbolReference(const MCSymbol *Label, bool ForceOffset = false) const; - /// Emit the 4-byte offset of a string from the start of its section. + /// Emit the 4- or 8-byte offset of a string from the start of its section. /// /// When possible, emit a DwarfStringPool section offset without any /// relocations, and without using the symbol. Otherwise, defers to \a /// emitDwarfSymbolReference(). + /// + /// The length of the emitted value depends on the DWARF format. void emitDwarfStringOffset(DwarfStringPoolEntry S) const; - /// Emit the 4-byte offset of a string from the start of its section. + /// Emit the 4-or 8-byte offset of a string from the start of its section. void emitDwarfStringOffset(DwarfStringPoolEntryRef S) const { emitDwarfStringOffset(S.getEntry()); } + /// Emit something like ".long Label + Offset" or ".quad Label + Offset" + /// depending on the DWARF format. + void emitDwarfOffset(const MCSymbol *Label, uint64_t Offset) const; + + /// Emit 32- or 64-bit value depending on the DWARF format. + void emitDwarfLengthOrOffset(uint64_t Value) const; + + /// Emit a special value of 0xffffffff if producing 64-bit debugging info. + void maybeEmitDwarf64Mark() const; + + /// Emit a unit length field. The actual format, DWARF32 or DWARF64, is chosen + /// according to the settings. + void emitDwarfUnitLength(uint64_t Length, const Twine &Comment) const; + + /// Emit a unit length field. The actual format, DWARF32 or DWARF64, is chosen + /// according to the settings. + void emitDwarfUnitLength(const MCSymbol *Hi, const MCSymbol *Lo, + const Twine &Comment) const; + /// Emit reference to a call site with a specified encoding void emitCallSiteOffset(const MCSymbol *Hi, const MCSymbol *Lo, unsigned Encoding) const; @@ -713,12 +787,13 @@ private: void emitModuleIdents(Module &M); /// Emit bytes for llvm.commandline metadata. void emitModuleCommandLines(Module &M); - void emitXXStructorList(const DataLayout &DL, const Constant *List, - bool isCtor); GCMetadataPrinter *GetOrCreateGCPrinter(GCStrategy &S); /// Emit GlobalAlias or GlobalIFunc. 
void emitGlobalIndirectSymbol(Module &M, const GlobalIndirectSymbol &GIS); + + /// This method decides whether the specified basic block requires a label. + bool shouldEmitLabelForBasicBlock(const MachineBasicBlock &MBB) const; }; } // end namespace llvm diff --git a/llvm/include/llvm/CodeGen/AsmPrinterHandler.h b/llvm/include/llvm/CodeGen/AsmPrinterHandler.h index 899d067d03f0..dc81a3040097 100644 --- a/llvm/include/llvm/CodeGen/AsmPrinterHandler.h +++ b/llvm/include/llvm/CodeGen/AsmPrinterHandler.h @@ -23,8 +23,10 @@ class MachineBasicBlock; class MachineFunction; class MachineInstr; class MCSymbol; +class Module; -typedef MCSymbol *ExceptionSymbolProvider(AsmPrinter *Asm); +typedef MCSymbol *ExceptionSymbolProvider(AsmPrinter *Asm, + const MachineBasicBlock *MBB); /// Collects and handles AsmPrinter objects required to build debug /// or EH information. @@ -36,6 +38,8 @@ public: /// this tracks that size. virtual void setSymbolSize(const MCSymbol *Sym, uint64_t Size) = 0; + virtual void beginModule(Module *M) {} + /// Emit all sections that should come after the content. virtual void endModule() = 0; @@ -74,6 +78,7 @@ public: /// Process end of a basic block during basic block sections. virtual void endBasicBlock(const MachineBasicBlock &MBB) {} }; + } // End of namespace llvm #endif diff --git a/llvm/include/llvm/CodeGen/BasicBlockSectionUtils.h b/llvm/include/llvm/CodeGen/BasicBlockSectionUtils.h new file mode 100644 index 000000000000..d8da3be0cd4c --- /dev/null +++ b/llvm/include/llvm/CodeGen/BasicBlockSectionUtils.h @@ -0,0 +1,30 @@ +//===- BasicBlockSectionUtils.h - Utilities for basic block sections --===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_BASICBLOCKSECTIONUTILS_H +#define LLVM_CODEGEN_BASICBLOCKSECTIONUTILS_H + +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/CommandLine.h" + +namespace llvm { + +extern cl::opt<std::string> BBSectionsColdTextPrefix; + +class MachineFunction; +class MachineBasicBlock; + +using MachineBasicBlockComparator = + function_ref<bool(const MachineBasicBlock &, const MachineBasicBlock &)>; + +void sortBasicBlocksAndUpdateBranches(MachineFunction &MF, + MachineBasicBlockComparator MBBCmp); + +} // end namespace llvm + +#endif // LLVM_CODEGEN_BASICBLOCKSECTIONUTILS_H diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index 407f09063dce..9514dd22be80 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -40,7 +40,6 @@ #include "llvm/IR/Operator.h" #include "llvm/IR/Type.h" #include "llvm/IR/Value.h" -#include "llvm/MC/MCSchedule.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" @@ -115,12 +114,14 @@ private: /// Estimate a cost of subvector extraction as a sequence of extract and /// insert operations. 
- unsigned getExtractSubvectorOverhead(FixedVectorType *VTy, int Index, + unsigned getExtractSubvectorOverhead(VectorType *VTy, int Index, FixedVectorType *SubVTy) { assert(VTy && SubVTy && "Can only extract subvectors from vectors"); int NumSubElts = SubVTy->getNumElements(); - assert((Index + NumSubElts) <= (int)VTy->getNumElements() && + assert((!isa<FixedVectorType>(VTy) || + (Index + NumSubElts) <= + (int)cast<FixedVectorType>(VTy)->getNumElements()) && "SK_ExtractSubvector index out of range"); unsigned Cost = 0; @@ -138,12 +139,14 @@ private: /// Estimate a cost of subvector insertion as a sequence of extract and /// insert operations. - unsigned getInsertSubvectorOverhead(FixedVectorType *VTy, int Index, + unsigned getInsertSubvectorOverhead(VectorType *VTy, int Index, FixedVectorType *SubVTy) { assert(VTy && SubVTy && "Can only insert subvectors into vectors"); int NumSubElts = SubVTy->getNumElements(); - assert((Index + NumSubElts) <= (int)VTy->getNumElements() && + assert((!isa<FixedVectorType>(VTy) || + (Index + NumSubElts) <= + (int)cast<FixedVectorType>(VTy)->getNumElements()) && "SK_InsertSubvector index out of range"); unsigned Cost = 0; @@ -222,7 +225,11 @@ public: } bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const { - return getTLI()->isNoopAddrSpaceCast(FromAS, ToAS); + return getTLI()->getTargetMachine().isNoopAddrSpaceCast(FromAS, ToAS); + } + + unsigned getAssumedAddrSpace(const Value *V) const { + return getTLI()->getTargetMachine().getAssumedAddrSpace(V); } Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV, @@ -265,6 +272,10 @@ public: return TargetTransformInfoImplBase::isLSRCostLess(C1, C2); } + bool isNumRegsMajorCostOfLSR() { + return TargetTransformInfoImplBase::isNumRegsMajorCostOfLSR(); + } + bool isProfitableLSRChainElement(Instruction *I) { return TargetTransformInfoImplBase::isProfitableLSRChainElement(I); } @@ -294,6 +305,10 @@ public: return getTLI()->isTypeLegal(VT); } + unsigned getRegUsageForType(Type *Ty) { + return getTLI()->getTypeLegalizationCost(DL, Ty).first; + } + int getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef<const Value *> Operands) { return BaseT::getGEPCost(PointeeType, Ptr, Operands); @@ -386,6 +401,7 @@ public: } unsigned getInliningThresholdMultiplier() { return 1; } + unsigned adjustInliningThreshold(const CallBase *CB) { return 0; } int getInlinerVectorBonusPercent() { return 150; } @@ -477,6 +493,30 @@ public: return BaseT::emitGetActiveLaneMask(); } + Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC, + IntrinsicInst &II) { + return BaseT::instCombineIntrinsic(IC, II); + } + + Optional<Value *> simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, + IntrinsicInst &II, + APInt DemandedMask, + KnownBits &Known, + bool &KnownBitsComputed) { + return BaseT::simplifyDemandedUseBitsIntrinsic(IC, II, DemandedMask, Known, + KnownBitsComputed); + } + + Optional<Value *> simplifyDemandedVectorEltsIntrinsic( + InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, + APInt &UndefElts2, APInt &UndefElts3, + std::function<void(Instruction *, unsigned, APInt, APInt &)> + SimplifyAndSetOp) { + return BaseT::simplifyDemandedVectorEltsIntrinsic( + IC, II, DemandedElts, UndefElts, UndefElts2, UndefElts3, + SimplifyAndSetOp); + } + int getInstructionLatency(const Instruction *I) { if (isa<LoadInst>(I)) return getST()->getSchedModel().DefaultLoadLatency; @@ -532,6 +572,8 @@ public: unsigned getRegisterBitWidth(bool Vector) const { return 32; } + Optional<unsigned> 
getMaxVScale() const { return None; } + /// Estimate the overhead of scalarizing an instruction. Insert and Extract /// are set if the demanded result elements need to be inserted and/or /// extracted from vectors. @@ -567,7 +609,7 @@ public: return thisT()->getScalarizationOverhead(Ty, DemandedElts, Insert, Extract); } - /// Estimate the overhead of scalarizing an instructions unique + /// Estimate the overhead of scalarizing an instruction's unique /// non-constant operands. The types of the arguments are ordinarily /// scalar, in which case the costs are multiplied with VF. unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args, @@ -575,8 +617,14 @@ public: unsigned Cost = 0; SmallPtrSet<const Value*, 4> UniqueOperands; for (const Value *A : Args) { + // Disregard things like metadata arguments. + Type *Ty = A->getType(); + if (!Ty->isIntOrIntVectorTy() && !Ty->isFPOrFPVectorTy() && + !Ty->isPtrOrPtrVectorTy()) + continue; + if (!isa<Constant>(A) && UniqueOperands.insert(A).second) { - auto *VecTy = dyn_cast<VectorType>(A->getType()); + auto *VecTy = dyn_cast<VectorType>(Ty); if (VecTy) { // If A is a vector operand, VF should be 1 or correspond to A. assert((VF == 1 || @@ -584,7 +632,7 @@ public: "Vector argument does not match VF"); } else - VecTy = FixedVectorType::get(A->getType(), VF); + VecTy = FixedVectorType::get(Ty, VF); Cost += getScalarizationOverhead(VecTy, false, true); } @@ -658,7 +706,8 @@ public: if (auto *VTy = dyn_cast<VectorType>(Ty)) { unsigned Num = cast<FixedVectorType>(VTy)->getNumElements(); unsigned Cost = thisT()->getArithmeticInstrCost( - Opcode, VTy->getScalarType(), CostKind); + Opcode, VTy->getScalarType(), CostKind, Opd1Info, Opd2Info, + Opd1PropInfo, Opd2PropInfo, Args, CxtI); // Return the cost of multiple scalar invocation plus the cost of // inserting and extracting the values. return getScalarizationOverhead(VTy, Args) + Num * Cost; @@ -681,19 +730,20 @@ public: case TTI::SK_PermuteTwoSrc: return getPermuteShuffleOverhead(cast<FixedVectorType>(Tp)); case TTI::SK_ExtractSubvector: - return getExtractSubvectorOverhead(cast<FixedVectorType>(Tp), Index, + return getExtractSubvectorOverhead(Tp, Index, cast<FixedVectorType>(SubTp)); case TTI::SK_InsertSubvector: - return getInsertSubvectorOverhead(cast<FixedVectorType>(Tp), Index, + return getInsertSubvectorOverhead(Tp, Index, cast<FixedVectorType>(SubTp)); } llvm_unreachable("Unknown TTI::ShuffleKind"); } unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, + TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I = nullptr) { - if (BaseT::getCastInstrCost(Opcode, Dst, Src, CostKind, I) == 0) + if (BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I) == 0) return 0; const TargetLoweringBase *TLI = getTLI(); @@ -731,15 +781,12 @@ public: return 0; LLVM_FALLTHROUGH; case Instruction::SExt: - if (!I) - break; - - if (getTLI()->isExtFree(I)) + if (I && getTLI()->isExtFree(I)) return 0; // If this is a zext/sext of a load, return 0 if the corresponding // extending load exists on target. - if (I && isa<LoadInst>(I->getOperand(0))) { + if (CCH == TTI::CastContextHint::Normal) { EVT ExtVT = EVT::getEVT(Dst); EVT LoadVT = EVT::getEVT(Src); unsigned LType = @@ -814,7 +861,7 @@ public: unsigned SplitCost = (!SplitSrc || !SplitDst) ? 
TTI->getVectorSplitCost() : 0; return SplitCost + - (2 * TTI->getCastInstrCost(Opcode, SplitDstTy, SplitSrcTy, + (2 * TTI->getCastInstrCost(Opcode, SplitDstTy, SplitSrcTy, CCH, CostKind, I)); } @@ -822,7 +869,7 @@ public: // the operation will get scalarized. unsigned Num = cast<FixedVectorType>(DstVTy)->getNumElements(); unsigned Cost = thisT()->getCastInstrCost( - Opcode, Dst->getScalarType(), Src->getScalarType(), CostKind, I); + Opcode, Dst->getScalarType(), Src->getScalarType(), CCH, CostKind, I); // Return the cost of multiple scalar invocation plus the cost of // inserting and extracting the values. @@ -847,7 +894,7 @@ public: return thisT()->getVectorInstrCost(Instruction::ExtractElement, VecTy, Index) + thisT()->getCastInstrCost(Opcode, Dst, VecTy->getElementType(), - TTI::TCK_RecipThroughput); + TTI::CastContextHint::None, TTI::TCK_RecipThroughput); } unsigned getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind) { @@ -855,6 +902,7 @@ public: } unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, + CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, const Instruction *I = nullptr) { const TargetLoweringBase *TLI = getTLI(); @@ -863,7 +911,8 @@ public: // TODO: Handle other cost kinds. if (CostKind != TTI::TCK_RecipThroughput) - return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind, I); + return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, + I); // Selects on vectors are actually vector selects. if (ISD == ISD::SELECT) { @@ -888,7 +937,7 @@ public: if (CondTy) CondTy = CondTy->getScalarType(); unsigned Cost = thisT()->getCmpSelInstrCost( - Opcode, ValVTy->getScalarType(), CondTy, CostKind, I); + Opcode, ValVTy->getScalarType(), CondTy, VecPred, CostKind, I); // Return the cost of multiple scalar invocation plus the cost of // inserting and extracting the values. @@ -922,7 +971,11 @@ public: return Cost; if (Src->isVectorTy() && - Src->getPrimitiveSizeInBits() < LT.second.getSizeInBits()) { + // In practice it's not currently possible to have a change in lane + // length for extending loads or truncating stores so both types should + // have the same scalable property. + TypeSize::isKnownLT(Src->getPrimitiveSizeInBits(), + LT.second.getSizeInBits())) { // This is a vector load that legalizes to a larger type than the vector // itself. Unless the corresponding extending load or truncating store is // legal, then this will scalarize. @@ -945,6 +998,51 @@ public: return Cost; } + unsigned getGatherScatterOpCost(unsigned Opcode, Type *DataTy, + const Value *Ptr, bool VariableMask, + Align Alignment, TTI::TargetCostKind CostKind, + const Instruction *I = nullptr) { + auto *VT = cast<FixedVectorType>(DataTy); + // Assume the target does not have support for gather/scatter operations + // and provide a rough estimate. + // + // First, compute the cost of extracting the individual addresses and the + // individual memory operations. + int LoadCost = + VT->getNumElements() * + (getVectorInstrCost( + Instruction::ExtractElement, + FixedVectorType::get(PointerType::get(VT->getElementType(), 0), + VT->getNumElements()), + -1) + + getMemoryOpCost(Opcode, VT->getElementType(), Alignment, 0, CostKind)); + + // Next, compute the cost of packing the result in a vector. + int PackingCost = getScalarizationOverhead(VT, Opcode != Instruction::Store, + Opcode == Instruction::Store); + + int ConditionalCost = 0; + if (VariableMask) { + // Compute the cost of conditionally executing the memory operations with + // variable masks. 
This includes extracting the individual conditions, a + // branches and PHIs to combine the results. + // NOTE: Estimating the cost of conditionally executing the memory + // operations accurately is quite difficult and the current solution + // provides a very rough estimate only. + ConditionalCost = + VT->getNumElements() * + (getVectorInstrCost( + Instruction::ExtractElement, + FixedVectorType::get(Type::getInt1Ty(DataTy->getContext()), + VT->getNumElements()), + -1) + + getCFInstrCost(Instruction::Br, CostKind) + + getCFInstrCost(Instruction::PHI, CostKind)); + } + + return LoadCost + PackingCost + ConditionalCost; + } + unsigned getInterleavedMemoryOpCost( unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, @@ -1099,76 +1197,52 @@ public: /// Get intrinsic cost based on arguments. unsigned getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) { - Intrinsic::ID IID = ICA.getID(); - - // Special case some scalar intrinsics. - if (CostKind != TTI::TCK_RecipThroughput) { - switch (IID) { - default: - break; - case Intrinsic::cttz: - if (getTLI()->isCheapToSpeculateCttz()) - return TargetTransformInfo::TCC_Basic; - break; - case Intrinsic::ctlz: - if (getTLI()->isCheapToSpeculateCtlz()) - return TargetTransformInfo::TCC_Basic; - break; - case Intrinsic::memcpy: - return thisT()->getMemcpyCost(ICA.getInst()); - // TODO: other libc intrinsics. - } - return BaseT::getIntrinsicInstrCost(ICA, CostKind); - } - + // Check for generically free intrinsics. if (BaseT::getIntrinsicInstrCost(ICA, CostKind) == 0) return 0; - // TODO: Combine these two logic paths. + // Assume that target intrinsics are cheap. + Intrinsic::ID IID = ICA.getID(); + if (Function::isTargetIntrinsic(IID)) + return TargetTransformInfo::TCC_Basic; + if (ICA.isTypeBasedOnly()) return getTypeBasedIntrinsicInstrCost(ICA, CostKind); Type *RetTy = ICA.getReturnType(); - unsigned VF = ICA.getVectorFactor(); - unsigned RetVF = - (RetTy->isVectorTy() ? cast<FixedVectorType>(RetTy)->getNumElements() - : 1); - assert((RetVF == 1 || VF == 1) && "VF > 1 and RetVF is a vector type"); + + ElementCount VF = ICA.getVectorFactor(); + ElementCount RetVF = + (RetTy->isVectorTy() ? cast<VectorType>(RetTy)->getElementCount() + : ElementCount::getFixed(1)); + assert((RetVF.isScalar() || VF.isScalar()) && + "VF > 1 and RetVF is a vector type"); const IntrinsicInst *I = ICA.getInst(); const SmallVectorImpl<const Value *> &Args = ICA.getArgs(); FastMathFlags FMF = ICA.getFlags(); - switch (IID) { - default: { - // Assume that we need to scalarize this intrinsic. - SmallVector<Type *, 4> Types; - for (const Value *Op : Args) { - Type *OpTy = Op->getType(); - assert(VF == 1 || !OpTy->isVectorTy()); - Types.push_back(VF == 1 ? OpTy : FixedVectorType::get(OpTy, VF)); - } + default: + break; - if (VF > 1 && !RetTy->isVoidTy()) - RetTy = FixedVectorType::get(RetTy, VF); - - // Compute the scalarization overhead based on Args for a vector - // intrinsic. A vectorizer will pass a scalar RetTy and VF > 1, while - // CostModel will pass a vector RetTy and VF is 1. 
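A rough worked example of the getGatherScatterOpCost fallback above (an editor's illustration with assumed unit costs, not figures from this patch): for a masked gather of a <4 x i32> value, if each address extractelement, scalar load, branch and PHI is counted as 1, then LoadCost = 4 * (1 + 1) = 8 and PackingCost (four inserts to rebuild the result vector) = 4; with a variable mask, ConditionalCost = 4 * (1 + 1 + 1) = 12, giving roughly 24 in total, versus about 12 when the mask is a constant.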
- unsigned ScalarizationCost = std::numeric_limits<unsigned>::max(); - if (RetVF > 1 || VF > 1) { - ScalarizationCost = 0; - if (!RetTy->isVoidTy()) - ScalarizationCost += - getScalarizationOverhead(cast<VectorType>(RetTy), true, false); - ScalarizationCost += getOperandsScalarizationOverhead(Args, VF); - } + case Intrinsic::cttz: + // FIXME: If necessary, this should go in target-specific overrides. + if (VF.isScalar() && RetVF.isScalar() && + getTLI()->isCheapToSpeculateCttz()) + return TargetTransformInfo::TCC_Basic; + break; + + case Intrinsic::ctlz: + // FIXME: If necessary, this should go in target-specific overrides. + if (VF.isScalar() && RetVF.isScalar() && + getTLI()->isCheapToSpeculateCtlz()) + return TargetTransformInfo::TCC_Basic; + break; + + case Intrinsic::memcpy: + return thisT()->getMemcpyCost(ICA.getInst()); - IntrinsicCostAttributes Attrs(IID, RetTy, Types, FMF, - ScalarizationCost, I); - return thisT()->getIntrinsicInstrCost(Attrs, CostKind); - } case Intrinsic::masked_scatter: { - assert(VF == 1 && "Can't vectorize types here."); + assert(VF.isScalar() && "Can't vectorize types here."); const Value *Mask = Args[3]; bool VarMask = !isa<Constant>(Mask); Align Alignment = cast<ConstantInt>(Args[2])->getAlignValue(); @@ -1177,31 +1251,57 @@ public: VarMask, Alignment, CostKind, I); } case Intrinsic::masked_gather: { - assert(VF == 1 && "Can't vectorize types here."); + assert(VF.isScalar() && "Can't vectorize types here."); const Value *Mask = Args[2]; bool VarMask = !isa<Constant>(Mask); Align Alignment = cast<ConstantInt>(Args[1])->getAlignValue(); return thisT()->getGatherScatterOpCost(Instruction::Load, RetTy, Args[0], VarMask, Alignment, CostKind, I); } - case Intrinsic::experimental_vector_reduce_add: - case Intrinsic::experimental_vector_reduce_mul: - case Intrinsic::experimental_vector_reduce_and: - case Intrinsic::experimental_vector_reduce_or: - case Intrinsic::experimental_vector_reduce_xor: - case Intrinsic::experimental_vector_reduce_v2_fadd: - case Intrinsic::experimental_vector_reduce_v2_fmul: - case Intrinsic::experimental_vector_reduce_smax: - case Intrinsic::experimental_vector_reduce_smin: - case Intrinsic::experimental_vector_reduce_fmax: - case Intrinsic::experimental_vector_reduce_fmin: - case Intrinsic::experimental_vector_reduce_umax: - case Intrinsic::experimental_vector_reduce_umin: { + case Intrinsic::experimental_vector_extract: { + // FIXME: Handle case where a scalable vector is extracted from a scalable + // vector + if (isa<ScalableVectorType>(RetTy)) + return BaseT::getIntrinsicInstrCost(ICA, CostKind); + unsigned Index = cast<ConstantInt>(Args[1])->getZExtValue(); + return thisT()->getShuffleCost(TTI::SK_ExtractSubvector, + cast<VectorType>(Args[0]->getType()), + Index, cast<VectorType>(RetTy)); + } + case Intrinsic::experimental_vector_insert: { + // FIXME: Handle case where a scalable vector is inserted into a scalable + // vector + if (isa<ScalableVectorType>(Args[1]->getType())) + return BaseT::getIntrinsicInstrCost(ICA, CostKind); + unsigned Index = cast<ConstantInt>(Args[2])->getZExtValue(); + return thisT()->getShuffleCost( + TTI::SK_InsertSubvector, cast<VectorType>(Args[0]->getType()), Index, + cast<VectorType>(Args[1]->getType())); + } + case Intrinsic::vector_reduce_add: + case Intrinsic::vector_reduce_mul: + case Intrinsic::vector_reduce_and: + case Intrinsic::vector_reduce_or: + case Intrinsic::vector_reduce_xor: + case Intrinsic::vector_reduce_smax: + case Intrinsic::vector_reduce_smin: + case Intrinsic::vector_reduce_fmax: 
+ case Intrinsic::vector_reduce_fmin: + case Intrinsic::vector_reduce_umax: + case Intrinsic::vector_reduce_umin: { IntrinsicCostAttributes Attrs(IID, RetTy, Args[0]->getType(), FMF, 1, I); - return getIntrinsicInstrCost(Attrs, CostKind); + return getTypeBasedIntrinsicInstrCost(Attrs, CostKind); + } + case Intrinsic::vector_reduce_fadd: + case Intrinsic::vector_reduce_fmul: { + IntrinsicCostAttributes Attrs( + IID, RetTy, {Args[0]->getType(), Args[1]->getType()}, FMF, 1, I); + return getTypeBasedIntrinsicInstrCost(Attrs, CostKind); } case Intrinsic::fshl: case Intrinsic::fshr: { + if (isa<ScalableVectorType>(RetTy)) + return BaseT::getIntrinsicInstrCost(ICA, CostKind); const Value *X = Args[0]; const Value *Y = Args[1]; const Value *Z = Args[2]; @@ -1232,14 +1332,48 @@ public: // For non-rotates (X != Y) we must add shift-by-zero handling costs. if (X != Y) { Type *CondTy = RetTy->getWithNewBitWidth(1); - Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy, - CostKind); - Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, - CondTy, CostKind); + Cost += + thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy, + CmpInst::BAD_ICMP_PREDICATE, CostKind); + Cost += + thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy, + CmpInst::BAD_ICMP_PREDICATE, CostKind); } return Cost; } } + // TODO: Handle the remaining intrinsic with scalable vector type + if (isa<ScalableVectorType>(RetTy)) + return BaseT::getIntrinsicInstrCost(ICA, CostKind); + + // Assume that we need to scalarize this intrinsic. + SmallVector<Type *, 4> Types; + for (const Value *Op : Args) { + Type *OpTy = Op->getType(); + assert(VF.isScalar() || !OpTy->isVectorTy()); + Types.push_back(VF.isScalar() + ? OpTy + : FixedVectorType::get(OpTy, VF.getKnownMinValue())); + } + + if (VF.isVector() && !RetTy->isVoidTy()) + RetTy = FixedVectorType::get(RetTy, VF.getKnownMinValue()); + + // Compute the scalarization overhead based on Args for a vector + // intrinsic. A vectorizer will pass a scalar RetTy and VF > 1, while + // CostModel will pass a vector RetTy and VF is 1. + unsigned ScalarizationCost = std::numeric_limits<unsigned>::max(); + if (RetVF.isVector() || VF.isVector()) { + ScalarizationCost = 0; + if (!RetTy->isVoidTy()) + ScalarizationCost += + getScalarizationOverhead(cast<VectorType>(RetTy), true, false); + ScalarizationCost += + getOperandsScalarizationOverhead(Args, VF.getKnownMinValue()); + } + + IntrinsicCostAttributes Attrs(IID, RetTy, Types, FMF, ScalarizationCost, I); + return thisT()->getTypeBasedIntrinsicInstrCost(Attrs, CostKind); } /// Get intrinsic cost based on argument types. @@ -1255,10 +1389,21 @@ public: unsigned ScalarizationCostPassed = ICA.getScalarizationCost(); bool SkipScalarizationCost = ICA.skipScalarizationCost(); - auto *VecOpTy = Tys.empty() ? nullptr : dyn_cast<VectorType>(Tys[0]); + VectorType *VecOpTy = nullptr; + if (!Tys.empty()) { + // The vector reduction operand is operand 0 except for fadd/fmul. + // Their operand 0 is a scalar start value, so the vector op is operand 1. + unsigned VecTyIndex = 0; + if (IID == Intrinsic::vector_reduce_fadd || + IID == Intrinsic::vector_reduce_fmul) + VecTyIndex = 1; + assert(Tys.size() > VecTyIndex && "Unexpected IntrinsicCostAttributes"); + VecOpTy = dyn_cast<VectorType>(Tys[VecTyIndex]); + } + // Library call cost - other than size, make it expensive. + unsigned SingleCallCost = CostKind == TTI::TCK_CodeSize ? 
1 : 10; SmallVector<unsigned, 2> ISDs; - unsigned SingleCallCost = 10; // Library call cost. Make it expensive. switch (IID) { default: { // Assume that we need to scalarize this intrinsic. @@ -1327,13 +1472,15 @@ public: break; case Intrinsic::minnum: ISDs.push_back(ISD::FMINNUM); - if (FMF.noNaNs()) - ISDs.push_back(ISD::FMINIMUM); break; case Intrinsic::maxnum: ISDs.push_back(ISD::FMAXNUM); - if (FMF.noNaNs()) - ISDs.push_back(ISD::FMAXIMUM); + break; + case Intrinsic::minimum: + ISDs.push_back(ISD::FMINIMUM); + break; + case Intrinsic::maximum: + ISDs.push_back(ISD::FMAXIMUM); break; case Intrinsic::copysign: ISDs.push_back(ISD::FCOPYSIGN); @@ -1375,6 +1522,7 @@ public: case Intrinsic::lifetime_start: case Intrinsic::lifetime_end: case Intrinsic::sideeffect: + case Intrinsic::pseudoprobe: return 0; case Intrinsic::masked_store: { Type *Ty = Tys[0]; @@ -1388,50 +1536,72 @@ public: return thisT()->getMaskedMemoryOpCost(Instruction::Load, Ty, TyAlign, 0, CostKind); } - case Intrinsic::experimental_vector_reduce_add: + case Intrinsic::vector_reduce_add: return thisT()->getArithmeticReductionCost(Instruction::Add, VecOpTy, /*IsPairwiseForm=*/false, CostKind); - case Intrinsic::experimental_vector_reduce_mul: + case Intrinsic::vector_reduce_mul: return thisT()->getArithmeticReductionCost(Instruction::Mul, VecOpTy, /*IsPairwiseForm=*/false, CostKind); - case Intrinsic::experimental_vector_reduce_and: + case Intrinsic::vector_reduce_and: return thisT()->getArithmeticReductionCost(Instruction::And, VecOpTy, /*IsPairwiseForm=*/false, CostKind); - case Intrinsic::experimental_vector_reduce_or: + case Intrinsic::vector_reduce_or: return thisT()->getArithmeticReductionCost(Instruction::Or, VecOpTy, /*IsPairwiseForm=*/false, CostKind); - case Intrinsic::experimental_vector_reduce_xor: + case Intrinsic::vector_reduce_xor: return thisT()->getArithmeticReductionCost(Instruction::Xor, VecOpTy, /*IsPairwiseForm=*/false, CostKind); - case Intrinsic::experimental_vector_reduce_v2_fadd: + case Intrinsic::vector_reduce_fadd: // FIXME: Add new flag for cost of strict reductions. return thisT()->getArithmeticReductionCost(Instruction::FAdd, VecOpTy, /*IsPairwiseForm=*/false, CostKind); - case Intrinsic::experimental_vector_reduce_v2_fmul: + case Intrinsic::vector_reduce_fmul: // FIXME: Add new flag for cost of strict reductions. 
return thisT()->getArithmeticReductionCost(Instruction::FMul, VecOpTy, /*IsPairwiseForm=*/false, CostKind); - case Intrinsic::experimental_vector_reduce_smax: - case Intrinsic::experimental_vector_reduce_smin: - case Intrinsic::experimental_vector_reduce_fmax: - case Intrinsic::experimental_vector_reduce_fmin: + case Intrinsic::vector_reduce_smax: + case Intrinsic::vector_reduce_smin: + case Intrinsic::vector_reduce_fmax: + case Intrinsic::vector_reduce_fmin: return thisT()->getMinMaxReductionCost( VecOpTy, cast<VectorType>(CmpInst::makeCmpResultType(VecOpTy)), /*IsPairwiseForm=*/false, /*IsUnsigned=*/false, CostKind); - case Intrinsic::experimental_vector_reduce_umax: - case Intrinsic::experimental_vector_reduce_umin: + case Intrinsic::vector_reduce_umax: + case Intrinsic::vector_reduce_umin: return thisT()->getMinMaxReductionCost( VecOpTy, cast<VectorType>(CmpInst::makeCmpResultType(VecOpTy)), /*IsPairwiseForm=*/false, /*IsUnsigned=*/true, CostKind); + case Intrinsic::abs: + case Intrinsic::smax: + case Intrinsic::smin: + case Intrinsic::umax: + case Intrinsic::umin: { + // abs(X) = select(icmp(X,0),X,sub(0,X)) + // minmax(X,Y) = select(icmp(X,Y),X,Y) + Type *CondTy = RetTy->getWithNewBitWidth(1); + unsigned Cost = 0; + // TODO: Ideally getCmpSelInstrCost would accept an icmp condition code. + Cost += + thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy, + CmpInst::BAD_ICMP_PREDICATE, CostKind); + Cost += + thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy, + CmpInst::BAD_ICMP_PREDICATE, CostKind); + // TODO: Should we add an OperandValueProperties::OP_Zero property? + if (IID == Intrinsic::abs) + Cost += thisT()->getArithmeticInstrCost( + BinaryOperator::Sub, RetTy, CostKind, TTI::OK_UniformConstantValue); + return Cost; + } case Intrinsic::sadd_sat: case Intrinsic::ssub_sat: { Type *CondTy = RetTy->getWithNewBitWidth(1); @@ -1447,10 +1617,12 @@ public: IntrinsicCostAttributes Attrs(OverflowOp, OpTy, {RetTy, RetTy}, FMF, ScalarizationCostPassed); Cost += thisT()->getIntrinsicInstrCost(Attrs, CostKind); - Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy, - CostKind); - Cost += 2 * thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, - CondTy, CostKind); + Cost += + thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy, + CmpInst::BAD_ICMP_PREDICATE, CostKind); + Cost += 2 * thisT()->getCmpSelInstrCost( + BinaryOperator::Select, RetTy, CondTy, + CmpInst::BAD_ICMP_PREDICATE, CostKind); return Cost; } case Intrinsic::uadd_sat: @@ -1466,8 +1638,9 @@ public: IntrinsicCostAttributes Attrs(OverflowOp, OpTy, {RetTy, RetTy}, FMF, ScalarizationCostPassed); Cost += thisT()->getIntrinsicInstrCost(Attrs, CostKind); - Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy, - CostKind); + Cost += + thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy, + CmpInst::BAD_ICMP_PREDICATE, CostKind); return Cost; } case Intrinsic::smul_fix: @@ -1477,13 +1650,14 @@ public: unsigned ExtOp = IID == Intrinsic::smul_fix ? 
Instruction::SExt : Instruction::ZExt; + TTI::CastContextHint CCH = TTI::CastContextHint::None; unsigned Cost = 0; - Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, RetTy, CostKind); + Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, RetTy, CCH, CostKind); Cost += thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind); Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, RetTy, ExtTy, - CostKind); + CCH, CostKind); Cost += thisT()->getArithmeticInstrCost(Instruction::LShr, RetTy, CostKind, TTI::OK_AnyValue, TTI::OK_UniformConstantValue); @@ -1511,10 +1685,12 @@ public: // Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign) unsigned Cost = 0; Cost += thisT()->getArithmeticInstrCost(Opcode, SumTy, CostKind); - Cost += 3 * thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy, - OverflowTy, CostKind); - Cost += 2 * thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, OverflowTy, - OverflowTy, CostKind); + Cost += 3 * thisT()->getCmpSelInstrCost( + Instruction::ICmp, SumTy, OverflowTy, + CmpInst::BAD_ICMP_PREDICATE, CostKind); + Cost += 2 * thisT()->getCmpSelInstrCost( + Instruction::Select, OverflowTy, OverflowTy, + CmpInst::BAD_ICMP_PREDICATE, CostKind); Cost += thisT()->getArithmeticInstrCost(BinaryOperator::And, OverflowTy, CostKind); return Cost; @@ -1529,8 +1705,9 @@ public: unsigned Cost = 0; Cost += thisT()->getArithmeticInstrCost(Opcode, SumTy, CostKind); - Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy, - OverflowTy, CostKind); + Cost += + thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy, OverflowTy, + CmpInst::BAD_ICMP_PREDICATE, CostKind); return Cost; } case Intrinsic::smul_with_overflow: @@ -1542,13 +1719,14 @@ public: unsigned ExtOp = IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt; + TTI::CastContextHint CCH = TTI::CastContextHint::None; unsigned Cost = 0; - Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, MulTy, CostKind); + Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, MulTy, CCH, CostKind); Cost += thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind); Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, MulTy, ExtTy, - CostKind); + CCH, CostKind); Cost += thisT()->getArithmeticInstrCost(Instruction::LShr, MulTy, CostKind, TTI::OK_AnyValue, TTI::OK_UniformConstantValue); @@ -1558,8 +1736,9 @@ public: CostKind, TTI::OK_AnyValue, TTI::OK_UniformConstantValue); - Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, MulTy, - OverflowTy, CostKind); + Cost += + thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, MulTy, OverflowTy, + CmpInst::BAD_ICMP_PREDICATE, CostKind); return Cost; } case Intrinsic::ctpop: @@ -1568,7 +1747,12 @@ public: // library call but still not a cheap instruction. SingleCallCost = TargetTransformInfo::TCC_Expensive; break; - // FIXME: ctlz, cttz, ... 
+ case Intrinsic::ctlz: + ISDs.push_back(ISD::CTLZ); + break; + case Intrinsic::cttz: + ISDs.push_back(ISD::CTTZ); + break; case Intrinsic::bswap: ISDs.push_back(ISD::BSWAP); break; @@ -1604,7 +1788,7 @@ public: } } - auto MinLegalCostI = std::min_element(LegalCost.begin(), LegalCost.end()); + auto *MinLegalCostI = std::min_element(LegalCost.begin(), LegalCost.end()); if (MinLegalCostI != LegalCost.end()) return *MinLegalCostI; @@ -1801,9 +1985,10 @@ public: (IsPairwise + 1) * thisT()->getShuffleCost(TTI::SK_ExtractSubvector, Ty, NumVecElts, SubTy); MinMaxCost += - thisT()->getCmpSelInstrCost(CmpOpcode, SubTy, CondTy, CostKind) + + thisT()->getCmpSelInstrCost(CmpOpcode, SubTy, CondTy, + CmpInst::BAD_ICMP_PREDICATE, CostKind) + thisT()->getCmpSelInstrCost(Instruction::Select, SubTy, CondTy, - CostKind); + CmpInst::BAD_ICMP_PREDICATE, CostKind); Ty = SubTy; ++LongVectorCount; } @@ -1825,15 +2010,37 @@ public: thisT()->getShuffleCost(TTI::SK_PermuteSingleSrc, Ty, 0, Ty); MinMaxCost += NumReduxLevels * - (thisT()->getCmpSelInstrCost(CmpOpcode, Ty, CondTy, CostKind) + + (thisT()->getCmpSelInstrCost(CmpOpcode, Ty, CondTy, + CmpInst::BAD_ICMP_PREDICATE, CostKind) + thisT()->getCmpSelInstrCost(Instruction::Select, Ty, CondTy, - CostKind)); + CmpInst::BAD_ICMP_PREDICATE, CostKind)); // The last min/max should be in vector registers and we counted it above. // So just need a single extractelement. return ShuffleCost + MinMaxCost + thisT()->getVectorInstrCost(Instruction::ExtractElement, Ty, 0); } + InstructionCost getExtendedAddReductionCost(bool IsMLA, bool IsUnsigned, + Type *ResTy, VectorType *Ty, + TTI::TargetCostKind CostKind) { + // Without any native support, this is equivalent to the cost of + // vecreduce.add(ext) or if IsMLA vecreduce.add(mul(ext, ext)) + VectorType *ExtTy = VectorType::get(ResTy, Ty); + unsigned RedCost = thisT()->getArithmeticReductionCost( + Instruction::Add, ExtTy, false, CostKind); + unsigned MulCost = 0; + unsigned ExtCost = thisT()->getCastInstrCost( + IsUnsigned ? Instruction::ZExt : Instruction::SExt, ExtTy, Ty, + TTI::CastContextHint::None, CostKind); + if (IsMLA) { + MulCost = + thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind); + ExtCost *= 2; + } + + return RedCost + MulCost + ExtCost; + } + unsigned getVectorSplitCost() { return 1; } /// @} diff --git a/llvm/include/llvm/CodeGen/CalcSpillWeights.h b/llvm/include/llvm/CodeGen/CalcSpillWeights.h index 9b8b7324f30a..78dae81f596e 100644 --- a/llvm/include/llvm/CodeGen/CalcSpillWeights.h +++ b/llvm/include/llvm/CodeGen/CalcSpillWeights.h @@ -44,64 +44,60 @@ class VirtRegMap; /// Calculate auxiliary information for a virtual register such as its /// spill weight and allocation hint. 
class VirtRegAuxInfo { - public: - using NormalizingFn = float (*)(float, unsigned, unsigned); - - private: MachineFunction &MF; LiveIntervals &LIS; - VirtRegMap *VRM; + const VirtRegMap &VRM; const MachineLoopInfo &Loops; const MachineBlockFrequencyInfo &MBFI; - DenseMap<unsigned, float> Hint; - NormalizingFn normalize; public: - VirtRegAuxInfo(MachineFunction &mf, LiveIntervals &lis, - VirtRegMap *vrm, const MachineLoopInfo &loops, - const MachineBlockFrequencyInfo &mbfi, - NormalizingFn norm = normalizeSpillWeight) - : MF(mf), LIS(lis), VRM(vrm), Loops(loops), MBFI(mbfi), normalize(norm) {} + VirtRegAuxInfo(MachineFunction &MF, LiveIntervals &LIS, + const VirtRegMap &VRM, const MachineLoopInfo &Loops, + const MachineBlockFrequencyInfo &MBFI) + : MF(MF), LIS(LIS), VRM(VRM), Loops(Loops), MBFI(MBFI) {} + + virtual ~VirtRegAuxInfo() = default; /// (re)compute li's spill weight and allocation hint. - void calculateSpillWeightAndHint(LiveInterval &li); + void calculateSpillWeightAndHint(LiveInterval &LI); - /// Compute future expected spill weight of a split artifact of li + /// Compute future expected spill weight of a split artifact of LI /// that will span between start and end slot indexes. - /// \param li The live interval to be split. - /// \param start The expected begining of the split artifact. Instructions + /// \param LI The live interval to be split. + /// \param Start The expected beginning of the split artifact. Instructions /// before start will not affect the weight. - /// \param end The expected end of the split artifact. Instructions + /// \param End The expected end of the split artifact. Instructions /// after end will not affect the weight. /// \return The expected spill weight of the split artifact. Returns - /// negative weight for unspillable li. - float futureWeight(LiveInterval &li, SlotIndex start, SlotIndex end); + /// negative weight for unspillable LI. + float futureWeight(LiveInterval &LI, SlotIndex Start, SlotIndex End); + + /// Compute spill weights and allocation hints for all virtual register + /// live intervals. + void calculateSpillWeightsAndHints(); + protected: /// Helper function for weight calculations. - /// (Re)compute li's spill weight and allocation hint, or, for non null + /// (Re)compute LI's spill weight and allocation hint, or, for non null /// start and end - compute future expected spill weight of a split - /// artifact of li that will span between start and end slot indexes. - /// \param li The live interval for which to compute the weight. - /// \param start The expected begining of the split artifact. Instructions + /// artifact of LI that will span between start and end slot indexes. + /// \param LI The live interval for which to compute the weight. + /// \param Start The expected beginning of the split artifact. Instructions /// before start will not affect the weight. Relevant for /// weight calculation of future split artifact. - /// \param end The expected end of the split artifact. Instructions + /// \param End The expected end of the split artifact. Instructions /// after end will not affect the weight. Relevant for /// weight calculation of future split artifact. - /// \return The spill weight. Returns negative weight for unspillable li. - float weightCalcHelper(LiveInterval &li, SlotIndex *start = nullptr, - SlotIndex *end = nullptr); - }; - - /// Compute spill weights and allocation hints for all virtual register - /// live intervals. 
- void calculateSpillWeightsAndHints(LiveIntervals &LIS, MachineFunction &MF, - VirtRegMap *VRM, - const MachineLoopInfo &MLI, - const MachineBlockFrequencyInfo &MBFI, - VirtRegAuxInfo::NormalizingFn norm = - normalizeSpillWeight); + /// \return The spill weight. Returns negative weight for unspillable LI. + float weightCalcHelper(LiveInterval &LI, SlotIndex *Start = nullptr, + SlotIndex *End = nullptr); + /// Weight normalization function. + virtual float normalize(float UseDefFreq, unsigned Size, + unsigned NumInstr) { + return normalizeSpillWeight(UseDefFreq, Size, NumInstr); + } + }; } // end namespace llvm #endif // LLVM_CODEGEN_CALCSPILLWEIGHTS_H diff --git a/llvm/include/llvm/CodeGen/CallingConvLower.h b/llvm/include/llvm/CodeGen/CallingConvLower.h index 8ebe788ac360..2fe4e371263b 100644 --- a/llvm/include/llvm/CodeGen/CallingConvLower.h +++ b/llvm/include/llvm/CodeGen/CallingConvLower.h @@ -16,7 +16,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/Register.h" #include "llvm/CodeGen/TargetCallingConv.h" #include "llvm/IR/CallingConv.h" #include "llvm/MC/MCRegisterInfo.h" @@ -25,6 +25,7 @@ namespace llvm { class CCState; +class MachineFunction; class MVT; class TargetRegisterInfo; @@ -339,6 +340,11 @@ public: return Regs.size(); } + void DeallocateReg(MCPhysReg Reg) { + assert(isAllocated(Reg) && "Trying to deallocate an unallocated register"); + MarkUnallocated(Reg); + } + /// AllocateReg - Attempt to allocate one register. If it is not available, /// return zero. Otherwise, return the register, marking it and any aliases /// as allocated. @@ -432,10 +438,7 @@ public: return AllocateStack(Size, Align(Alignment)); } - void ensureMaxAlignment(Align Alignment) { - if (!AnalyzingMustTailForwardedRegs) - MF.getFrameInfo().ensureMaxAlignment(Alignment); - } + void ensureMaxAlignment(Align Alignment); /// Version of AllocateStack with extra register to be shadowed. LLVM_ATTRIBUTE_DEPRECATED(unsigned AllocateStack(unsigned Size, @@ -572,6 +575,8 @@ public: private: /// MarkAllocated - Mark a register and all of its aliases as allocated. void MarkAllocated(MCPhysReg Reg); + + void MarkUnallocated(MCPhysReg Reg); }; } // end namespace llvm diff --git a/llvm/include/llvm/CodeGen/CodeGenPassBuilder.h b/llvm/include/llvm/CodeGen/CodeGenPassBuilder.h new file mode 100644 index 000000000000..893bc6e013f4 --- /dev/null +++ b/llvm/include/llvm/CodeGen/CodeGenPassBuilder.h @@ -0,0 +1,1144 @@ +//===- Construction of codegen pass pipelines ------------------*- C++ -*--===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// Interfaces for registering analysis passes, producing common pass manager +/// configurations, and parsing of pass pipelines. 
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_CODEGENPASSBUILDER_H +#define LLVM_CODEGEN_CODEGENPASSBUILDER_H + +#include "llvm/ADT/FunctionExtras.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/BasicAliasAnalysis.h" +#include "llvm/Analysis/CFLAndersAliasAnalysis.h" +#include "llvm/Analysis/CFLSteensAliasAnalysis.h" +#include "llvm/Analysis/ScopedNoAliasAA.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/TypeBasedAliasAnalysis.h" +#include "llvm/CodeGen/ExpandReductions.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachinePassManager.h" +#include "llvm/CodeGen/PreISelIntrinsicLowering.h" +#include "llvm/CodeGen/UnreachableBlockElim.h" +#include "llvm/IR/IRPrintingPasses.h" +#include "llvm/IR/PassManager.h" +#include "llvm/IR/Verifier.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCTargetOptions.h" +#include "llvm/Support/CodeGen.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Target/CGPassBuilderOption.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Scalar/ConstantHoisting.h" +#include "llvm/Transforms/Scalar/LoopPassManager.h" +#include "llvm/Transforms/Scalar/LoopStrengthReduce.h" +#include "llvm/Transforms/Scalar/LowerConstantIntrinsics.h" +#include "llvm/Transforms/Scalar/MergeICmps.h" +#include "llvm/Transforms/Scalar/PartiallyInlineLibCalls.h" +#include "llvm/Transforms/Scalar/ScalarizeMaskedMemIntrin.h" +#include "llvm/Transforms/Utils.h" +#include "llvm/Transforms/Utils/EntryExitInstrumenter.h" +#include "llvm/Transforms/Utils/LowerInvoke.h" +#include <cassert> +#include <string> +#include <type_traits> +#include <utility> + +namespace llvm { + +// FIXME: Dummy target independent passes definitions that have not yet been +// ported to new pass manager. Once they do, remove these. +#define DUMMY_FUNCTION_PASS(NAME, PASS_NAME, CONSTRUCTOR) \ + struct PASS_NAME : public PassInfoMixin<PASS_NAME> { \ + template <typename... Ts> PASS_NAME(Ts &&...) {} \ + PreservedAnalyses run(Function &, FunctionAnalysisManager &) { \ + return PreservedAnalyses::all(); \ + } \ + }; +#define DUMMY_MODULE_PASS(NAME, PASS_NAME, CONSTRUCTOR) \ + struct PASS_NAME : public PassInfoMixin<PASS_NAME> { \ + template <typename... Ts> PASS_NAME(Ts &&...) {} \ + PreservedAnalyses run(Module &, ModuleAnalysisManager &) { \ + return PreservedAnalyses::all(); \ + } \ + }; +#define DUMMY_MACHINE_MODULE_PASS(NAME, PASS_NAME, CONSTRUCTOR) \ + struct PASS_NAME : public PassInfoMixin<PASS_NAME> { \ + template <typename... Ts> PASS_NAME(Ts &&...) {} \ + Error run(Module &, MachineFunctionAnalysisManager &) { \ + return Error::success(); \ + } \ + PreservedAnalyses run(MachineFunction &, \ + MachineFunctionAnalysisManager &) { \ + llvm_unreachable("this api is to make new PM api happy"); \ + } \ + static AnalysisKey Key; \ + }; +#define DUMMY_MACHINE_FUNCTION_PASS(NAME, PASS_NAME, CONSTRUCTOR) \ + struct PASS_NAME : public PassInfoMixin<PASS_NAME> { \ + template <typename... Ts> PASS_NAME(Ts &&...) 
{} \ + PreservedAnalyses run(MachineFunction &, \ + MachineFunctionAnalysisManager &) { \ + return PreservedAnalyses::all(); \ + } \ + static AnalysisKey Key; \ + }; +#include "MachinePassRegistry.def" + +/// This class provides access to building LLVM's passes. +/// +/// Its members provide the baseline state available to passes during their +/// construction. The \c MachinePassRegistry.def file specifies how to construct +/// all of the built-in passes, and those may reference these members during +/// construction. +template <typename DerivedT> class CodeGenPassBuilder { +public: + explicit CodeGenPassBuilder(LLVMTargetMachine &TM, CGPassBuilderOption Opts, + PassInstrumentationCallbacks *PIC) + : TM(TM), Opt(Opts), PIC(PIC) { + // Target could set CGPassBuilderOption::MISchedPostRA to true to achieve + // substitutePass(&PostRASchedulerID, &PostMachineSchedulerID) + + // Target should override TM.Options.EnableIPRA in their target-specific + // LLVMTM ctor. See TargetMachine::setGlobalISel for example. + if (Opt.EnableIPRA) + TM.Options.EnableIPRA = *Opt.EnableIPRA; + + if (Opt.EnableGlobalISelAbort) + TM.Options.GlobalISelAbort = *Opt.EnableGlobalISelAbort; + + if (!Opt.OptimizeRegAlloc) + Opt.OptimizeRegAlloc = getOptLevel() != CodeGenOpt::None; + } + + Error buildPipeline(ModulePassManager &MPM, MachineFunctionPassManager &MFPM, + raw_pwrite_stream &Out, raw_pwrite_stream *DwoOut, + CodeGenFileType FileType) const; + + void registerModuleAnalyses(ModuleAnalysisManager &) const; + void registerFunctionAnalyses(FunctionAnalysisManager &) const; + void registerMachineFunctionAnalyses(MachineFunctionAnalysisManager &) const; + std::pair<StringRef, bool> getPassNameFromLegacyName(StringRef) const; + + void registerAnalyses(MachineFunctionAnalysisManager &MFAM) const { + registerModuleAnalyses(*MFAM.MAM); + registerFunctionAnalyses(*MFAM.FAM); + registerMachineFunctionAnalyses(MFAM); + } + + PassInstrumentationCallbacks *getPassInstrumentationCallbacks() const { + return PIC; + } + +protected: + template <typename PassT> using has_key_t = decltype(PassT::Key); + + template <typename PassT> + using is_module_pass_t = decltype(std::declval<PassT &>().run( + std::declval<Module &>(), std::declval<ModuleAnalysisManager &>())); + + template <typename PassT> + using is_function_pass_t = decltype(std::declval<PassT &>().run( + std::declval<Function &>(), std::declval<FunctionAnalysisManager &>())); + + // Function object to maintain state while adding codegen IR passes. 
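For orientation, the public surface above (the constructor, registerAnalyses, and buildPipeline) is all a client needs to drive the builder; a minimal sketch of that flow follows, assuming a hypothetical CRTP subclass FooCodeGenPassBuilder and placeholder TM, Opts, PIC and Out objects (the exact MachineFunctionAnalysisManager wiring is an assumption here, not part of this patch). The AddIRPass helper that follows, and AddMachinePass after it, are the internal plumbing those entry points use.

FunctionAnalysisManager FAM;
ModuleAnalysisManager MAM;
MachineFunctionAnalysisManager MFAM(FAM, MAM); // assumed constructor shape

FooCodeGenPassBuilder Builder(TM, Opts, PIC);  // hypothetical CRTP subclass
Builder.registerAnalyses(MFAM);                // module, function and MIR analyses

ModulePassManager MPM;
MachineFunctionPassManager MFPM;
if (Error Err = Builder.buildPipeline(MPM, MFPM, Out, /*DwoOut=*/nullptr,
                                      CGFT_ObjectFile))
  report_fatal_error(std::move(Err));
// MPM then runs the IR portion of the pipeline; MFPM runs the machine portion.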
+ class AddIRPass { + public: + AddIRPass(ModulePassManager &MPM, bool DebugPM, bool Check = true) + : MPM(MPM), FPM(DebugPM) { + if (Check) + AddingFunctionPasses = false; + } + ~AddIRPass() { + MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); + } + + // Add Function Pass + template <typename PassT> + std::enable_if_t<is_detected<is_function_pass_t, PassT>::value> + operator()(PassT &&Pass) { + if (AddingFunctionPasses && !*AddingFunctionPasses) + AddingFunctionPasses = true; + FPM.addPass(std::forward<PassT>(Pass)); + } + + // Add Module Pass + template <typename PassT> + std::enable_if_t<is_detected<is_module_pass_t, PassT>::value && + !is_detected<is_function_pass_t, PassT>::value> + operator()(PassT &&Pass) { + assert((!AddingFunctionPasses || !*AddingFunctionPasses) && + "could not add module pass after adding function pass"); + MPM.addPass(std::forward<PassT>(Pass)); + } + + private: + ModulePassManager &MPM; + FunctionPassManager FPM; + // The codegen IR pipeline are mostly function passes with the exceptions of + // a few loop and module passes. `AddingFunctionPasses` make sures that + // we could only add module passes at the beginning of the pipeline. Once + // we begin adding function passes, we could no longer add module passes. + // This special-casing introduces less adaptor passes. If we have the need + // of adding module passes after function passes, we could change the + // implementation to accommodate that. + Optional<bool> AddingFunctionPasses; + }; + + // Function object to maintain state while adding codegen machine passes. + class AddMachinePass { + public: + AddMachinePass(MachineFunctionPassManager &PM) : PM(PM) {} + + template <typename PassT> void operator()(PassT &&Pass) { + static_assert( + is_detected<has_key_t, PassT>::value, + "Machine function pass must define a static member variable `Key`."); + for (auto &C : BeforeCallbacks) + if (!C(&PassT::Key)) + return; + PM.addPass(std::forward<PassT>(Pass)); + for (auto &C : AfterCallbacks) + C(&PassT::Key); + } + + template <typename PassT> void insertPass(AnalysisKey *ID, PassT Pass) { + AfterCallbacks.emplace_back( + [this, ID, Pass = std::move(Pass)](AnalysisKey *PassID) { + if (PassID == ID) + this->PM.addPass(std::move(Pass)); + }); + } + + void disablePass(AnalysisKey *ID) { + BeforeCallbacks.emplace_back( + [ID](AnalysisKey *PassID) { return PassID != ID; }); + } + + MachineFunctionPassManager releasePM() { return std::move(PM); } + + private: + MachineFunctionPassManager &PM; + SmallVector<llvm::unique_function<bool(AnalysisKey *)>, 4> BeforeCallbacks; + SmallVector<llvm::unique_function<void(AnalysisKey *)>, 4> AfterCallbacks; + }; + + LLVMTargetMachine &TM; + CGPassBuilderOption Opt; + PassInstrumentationCallbacks *PIC; + + /// Target override these hooks to parse target-specific analyses. + void registerTargetAnalysis(ModuleAnalysisManager &) const {} + void registerTargetAnalysis(FunctionAnalysisManager &) const {} + void registerTargetAnalysis(MachineFunctionAnalysisManager &) const {} + std::pair<StringRef, bool> getTargetPassNameFromLegacyName(StringRef) const { + return {"", false}; + } + + template <typename TMC> TMC &getTM() const { return static_cast<TMC &>(TM); } + CodeGenOpt::Level getOptLevel() const { return TM.getOptLevel(); } + + /// Check whether or not GlobalISel should abort on error. + /// When this is disabled, GlobalISel will fall back on SDISel instead of + /// erroring out. 
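The insertPass/disablePass callbacks on AddMachinePass above are the intended hook for point customizations of the standard machine pipeline without re-implementing a whole stage. A hedged sketch inside a hypothetical derived builder (FooPostRAPeepholePass is an invented machine function pass; the override must be declared public so buildPipeline can reach it through derived()):

Error FooCodeGenPassBuilder::addMachinePasses(AddMachinePass &addPass) const {
  // Skip a standard pass this target does not want.
  addPass.disablePass(&EarlyTailDuplicatePass::Key);
  // Queue an invented target pass to run right after the post-RA scheduler.
  addPass.insertPass(&PostRASchedulerPass::Key, FooPostRAPeepholePass());
  // Build the standard pipeline; the callbacks fire as each pass is added,
  // so they must be registered before delegating to the base implementation.
  return CodeGenPassBuilder::addMachinePasses(addPass);
}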
+ bool isGlobalISelAbortEnabled() const { + return TM.Options.GlobalISelAbort == GlobalISelAbortMode::Enable; + } + + /// Check whether or not a diagnostic should be emitted when GlobalISel + /// uses the fallback path. In other words, it will emit a diagnostic + /// when GlobalISel failed and isGlobalISelAbortEnabled is false. + bool reportDiagnosticWhenGlobalISelFallback() const { + return TM.Options.GlobalISelAbort == GlobalISelAbortMode::DisableWithDiag; + } + + /// addInstSelector - This method should install an instruction selector pass, + /// which converts from LLVM code to machine instructions. + Error addInstSelector(AddMachinePass &) const { + return make_error<StringError>("addInstSelector is not overridden", + inconvertibleErrorCode()); + } + + /// Add passes that optimize instruction level parallelism for out-of-order + /// targets. These passes are run while the machine code is still in SSA + /// form, so they can use MachineTraceMetrics to control their heuristics. + /// + /// All passes added here should preserve the MachineDominatorTree, + /// MachineLoopInfo, and MachineTraceMetrics analyses. + void addILPOpts(AddMachinePass &) const {} + + /// This method may be implemented by targets that want to run passes + /// immediately before register allocation. + void addPreRegAlloc(AddMachinePass &) const {} + + /// addPreRewrite - Add passes to the optimized register allocation pipeline + /// after register allocation is complete, but before virtual registers are + /// rewritten to physical registers. + /// + /// These passes must preserve VirtRegMap and LiveIntervals, and when running + /// after RABasic or RAGreedy, they should take advantage of LiveRegMatrix. + /// When these passes run, VirtRegMap contains legal physreg assignments for + /// all virtual registers. + /// + /// Note if the target overloads addRegAssignAndRewriteOptimized, this may not + /// be honored. This is also not generally used for the the fast variant, + /// where the allocation and rewriting are done in one pass. + void addPreRewrite(AddMachinePass &) const {} + + /// Add passes to be run immediately after virtual registers are rewritten + /// to physical registers. + void addPostRewrite(AddMachinePass &) const {} + + /// This method may be implemented by targets that want to run passes after + /// register allocation pass pipeline but before prolog-epilog insertion. + void addPostRegAlloc(AddMachinePass &) const {} + + /// This method may be implemented by targets that want to run passes after + /// prolog-epilog insertion and before the second instruction scheduling pass. + void addPreSched2(AddMachinePass &) const {} + + /// This pass may be implemented by targets that want to run passes + /// immediately before machine code is emitted. + void addPreEmitPass(AddMachinePass &) const {} + + /// Targets may add passes immediately before machine code is emitted in this + /// callback. This is called even later than `addPreEmitPass`. + // FIXME: Rename `addPreEmitPass` to something more sensible given its actual + // position and remove the `2` suffix here as this callback is what + // `addPreEmitPass` *should* be but in reality isn't. + void addPreEmitPass2(AddMachinePass &) const {} + + /// {{@ For GlobalISel + /// + + /// addPreISel - This method should add any "last minute" LLVM->LLVM + /// passes (which are run just before instruction selector). 
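These hooks are shadowed rather than virtually overridden: buildPipeline always dispatches through derived(), so a target redeclares only the hooks it cares about, and those redeclarations need to be reachable from the base (public, in this sketch). A minimal sketch under that assumption, with both Foo* passes invented for illustration; the default stubs, starting with addPreISel just below, simply abort or return an error when a required hook is missing.

class FooCodeGenPassBuilder : public CodeGenPassBuilder<FooCodeGenPassBuilder> {
public:
  using CodeGenPassBuilder::CodeGenPassBuilder;

  // Shadow the default hooks; buildPipeline reaches them via derived().
  void addPreISel(AddIRPass &addPass) const {
    addPass(FooLowerSpecialIntrinsicsPass()); // invented IR (function) pass
  }

  Error addInstSelector(AddMachinePass &addPass) const {
    addPass(FooISelPass()); // invented machine pass with a static AnalysisKey
    return Error::success();
  }
};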
+ void addPreISel(AddIRPass &) const { + llvm_unreachable("addPreISel is not overridden"); + } + + /// This method should install an IR translator pass, which converts from + /// LLVM code to machine instructions with possibly generic opcodes. + Error addIRTranslator(AddMachinePass &) const { + return make_error<StringError>("addIRTranslator is not overridden", + inconvertibleErrorCode()); + } + + /// This method may be implemented by targets that want to run passes + /// immediately before legalization. + void addPreLegalizeMachineIR(AddMachinePass &) const {} + + /// This method should install a legalize pass, which converts the instruction + /// sequence into one that can be selected by the target. + Error addLegalizeMachineIR(AddMachinePass &) const { + return make_error<StringError>("addLegalizeMachineIR is not overridden", + inconvertibleErrorCode()); + } + + /// This method may be implemented by targets that want to run passes + /// immediately before the register bank selection. + void addPreRegBankSelect(AddMachinePass &) const {} + + /// This method should install a register bank selector pass, which + /// assigns register banks to virtual registers without a register + /// class or register banks. + Error addRegBankSelect(AddMachinePass &) const { + return make_error<StringError>("addRegBankSelect is not overridden", + inconvertibleErrorCode()); + } + + /// This method may be implemented by targets that want to run passes + /// immediately before the (global) instruction selection. + void addPreGlobalInstructionSelect(AddMachinePass &) const {} + + /// This method should install a (global) instruction selector pass, which + /// converts possibly generic instructions to fully target-specific + /// instructions, thereby constraining all generic virtual registers to + /// register classes. + Error addGlobalInstructionSelect(AddMachinePass &) const { + return make_error<StringError>( + "addGlobalInstructionSelect is not overridden", + inconvertibleErrorCode()); + } + /// @}} + + /// High level function that adds all passes necessary to go from llvm IR + /// representation to the MI representation. + /// Adds IR based lowering and target specific optimization passes and finally + /// the core instruction selection passes. + /// \returns true if an error occurred, false otherwise. + void addISelPasses(AddIRPass &) const; + + /// Add the actual instruction selection passes. This does not include + /// preparation passes on IR. + Error addCoreISelPasses(AddMachinePass &) const; + + /// Add the complete, standard set of LLVM CodeGen passes. + /// Fully developed targets will not generally override this. + Error addMachinePasses(AddMachinePass &) const; + + /// Add passes to lower exception handling for the code generator. + void addPassesToHandleExceptions(AddIRPass &) const; + + /// Add common target configurable passes that perform LLVM IR to IR + /// transforms following machine independent optimization. + void addIRPasses(AddIRPass &) const; + + /// Add pass to prepare the LLVM IR for code generation. This should be done + /// before exception handling preparation passes. + void addCodeGenPrepare(AddIRPass &) const; + + /// Add common passes that perform LLVM IR to IR transforms in preparation for + /// instruction selection. + void addISelPrepare(AddIRPass &) const; + + /// Methods with trivial inline returns are convenient points in the common + /// codegen pass pipeline where targets may insert passes. 
Methods with
+ /// out-of-line standard implementations are major CodeGen stages called by
+ /// addMachinePasses. Some targets may override major stages when inserting
+ /// passes is insufficient, but maintaining overridden stages is more work.
+ ///
+
+ /// addMachineSSAOptimization - Add standard passes that optimize machine
+ /// instructions in SSA form.
+ void addMachineSSAOptimization(AddMachinePass &) const;
+
+ /// addFastRegAlloc - Add the minimum set of target-independent passes that
+ /// are required for fast register allocation.
+ Error addFastRegAlloc(AddMachinePass &) const;
+
+ /// addOptimizedRegAlloc - Add passes related to register allocation.
+ /// LLVMTargetMachine provides standard regalloc passes for most targets.
+ void addOptimizedRegAlloc(AddMachinePass &) const;
+
+ /// Add passes that optimize machine instructions after register allocation.
+ void addMachineLateOptimization(AddMachinePass &) const;
+
+ /// addGCPasses - Add late codegen passes that analyze code for garbage
+ /// collection. This should return true if GC info should be printed after
+ /// these passes.
+ void addGCPasses(AddMachinePass &) const {}
+
+ /// Add standard basic block placement passes.
+ void addBlockPlacement(AddMachinePass &) const;
+
+ using CreateMCStreamer =
+ std::function<Expected<std::unique_ptr<MCStreamer>>(MCContext &)>;
+ void addAsmPrinter(AddMachinePass &, CreateMCStreamer) const {
+ llvm_unreachable("addAsmPrinter is not overridden");
+ }
+
+ /// Utilities for targets to add passes to the pass manager.
+ ///
+
+ /// createTargetRegisterAllocator - Create the register allocator pass for
+ /// this target at the current optimization level.
+ void addTargetRegisterAllocator(AddMachinePass &, bool Optimized) const;
+
+ /// addMachinePasses helper to create the target-selected or overridden
+ /// regalloc pass.
+ void addRegAllocPass(AddMachinePass &, bool Optimized) const;
+
+ /// Add core register allocator passes which do the actual register assignment
+ /// and rewriting. \returns true if any passes were added.
+ Error addRegAssignmentFast(AddMachinePass &) const;
+ Error addRegAssignmentOptimized(AddMachinePass &) const;
+
+private:
+ DerivedT &derived() { return static_cast<DerivedT &>(*this); }
+ const DerivedT &derived() const {
+ return static_cast<const DerivedT &>(*this);
+ }
+};
+
+template <typename Derived>
+Error CodeGenPassBuilder<Derived>::buildPipeline(
+ ModulePassManager &MPM, MachineFunctionPassManager &MFPM,
+ raw_pwrite_stream &Out, raw_pwrite_stream *DwoOut,
+ CodeGenFileType FileType) const {
+ AddIRPass addIRPass(MPM, Opt.DebugPM);
+ addISelPasses(addIRPass);
+
+ AddMachinePass addPass(MFPM);
+ if (auto Err = addCoreISelPasses(addPass))
+ return std::move(Err);
+
+ if (auto Err = derived().addMachinePasses(addPass))
+ return std::move(Err);
+
+ derived().addAsmPrinter(
+ addPass, [this, &Out, DwoOut, FileType](MCContext &Ctx) {
+ return this->TM.createMCStreamer(Out, DwoOut, FileType, Ctx);
+ });
+
+ addPass(FreeMachineFunctionPass());
+ return Error::success();
+}
+
+static inline AAManager registerAAAnalyses(CFLAAType UseCFLAA) {
+ AAManager AA;
+
+ // The order in which these are registered determines their priority when
+ // being queried.
+
+ switch (UseCFLAA) {
+ case CFLAAType::Steensgaard:
+ AA.registerFunctionAnalysis<CFLSteensAA>();
+ break;
+ case CFLAAType::Andersen:
+ AA.registerFunctionAnalysis<CFLAndersAA>();
+ break;
+ case CFLAAType::Both:
+ AA.registerFunctionAnalysis<CFLAndersAA>();
+ AA.registerFunctionAnalysis<CFLSteensAA>();
+ break;
+ default:
+ break;
+ }
+
+ // Basic AliasAnalysis support.
+ // Add TypeBasedAliasAnalysis before BasicAliasAnalysis so that
+ // BasicAliasAnalysis wins if they disagree. This is intended to help
+ // support "obvious" type-punning idioms.
+ AA.registerFunctionAnalysis<TypeBasedAA>();
+ AA.registerFunctionAnalysis<ScopedNoAliasAA>();
+ AA.registerFunctionAnalysis<BasicAA>();
+
+ return AA;
+}
+
+template <typename Derived>
+void CodeGenPassBuilder<Derived>::registerModuleAnalyses(
+ ModuleAnalysisManager &MAM) const {
+#define MODULE_ANALYSIS(NAME, PASS_NAME, CONSTRUCTOR) \
+ MAM.registerPass([&] { return PASS_NAME CONSTRUCTOR; });
+#include "MachinePassRegistry.def"
+ derived().registerTargetAnalysis(MAM);
+}
+
+template <typename Derived>
+void CodeGenPassBuilder<Derived>::registerFunctionAnalyses(
+ FunctionAnalysisManager &FAM) const {
+ FAM.registerPass([this] { return registerAAAnalyses(this->Opt.UseCFLAA); });
+
+#define FUNCTION_ANALYSIS(NAME, PASS_NAME, CONSTRUCTOR) \
+ FAM.registerPass([&] { return PASS_NAME CONSTRUCTOR; });
+#include "MachinePassRegistry.def"
+ derived().registerTargetAnalysis(FAM);
+}
+
+template <typename Derived>
+void CodeGenPassBuilder<Derived>::registerMachineFunctionAnalyses(
+ MachineFunctionAnalysisManager &MFAM) const {
+#define MACHINE_FUNCTION_ANALYSIS(NAME, PASS_NAME, CONSTRUCTOR) \
+ MFAM.registerPass([&] { return PASS_NAME CONSTRUCTOR; });
+#include "MachinePassRegistry.def"
+ derived().registerTargetAnalysis(MFAM);
+}
+
+// FIXME: For the new PM, using the pass name directly on the command line
+// seems preferable.
+// Translate a stringified pass name to its old command-line name. Returns the
+// matching legacy name and a boolean value indicating whether the pass is a
+// machine pass.
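The generic translation that follows first consults the registry's X-macros and only then defers to getTargetPassNameFromLegacyName, so a target can map its own legacy -run-pass names; a hedged sketch with an invented name, declared public in the hypothetical FooCodeGenPassBuilder:

std::pair<StringRef, bool>
FooCodeGenPassBuilder::getTargetPassNameFromLegacyName(StringRef Name) const {
  if (Name == "foo-isel")
    return {"FooISelPass", /*IsMachinePass=*/true};
  return {"", false}; // unknown: let the generic caller report the error
}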
+template <typename Derived> +std::pair<StringRef, bool> +CodeGenPassBuilder<Derived>::getPassNameFromLegacyName(StringRef Name) const { + std::pair<StringRef, bool> Ret; + if (Name.empty()) + return Ret; + +#define FUNCTION_PASS(NAME, PASS_NAME, CONSTRUCTOR) \ + if (Name == NAME) \ + Ret = {#PASS_NAME, false}; +#define DUMMY_FUNCTION_PASS(NAME, PASS_NAME, CONSTRUCTOR) \ + if (Name == NAME) \ + Ret = {#PASS_NAME, false}; +#define MODULE_PASS(NAME, PASS_NAME, CONSTRUCTOR) \ + if (Name == NAME) \ + Ret = {#PASS_NAME, false}; +#define DUMMY_MODULE_PASS(NAME, PASS_NAME, CONSTRUCTOR) \ + if (Name == NAME) \ + Ret = {#PASS_NAME, false}; +#define MACHINE_MODULE_PASS(NAME, PASS_NAME, CONSTRUCTOR) \ + if (Name == NAME) \ + Ret = {#PASS_NAME, true}; +#define DUMMY_MACHINE_MODULE_PASS(NAME, PASS_NAME, CONSTRUCTOR) \ + if (Name == NAME) \ + Ret = {#PASS_NAME, true}; +#define MACHINE_FUNCTION_PASS(NAME, PASS_NAME, CONSTRUCTOR) \ + if (Name == NAME) \ + Ret = {#PASS_NAME, true}; +#define DUMMY_MACHINE_FUNCTION_PASS(NAME, PASS_NAME, CONSTRUCTOR) \ + if (Name == NAME) \ + Ret = {#PASS_NAME, true}; +#include "llvm/CodeGen/MachinePassRegistry.def" + + if (Ret.first.empty()) + Ret = derived().getTargetPassNameFromLegacyName(Name); + + if (Ret.first.empty()) + report_fatal_error(Twine('\"') + Twine(Name) + + Twine("\" pass could not be found.")); + + return Ret; +} + +template <typename Derived> +void CodeGenPassBuilder<Derived>::addISelPasses(AddIRPass &addPass) const { + if (TM.useEmulatedTLS()) + addPass(LowerEmuTLSPass()); + + addPass(PreISelIntrinsicLoweringPass()); + + derived().addIRPasses(addPass); + derived().addCodeGenPrepare(addPass); + addPassesToHandleExceptions(addPass); + derived().addISelPrepare(addPass); +} + +/// Add common target configurable passes that perform LLVM IR to IR transforms +/// following machine independent optimization. +template <typename Derived> +void CodeGenPassBuilder<Derived>::addIRPasses(AddIRPass &addPass) const { + // Before running any passes, run the verifier to determine if the input + // coming from the front-end and/or optimizer is valid. + if (!Opt.DisableVerify) + addPass(VerifierPass()); + + // Run loop strength reduction before anything else. + if (getOptLevel() != CodeGenOpt::None && !Opt.DisableLSR) { + addPass(createFunctionToLoopPassAdaptor( + LoopStrengthReducePass(), /*UseMemorySSA*/ true, Opt.DebugPM)); + // FIXME: use -stop-after so we could remove PrintLSR + if (Opt.PrintLSR) + addPass(PrintFunctionPass(dbgs(), "\n\n*** Code after LSR ***\n")); + } + + if (getOptLevel() != CodeGenOpt::None) { + // The MergeICmpsPass tries to create memcmp calls by grouping sequences of + // loads and compares. ExpandMemCmpPass then tries to expand those calls + // into optimally-sized loads and compares. The transforms are enabled by a + // target lowering hook. + if (!Opt.DisableMergeICmps) + addPass(MergeICmpsPass()); + addPass(ExpandMemCmpPass()); + } + + // Run GC lowering passes for builtin collectors + // TODO: add a pass insertion point here + addPass(GCLoweringPass()); + addPass(ShadowStackGCLoweringPass()); + addPass(LowerConstantIntrinsicsPass()); + + // Make sure that no unreachable blocks are instruction selected. + addPass(UnreachableBlockElimPass()); + + // Prepare expensive constants for SelectionDAG. 
+ if (getOptLevel() != CodeGenOpt::None && !Opt.DisableConstantHoisting) + addPass(ConstantHoistingPass()); + + if (getOptLevel() != CodeGenOpt::None && !Opt.DisablePartialLibcallInlining) + addPass(PartiallyInlineLibCallsPass()); + + // Instrument function entry and exit, e.g. with calls to mcount(). + addPass(EntryExitInstrumenterPass(/*PostInlining=*/true)); + + // Add scalarization of target's unsupported masked memory intrinsics pass. + // the unsupported intrinsic will be replaced with a chain of basic blocks, + // that stores/loads element one-by-one if the appropriate mask bit is set. + addPass(ScalarizeMaskedMemIntrinPass()); + + // Expand reduction intrinsics into shuffle sequences if the target wants to. + addPass(ExpandReductionsPass()); +} + +/// Turn exception handling constructs into something the code generators can +/// handle. +template <typename Derived> +void CodeGenPassBuilder<Derived>::addPassesToHandleExceptions( + AddIRPass &addPass) const { + const MCAsmInfo *MCAI = TM.getMCAsmInfo(); + assert(MCAI && "No MCAsmInfo"); + switch (MCAI->getExceptionHandlingType()) { + case ExceptionHandling::SjLj: + // SjLj piggy-backs on dwarf for this bit. The cleanups done apply to both + // Dwarf EH prepare needs to be run after SjLj prepare. Otherwise, + // catch info can get misplaced when a selector ends up more than one block + // removed from the parent invoke(s). This could happen when a landing + // pad is shared by multiple invokes and is also a target of a normal + // edge from elsewhere. + addPass(SjLjEHPreparePass()); + LLVM_FALLTHROUGH; + case ExceptionHandling::DwarfCFI: + case ExceptionHandling::ARM: + case ExceptionHandling::AIX: + addPass(DwarfEHPass(getOptLevel())); + break; + case ExceptionHandling::WinEH: + // We support using both GCC-style and MSVC-style exceptions on Windows, so + // add both preparation passes. Each pass will only actually run if it + // recognizes the personality function. + addPass(WinEHPass()); + addPass(DwarfEHPass(getOptLevel())); + break; + case ExceptionHandling::Wasm: + // Wasm EH uses Windows EH instructions, but it does not need to demote PHIs + // on catchpads and cleanuppads because it does not outline them into + // funclets. Catchswitch blocks are not lowered in SelectionDAG, so we + // should remove PHIs there. + addPass(WinEHPass(/*DemoteCatchSwitchPHIOnly=*/false)); + addPass(WasmEHPass()); + break; + case ExceptionHandling::None: + addPass(LowerInvokePass()); + + // The lower invoke pass may create unreachable code. Remove it. + addPass(UnreachableBlockElimPass()); + break; + } +} + +/// Add pass to prepare the LLVM IR for code generation. This should be done +/// before exception handling preparation passes. +template <typename Derived> +void CodeGenPassBuilder<Derived>::addCodeGenPrepare(AddIRPass &addPass) const { + if (getOptLevel() != CodeGenOpt::None && !Opt.DisableCGP) + addPass(CodeGenPreparePass()); + // TODO: Default ctor'd RewriteSymbolPass is no-op. + // addPass(RewriteSymbolPass()); +} + +/// Add common passes that perform LLVM IR to IR transforms in preparation for +/// instruction selection. +template <typename Derived> +void CodeGenPassBuilder<Derived>::addISelPrepare(AddIRPass &addPass) const { + derived().addPreISel(addPass); + + // Add both the safe stack and the stack protection passes: each of them will + // only protect functions that have corresponding attributes. 
+ addPass(SafeStackPass()); + addPass(StackProtectorPass()); + + if (Opt.PrintISelInput) + addPass(PrintFunctionPass(dbgs(), + "\n\n*** Final LLVM Code input to ISel ***\n")); + + // All passes which modify the LLVM IR are now complete; run the verifier + // to ensure that the IR is valid. + if (!Opt.DisableVerify) + addPass(VerifierPass()); +} + +template <typename Derived> +Error CodeGenPassBuilder<Derived>::addCoreISelPasses( + AddMachinePass &addPass) const { + // Enable FastISel with -fast-isel, but allow that to be overridden. + TM.setO0WantsFastISel(Opt.EnableFastISelOption.getValueOr(true)); + + // Determine an instruction selector. + enum class SelectorType { SelectionDAG, FastISel, GlobalISel }; + SelectorType Selector; + + if (Opt.EnableFastISelOption && *Opt.EnableFastISelOption == true) + Selector = SelectorType::FastISel; + else if ((Opt.EnableGlobalISelOption && + *Opt.EnableGlobalISelOption == true) || + (TM.Options.EnableGlobalISel && + (!Opt.EnableGlobalISelOption || + *Opt.EnableGlobalISelOption == false))) + Selector = SelectorType::GlobalISel; + else if (TM.getOptLevel() == CodeGenOpt::None && TM.getO0WantsFastISel()) + Selector = SelectorType::FastISel; + else + Selector = SelectorType::SelectionDAG; + + // Set consistently TM.Options.EnableFastISel and EnableGlobalISel. + if (Selector == SelectorType::FastISel) { + TM.setFastISel(true); + TM.setGlobalISel(false); + } else if (Selector == SelectorType::GlobalISel) { + TM.setFastISel(false); + TM.setGlobalISel(true); + } + + // Add instruction selector passes. + if (Selector == SelectorType::GlobalISel) { + if (auto Err = derived().addIRTranslator(addPass)) + return std::move(Err); + + derived().addPreLegalizeMachineIR(addPass); + + if (auto Err = derived().addLegalizeMachineIR(addPass)) + return std::move(Err); + + // Before running the register bank selector, ask the target if it + // wants to run some passes. + derived().addPreRegBankSelect(addPass); + + if (auto Err = derived().addRegBankSelect(addPass)) + return std::move(Err); + + derived().addPreGlobalInstructionSelect(addPass); + + if (auto Err = derived().addGlobalInstructionSelect(addPass)) + return std::move(Err); + + // Pass to reset the MachineFunction if the ISel failed. + addPass(ResetMachineFunctionPass(reportDiagnosticWhenGlobalISelFallback(), + isGlobalISelAbortEnabled())); + + // Provide a fallback path when we do not want to abort on + // not-yet-supported input. + if (!isGlobalISelAbortEnabled()) + if (auto Err = derived().addInstSelector(addPass)) + return std::move(Err); + + } else if (auto Err = derived().addInstSelector(addPass)) + return std::move(Err); + + // Expand pseudo-instructions emitted by ISel. Don't run the verifier before + // FinalizeISel. + addPass(FinalizeISelPass()); + + // // Print the instruction selected machine code... + // printAndVerify("After Instruction Selection"); + + return Error::success(); +} + +/// Add the complete set of target-independent postISel code generator passes. +/// +/// This can be read as the standard order of major LLVM CodeGen stages. Stages +/// with nontrivial configuration or multiple passes are broken out below in +/// add%Stage routines. +/// +/// Any CodeGenPassBuilder<Derived>::addXX routine may be overriden by the +/// Target. The addPre/Post methods with empty header implementations allow +/// injecting target-specific fixups just before or after major stages. 
+/// Additionally, targets have the flexibility to change pass order within a +/// stage by overriding default implementation of add%Stage routines below. Each +/// technique has maintainability tradeoffs because alternate pass orders are +/// not well supported. addPre/Post works better if the target pass is easily +/// tied to a common pass. But if it has subtle dependencies on multiple passes, +/// the target should override the stage instead. +template <typename Derived> +Error CodeGenPassBuilder<Derived>::addMachinePasses( + AddMachinePass &addPass) const { + // Add passes that optimize machine instructions in SSA form. + if (getOptLevel() != CodeGenOpt::None) { + derived().addMachineSSAOptimization(addPass); + } else { + // If the target requests it, assign local variables to stack slots relative + // to one another and simplify frame index references where possible. + addPass(LocalStackSlotPass()); + } + + if (TM.Options.EnableIPRA) + addPass(RegUsageInfoPropagationPass()); + + // Run pre-ra passes. + derived().addPreRegAlloc(addPass); + + // Run register allocation and passes that are tightly coupled with it, + // including phi elimination and scheduling. + if (*Opt.OptimizeRegAlloc) { + derived().addOptimizedRegAlloc(addPass); + } else { + if (auto Err = derived().addFastRegAlloc(addPass)) + return Err; + } + + // Run post-ra passes. + derived().addPostRegAlloc(addPass); + + // Insert prolog/epilog code. Eliminate abstract frame index references... + if (getOptLevel() != CodeGenOpt::None) { + addPass(PostRAMachineSinkingPass()); + addPass(ShrinkWrapPass()); + } + + addPass(PrologEpilogInserterPass()); + + /// Add passes that optimize machine instructions after register allocation. + if (getOptLevel() != CodeGenOpt::None) + derived().addMachineLateOptimization(addPass); + + // Expand pseudo instructions before second scheduling pass. + addPass(ExpandPostRAPseudosPass()); + + // Run pre-sched2 passes. + derived().addPreSched2(addPass); + + if (Opt.EnableImplicitNullChecks) + addPass(ImplicitNullChecksPass()); + + // Second pass scheduler. + // Let Target optionally insert this pass by itself at some other + // point. + if (getOptLevel() != CodeGenOpt::None && + !TM.targetSchedulesPostRAScheduling()) { + if (Opt.MISchedPostRA) + addPass(PostMachineSchedulerPass()); + else + addPass(PostRASchedulerPass()); + } + + // GC + derived().addGCPasses(addPass); + + // Basic block placement. + if (getOptLevel() != CodeGenOpt::None) + derived().addBlockPlacement(addPass); + + // Insert before XRay Instrumentation. + addPass(FEntryInserterPass()); + + addPass(XRayInstrumentationPass()); + addPass(PatchableFunctionPass()); + + derived().addPreEmitPass(addPass); + + if (TM.Options.EnableIPRA) + // Collect register usage information and produce a register mask of + // clobbered registers, to be used to optimize call sites. + addPass(RegUsageInfoCollectorPass()); + + addPass(FuncletLayoutPass()); + + addPass(StackMapLivenessPass()); + addPass(LiveDebugValuesPass()); + + if (TM.Options.EnableMachineOutliner && getOptLevel() != CodeGenOpt::None && + Opt.EnableMachineOutliner != RunOutliner::NeverOutline) { + bool RunOnAllFunctions = + (Opt.EnableMachineOutliner == RunOutliner::AlwaysOutline); + bool AddOutliner = RunOnAllFunctions || TM.Options.SupportsDefaultOutlining; + if (AddOutliner) + addPass(MachineOutlinerPass(RunOnAllFunctions)); + } + + // Add passes that directly emit MI after all other MI passes. 
+ derived().addPreEmitPass2(addPass); + + return Error::success(); +} + +/// Add passes that optimize machine instructions in SSA form. +template <typename Derived> +void CodeGenPassBuilder<Derived>::addMachineSSAOptimization( + AddMachinePass &addPass) const { + // Pre-ra tail duplication. + addPass(EarlyTailDuplicatePass()); + + // Optimize PHIs before DCE: removing dead PHI cycles may make more + // instructions dead. + addPass(OptimizePHIsPass()); + + // This pass merges large allocas. StackSlotColoring is a different pass + // which merges spill slots. + addPass(StackColoringPass()); + + // If the target requests it, assign local variables to stack slots relative + // to one another and simplify frame index references where possible. + addPass(LocalStackSlotPass()); + + // With optimization, dead code should already be eliminated. However + // there is one known exception: lowered code for arguments that are only + // used by tail calls, where the tail calls reuse the incoming stack + // arguments directly (see t11 in test/CodeGen/X86/sibcall.ll). + addPass(DeadMachineInstructionElimPass()); + + // Allow targets to insert passes that improve instruction level parallelism, + // like if-conversion. Such passes will typically need dominator trees and + // loop info, just like LICM and CSE below. + derived().addILPOpts(addPass); + + addPass(EarlyMachineLICMPass()); + addPass(MachineCSEPass()); + + addPass(MachineSinkingPass()); + + addPass(PeepholeOptimizerPass()); + // Clean-up the dead code that may have been generated by peephole + // rewriting. + addPass(DeadMachineInstructionElimPass()); +} + +//===---------------------------------------------------------------------===// +/// Register Allocation Pass Configuration +//===---------------------------------------------------------------------===// + +/// Instantiate the default register allocator pass for this target for either +/// the optimized or unoptimized allocation path. This will be added to the pass +/// manager by addFastRegAlloc in the unoptimized case or addOptimizedRegAlloc +/// in the optimized case. +/// +/// A target that uses the standard regalloc pass order for fast or optimized +/// allocation may still override this for per-target regalloc +/// selection. But -regalloc=... always takes precedence. +template <typename Derived> +void CodeGenPassBuilder<Derived>::addTargetRegisterAllocator( + AddMachinePass &addPass, bool Optimized) const { + if (Optimized) + addPass(RAGreedyPass()); + else + addPass(RAFastPass()); +} + +/// Find and instantiate the register allocation pass requested by this target +/// at the current optimization level. Different register allocators are +/// defined as separate passes because they may require different analysis. +template <typename Derived> +void CodeGenPassBuilder<Derived>::addRegAllocPass(AddMachinePass &addPass, + bool Optimized) const { + if (Opt.RegAlloc == RegAllocType::Default) + // With no -regalloc= override, ask the target for a regalloc pass. 
+ derived().addTargetRegisterAllocator(addPass, Optimized);
+ else if (Opt.RegAlloc == RegAllocType::Basic)
+ addPass(RABasicPass());
+ else if (Opt.RegAlloc == RegAllocType::Fast)
+ addPass(RAFastPass());
+ else if (Opt.RegAlloc == RegAllocType::Greedy)
+ addPass(RAGreedyPass());
+ else if (Opt.RegAlloc == RegAllocType::PBQP)
+ addPass(RAPBQPPass());
+ else
+ llvm_unreachable("unknown register allocator type");
+}
+
+template <typename Derived>
+Error CodeGenPassBuilder<Derived>::addRegAssignmentFast(
+ AddMachinePass &addPass) const {
+ if (Opt.RegAlloc != RegAllocType::Default &&
+ Opt.RegAlloc != RegAllocType::Fast)
+ return make_error<StringError>(
+ "Must use fast (default) register allocator for unoptimized regalloc.",
+ inconvertibleErrorCode());
+
+ addRegAllocPass(addPass, false);
+ return Error::success();
+}
+
+template <typename Derived>
+Error CodeGenPassBuilder<Derived>::addRegAssignmentOptimized(
+ AddMachinePass &addPass) const {
+ // Add the selected register allocation pass.
+ addRegAllocPass(addPass, true);
+
+ // Allow targets to change the register assignments before rewriting.
+ derived().addPreRewrite(addPass);
+
+ // Finally rewrite virtual registers.
+ addPass(VirtRegRewriterPass());
+ // Perform stack slot coloring and post-ra machine LICM.
+ //
+ // FIXME: Re-enable coloring with register when it's capable of adding
+ // kill markers.
+ addPass(StackSlotColoringPass());
+
+ return Error::success();
+}
+
+/// Add the minimum set of target-independent passes that are required for
+/// register allocation. No coalescing or scheduling.
+template <typename Derived>
+Error CodeGenPassBuilder<Derived>::addFastRegAlloc(
+ AddMachinePass &addPass) const {
+ addPass(PHIEliminationPass());
+ addPass(TwoAddressInstructionPass());
+ return derived().addRegAssignmentFast(addPass);
+}
+
+/// Add standard target-independent passes that are tightly coupled with
+/// optimized register allocation, including coalescing, machine instruction
+/// scheduling, and register allocation itself.
+template <typename Derived>
+void CodeGenPassBuilder<Derived>::addOptimizedRegAlloc(
+ AddMachinePass &addPass) const {
+ addPass(DetectDeadLanesPass());
+
+ addPass(ProcessImplicitDefsPass());
+
+ // Edge splitting is smarter with machine loop info.
+ addPass(PHIEliminationPass());
+
+ // Eventually, we want to run LiveIntervals before PHI elimination.
+ if (Opt.EarlyLiveIntervals)
+ addPass(LiveIntervalsPass());
+
+ addPass(TwoAddressInstructionPass());
+ addPass(RegisterCoalescerPass());
+
+ // The machine scheduler may accidentally create disconnected components
+ // when moving subregister definitions around; avoid this by splitting them
+ // into separate vregs beforehand. Splitting can also improve register
+ // allocation quality.
+ addPass(RenameIndependentSubregsPass());
+
+ // PreRA instruction scheduling.
+ addPass(MachineSchedulerPass());
+
+ if (derived().addRegAssignmentOptimized(addPass)) {
+ // Allow targets to expand pseudo instructions depending on the choice of
+ // registers before MachineCopyPropagation.
+ derived().addPostRewrite(addPass);
+
+ // Copy propagate to forward register uses and try to eliminate COPYs that
+ // were not coalesced.
+ addPass(MachineCopyPropagationPass());
+
+ // Run post-ra machine LICM to hoist reloads / remats.
+ //
+ // FIXME: can this move into MachineLateOptimization?
+ addPass(MachineLICMPass()); + } +} + +//===---------------------------------------------------------------------===// +/// Post RegAlloc Pass Configuration +//===---------------------------------------------------------------------===// + +/// Add passes that optimize machine instructions after register allocation. +template <typename Derived> +void CodeGenPassBuilder<Derived>::addMachineLateOptimization( + AddMachinePass &addPass) const { + // Branch folding must be run after regalloc and prolog/epilog insertion. + addPass(BranchFolderPass()); + + // Tail duplication. + // Note that duplicating tail just increases code size and degrades + // performance for targets that require Structured Control Flow. + // In addition it can also make CFG irreducible. Thus we disable it. + if (!TM.requiresStructuredCFG()) + addPass(TailDuplicatePass()); + + // Copy propagation. + addPass(MachineCopyPropagationPass()); +} + +/// Add standard basic block placement passes. +template <typename Derived> +void CodeGenPassBuilder<Derived>::addBlockPlacement( + AddMachinePass &addPass) const { + addPass(MachineBlockPlacementPass()); + // Run a separate pass to collect block placement statistics. + if (Opt.EnableBlockPlacementStats) + addPass(MachineBlockPlacementStatsPass()); +} + +} // namespace llvm + +#endif // LLVM_CODEGEN_CODEGENPASSBUILDER_H diff --git a/llvm/include/llvm/CodeGen/CommandFlags.h b/llvm/include/llvm/CodeGen/CommandFlags.h index 1b77556dcbb1..e6c64cd4dd8e 100644 --- a/llvm/include/llvm/CodeGen/CommandFlags.h +++ b/llvm/include/llvm/CodeGen/CommandFlags.h @@ -14,6 +14,7 @@ #include "llvm/ADT/FloatingPointMode.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/Triple.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Intrinsics.h" #include "llvm/MC/MCTargetOptionsCommandFlags.h" @@ -74,6 +75,8 @@ bool getDontPlaceZerosInBSS(); bool getEnableGuaranteedTailCallOpt(); +bool getEnableAIXExtendedAltivecABI(); + bool getDisableTailCalls(); bool getStackSymbolOrdering(); @@ -94,8 +97,16 @@ Optional<bool> getExplicitDataSections(); bool getFunctionSections(); Optional<bool> getExplicitFunctionSections(); +bool getIgnoreXCOFFVisibility(); + +bool getXCOFFTracebackTable(); + std::string getBBSections(); +std::string getStackProtectorGuard(); +unsigned getStackProtectorGuardOffset(); +std::string getStackProtectorGuardReg(); + unsigned getTLSSize(); bool getEmulatedTLS(); @@ -114,8 +125,14 @@ bool getEnableAddrsig(); bool getEmitCallSiteInfo(); +bool getEnableMachineFunctionSplitter(); + bool getEnableDebugEntryValues(); +bool getPseudoProbeForProfiling(); + +bool getValueTrackingVariableLocations(); + bool getForceDwarfFrameSection(); bool getXRayOmitFunctionIndex(); @@ -128,9 +145,16 @@ struct RegisterCodeGenFlags { llvm::BasicBlockSection getBBSectionsMode(llvm::TargetOptions &Options); -// Common utility function tightly tied to the options listed here. Initializes -// a TargetOptions object with CodeGen flags and returns it. -TargetOptions InitTargetOptionsFromCodeGenFlags(); +llvm::StackProtectorGuards +getStackProtectorGuardMode(llvm::TargetOptions &Options); + +/// Common utility function tightly tied to the options listed here. Initializes +/// a TargetOptions object with CodeGen flags and returns it. +/// \p TheTriple is used to determine the default value for options if +/// options are not explicitly specified. If those triple dependant options +/// value do not have effect for your component, a default Triple() could be +/// passed in. 
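Call sites now supply the triple explicitly to the declaration that follows; a hypothetical driver snippet, assuming the CommandFlags helpers are reached through the llvm::codegen namespace (an assumption, not shown in this hunk) and that a Module M has already been parsed:

Triple TheTriple(M.getTargetTriple());
TargetOptions Options =
    codegen::InitTargetOptionsFromCodeGenFlags(TheTriple); // previously took no argument
// Components with no triple-dependent option defaults may simply pass Triple().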
+TargetOptions InitTargetOptionsFromCodeGenFlags(const llvm::Triple &TheTriple); std::string getCPUStr(); diff --git a/llvm/include/llvm/CodeGen/DIE.h b/llvm/include/llvm/CodeGen/DIE.h index c7baaf6aef3d..3efef6ec0acd 100644 --- a/llvm/include/llvm/CodeGen/DIE.h +++ b/llvm/include/llvm/CodeGen/DIE.h @@ -247,6 +247,7 @@ public: unsigned SizeOf(const AsmPrinter *AP, dwarf::Form Form) const; void print(raw_ostream &O) const; + uint64_t getIndex() const { return Index; } }; //===--------------------------------------------------------------------===// @@ -382,12 +383,12 @@ private: static_assert(std::is_standard_layout<T>::value || std::is_pointer<T>::value, "Expected standard layout or pointer"); - new (reinterpret_cast<void *>(Val.buffer)) T(V); + new (reinterpret_cast<void *>(&Val)) T(V); } - template <class T> T *get() { return reinterpret_cast<T *>(Val.buffer); } + template <class T> T *get() { return reinterpret_cast<T *>(&Val); } template <class T> const T *get() const { - return reinterpret_cast<const T *>(Val.buffer); + return reinterpret_cast<const T *>(&Val); } template <class T> void destruct() { get<T>()->~T(); } @@ -589,7 +590,6 @@ public: T &operator*() const { return *static_cast<T *>(N); } bool operator==(const iterator &X) const { return N == X.N; } - bool operator!=(const iterator &X) const { return N != X.N; } }; class const_iterator @@ -612,7 +612,6 @@ public: const T &operator*() const { return *static_cast<const T *>(N); } bool operator==(const const_iterator &X) const { return N == X.N; } - bool operator!=(const const_iterator &X) const { return N != X.N; } }; iterator begin() { @@ -788,7 +787,7 @@ public: /// Get the absolute offset within the .debug_info or .debug_types section /// for this DIE. - unsigned getDebugSectionOffset() const; + uint64_t getDebugSectionOffset() const; /// Compute the offset of this DIE and all its children. /// @@ -864,14 +863,11 @@ class DIEUnit { /// a valid section depending on the client that is emitting DWARF. MCSection *Section; uint64_t Offset; /// .debug_info or .debug_types absolute section offset. - uint32_t Length; /// The length in bytes of all of the DIEs in this unit. - const uint16_t Version; /// The Dwarf version number for this unit. - const uint8_t AddrSize; /// The size in bytes of an address for this unit. protected: virtual ~DIEUnit() = default; public: - DIEUnit(uint16_t Version, uint8_t AddrSize, dwarf::Tag UnitTag); + explicit DIEUnit(dwarf::Tag UnitTag); DIEUnit(const DIEUnit &RHS) = delete; DIEUnit(DIEUnit &&RHS) = delete; void operator=(const DIEUnit &RHS) = delete; @@ -893,19 +889,14 @@ public: /// /// \returns Section pointer which can be NULL. 
MCSection *getSection() const { return Section; } - void setDebugSectionOffset(unsigned O) { Offset = O; } - unsigned getDebugSectionOffset() const { return Offset; } - void setLength(uint64_t L) { Length = L; } - uint64_t getLength() const { return Length; } - uint16_t getDwarfVersion() const { return Version; } - uint16_t getAddressSize() const { return AddrSize; } + void setDebugSectionOffset(uint64_t O) { Offset = O; } + uint64_t getDebugSectionOffset() const { return Offset; } DIE &getUnitDie() { return Die; } const DIE &getUnitDie() const { return Die; } }; struct BasicDIEUnit final : DIEUnit { - BasicDIEUnit(uint16_t Version, uint8_t AddrSize, dwarf::Tag UnitTag) - : DIEUnit(Version, AddrSize, UnitTag) {} + explicit BasicDIEUnit(dwarf::Tag UnitTag) : DIEUnit(UnitTag) {} }; //===--------------------------------------------------------------------===// diff --git a/llvm/include/llvm/CodeGen/DbgEntityHistoryCalculator.h b/llvm/include/llvm/CodeGen/DbgEntityHistoryCalculator.h index f7fc74a27fca..bca6065b1643 100644 --- a/llvm/include/llvm/CodeGen/DbgEntityHistoryCalculator.h +++ b/llvm/include/llvm/CodeGen/DbgEntityHistoryCalculator.h @@ -12,6 +12,7 @@ #include "llvm/ADT/MapVector.h" #include "llvm/ADT/PointerIntPair.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/LexicalScopes.h" #include <utility> namespace llvm { @@ -23,6 +24,24 @@ class MachineFunction; class MachineInstr; class TargetRegisterInfo; +/// Record instruction ordering so we can query their relative positions within +/// a function. Meta instructions are given the same ordinal as the preceding +/// non-meta instruction. Class state is invalid if MF is modified after +/// calling initialize. +class InstructionOrdering { +public: + void initialize(const MachineFunction &MF); + void clear() { InstNumberMap.clear(); } + + /// Check if instruction \p A comes before \p B, where \p A and \p B both + /// belong to the MachineFunction passed to initialize(). + bool isBefore(const MachineInstr *A, const MachineInstr *B) const; + +private: + /// Each instruction is assigned an order number. + DenseMap<const MachineInstr *, unsigned> InstNumberMap; +}; + /// For each user variable, keep a list of instruction ranges where this /// variable is accessible. The variables are listed in order of appearance. class DbgValueHistoryMap { @@ -52,6 +71,8 @@ public: /// register-described debug values that have their end index /// set to this entry's position in the entry vector. class Entry { + friend DbgValueHistoryMap; + public: enum EntryKind { DbgValue, Clobber }; @@ -89,6 +110,9 @@ public: return Entries[Index]; } + /// Drop location ranges which exist entirely outside each variable's scope. + void trimLocationRanges(const MachineFunction &MF, LexicalScopes &LScopes, + const InstructionOrdering &Ordering); bool empty() const { return VarEntries.empty(); } void clear() { VarEntries.clear(); } EntriesMap::const_iterator begin() const { return VarEntries.begin(); } diff --git a/llvm/include/llvm/CodeGen/DebugHandlerBase.h b/llvm/include/llvm/CodeGen/DebugHandlerBase.h index 4ff0fdea36ae..45823b2ba349 100644 --- a/llvm/include/llvm/CodeGen/DebugHandlerBase.h +++ b/llvm/include/llvm/CodeGen/DebugHandlerBase.h @@ -110,8 +110,13 @@ protected: virtual void endFunctionImpl(const MachineFunction *MF) = 0; virtual void skippedNonDebugFunction() {} +private: + InstructionOrdering InstOrdering; + // AsmPrinterHandler overrides. 
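Returning briefly to the InstructionOrdering helper introduced above: it is a build-once, query-many structure that becomes stale if the MachineFunction is modified afterwards. A minimal usage sketch, where MF is a fully built MachineFunction and A and B are placeholder instructions within it:

InstructionOrdering Ordering;
Ordering.initialize(MF);          // number every instruction in MF once
if (Ordering.isBefore(&A, &B)) {
  // A precedes B in MF's instruction order; meta instructions share the
  // ordinal of the preceding non-meta instruction.
}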
public: + void beginModule(Module *M) override; + void beginInstruction(const MachineInstr *MI) override; void endInstruction() override; @@ -129,8 +134,13 @@ public: /// If this type is derived from a base type then return base type size. static uint64_t getBaseTypeSize(const DIType *Ty); + + /// Return true if type encoding is unsigned. + static bool isUnsignedDIType(const DIType *Ty); + + const InstructionOrdering &getInstOrdering() const { return InstOrdering; } }; -} +} // namespace llvm #endif diff --git a/llvm/include/llvm/CodeGen/DwarfStringPoolEntry.h b/llvm/include/llvm/CodeGen/DwarfStringPoolEntry.h index e189352a7b2d..abeba62707c1 100644 --- a/llvm/include/llvm/CodeGen/DwarfStringPoolEntry.h +++ b/llvm/include/llvm/CodeGen/DwarfStringPoolEntry.h @@ -21,7 +21,7 @@ struct DwarfStringPoolEntry { static constexpr unsigned NotIndexed = -1; MCSymbol *Symbol; - unsigned Offset; + uint64_t Offset; unsigned Index; bool isIndexed() const { return Index != NotIndexed; } @@ -47,7 +47,7 @@ public: assert(getMapEntry()->second.Symbol && "No symbol available!"); return getMapEntry()->second.Symbol; } - unsigned getOffset() const { return getMapEntry()->second.Offset; } + uint64_t getOffset() const { return getMapEntry()->second.Offset; } bool isIndexed() const { return MapEntryAndIndexed.getInt(); } unsigned getIndex() const { assert(isIndexed()); diff --git a/llvm/include/llvm/CodeGen/FastISel.h b/llvm/include/llvm/CodeGen/FastISel.h index 7662179db44d..81c1d6aad49a 100644 --- a/llvm/include/llvm/CodeGen/FastISel.h +++ b/llvm/include/llvm/CodeGen/FastISel.h @@ -224,10 +224,6 @@ protected: /// makes sense (for example, on function calls) MachineInstr *EmitStartPt; - /// Last local value flush point. On a subsequent flush, no local value will - /// sink past this point. - MachineBasicBlock::iterator LastFlushPoint; - public: virtual ~FastISel(); @@ -246,7 +242,7 @@ public: /// be appended. void startNewBlock(); - /// Flush the local value map and sink local values if possible. + /// Flush the local value map. void finishBasicBlock(); /// Return current debug location information. @@ -313,10 +309,7 @@ public: void removeDeadCode(MachineBasicBlock::iterator I, MachineBasicBlock::iterator E); - struct SavePoint { - MachineBasicBlock::iterator InsertPt; - DebugLoc DL; - }; + using SavePoint = MachineBasicBlock::iterator; /// Prepare InsertPt to begin inserting instructions into the local /// value area and return the old insert position. @@ -510,18 +503,6 @@ protected: unsigned NumArgs); bool lowerCallTo(CallLoweringInfo &CLI); - bool isCommutativeIntrinsic(IntrinsicInst const *II) { - switch (II->getIntrinsicID()) { - case Intrinsic::sadd_with_overflow: - case Intrinsic::uadd_with_overflow: - case Intrinsic::smul_with_overflow: - case Intrinsic::umul_with_overflow: - return true; - default: - return false; - } - } - bool lowerCall(const CallInst *I); /// Select and emit code for a binary operator instruction, which has /// an opcode which directly corresponds to the given ISD opcode. @@ -536,7 +517,6 @@ protected: bool selectFreeze(const User *I); bool selectCast(const User *I, unsigned Opcode); bool selectExtractValue(const User *U); - bool selectInsertValue(const User *I); bool selectXRayCustomEvent(const CallInst *II); bool selectXRayTypedEvent(const CallInst *II); @@ -572,20 +552,6 @@ private: /// Removes dead local value instructions after SavedLastLocalvalue. 
void removeDeadLocalValueCode(MachineInstr *SavedLastLocalValue); - struct InstOrderMap { - DenseMap<MachineInstr *, unsigned> Orders; - MachineInstr *FirstTerminator = nullptr; - unsigned FirstTerminatorOrder = std::numeric_limits<unsigned>::max(); - - void initialize(MachineBasicBlock *MBB, - MachineBasicBlock::iterator LastFlushPoint); - }; - - /// Sinks the local value materialization instruction LocalMI to its first use - /// in the basic block, or deletes it if it is not used. - void sinkLocalValueMaterialization(MachineInstr &LocalMI, Register DefReg, - InstOrderMap &OrderMap); - /// Insertion point before trying to select the current instruction. MachineBasicBlock::iterator SavedInsertPt; diff --git a/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h b/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h index c99ca00eac29..b6bde0249f88 100644 --- a/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h +++ b/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h @@ -91,13 +91,33 @@ public: /// Track virtual registers created for exception pointers. DenseMap<const Value *, Register> CatchPadExceptionPointers; - /// Keep track of frame indices allocated for statepoints as they could be - /// used across basic block boundaries (e.g. for an invoke). For each - /// gc.statepoint instruction, maps uniqued llvm IR values to the slots they - /// were spilled in. If a value is mapped to None it means we visited the - /// value but didn't spill it (because it was a constant, for instance). - using StatepointSpillMapTy = DenseMap<const Value *, Optional<int>>; - DenseMap<const Instruction *, StatepointSpillMapTy> StatepointSpillMaps; + /// Helper object to track which of three possible relocation mechanisms are + /// used for a particular value being relocated over a statepoint. + struct StatepointRelocationRecord { + enum RelocType { + // Value did not need to be relocated and can be used directly. + NoRelocate, + // Value was spilled to stack and needs filled at the gc.relocate. + Spill, + // Value was lowered to tied def and gc.relocate should be replaced with + // copy from vreg. + VReg, + } type = NoRelocate; + // Payload contains either frame index of the stack slot in which the value + // was spilled, or virtual register which contains the re-definition. + union payload_t { + payload_t() : FI(-1) {} + int FI; + Register Reg; + } payload; + }; + + /// Keep track of each value which was relocated and the strategy used to + /// relocate that value. This information is required when visiting + /// gc.relocates which may appear in following blocks. + using StatepointSpillMapTy = + DenseMap<const Value *, StatepointRelocationRecord>; + DenseMap<const Instruction *, StatepointSpillMapTy> StatepointRelocationMaps; /// StaticAllocaMap - Keep track of frame indices for fixed sized allocas in /// the entry block. 
This allows the allocas to be efficiently referenced diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CSEInfo.h b/llvm/include/llvm/CodeGen/GlobalISel/CSEInfo.h index 8bd9e9443552..f76dec57c840 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/CSEInfo.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/CSEInfo.h @@ -16,14 +16,12 @@ #include "llvm/CodeGen/CSEConfigBase.h" #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h" #include "llvm/CodeGen/GlobalISel/GISelWorkList.h" -#include "llvm/CodeGen/GlobalISel/Utils.h" -#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/IR/PassManager.h" -#include "llvm/Pass.h" #include "llvm/Support/Allocator.h" +#include "llvm/Support/CodeGen.h" namespace llvm { +class MachineBasicBlock; /// A class that wraps MachineInstrs and derives from FoldingSetNode in order to /// be uniqued in a CSEMap. The tradeoff here is extra memory allocations for @@ -184,6 +182,8 @@ public: const GISelInstProfileBuilder &addNodeIDRegNum(Register Reg) const; + const GISelInstProfileBuilder &addNodeIDReg(Register Reg) const; + const GISelInstProfileBuilder &addNodeIDImmediate(int64_t Imm) const; const GISelInstProfileBuilder & addNodeIDMBB(const MachineBasicBlock *MBB) const; diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h b/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h index 4d60dffb91db..26ae7129f04a 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h @@ -17,8 +17,11 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/TargetCallingConv.h" +#include "llvm/IR/Attributes.h" #include "llvm/IR/CallingConv.h" +#include "llvm/IR/Type.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MachineValueType.h" #include <cstdint> @@ -26,16 +29,14 @@ namespace llvm { -class CCState; class CallBase; class DataLayout; class Function; +class FunctionLoweringInfo; class MachineIRBuilder; -class MachineOperand; struct MachinePointerInfo; class MachineRegisterInfo; class TargetLowering; -class Type; class Value; class CallLowering { @@ -43,21 +44,30 @@ class CallLowering { virtual void anchor(); public: - struct ArgInfo { + struct BaseArgInfo { + Type *Ty; + SmallVector<ISD::ArgFlagsTy, 4> Flags; + bool IsFixed; + + BaseArgInfo(Type *Ty, + ArrayRef<ISD::ArgFlagsTy> Flags = ArrayRef<ISD::ArgFlagsTy>(), + bool IsFixed = true) + : Ty(Ty), Flags(Flags.begin(), Flags.end()), IsFixed(IsFixed) {} + + BaseArgInfo() : Ty(nullptr), IsFixed(false) {} + }; + + struct ArgInfo : public BaseArgInfo { SmallVector<Register, 4> Regs; // If the argument had to be split into multiple parts according to the // target calling convention, then this contains the original vregs // if the argument was an incoming arg. SmallVector<Register, 2> OrigRegs; - Type *Ty; - SmallVector<ISD::ArgFlagsTy, 4> Flags; - bool IsFixed; ArgInfo(ArrayRef<Register> Regs, Type *Ty, ArrayRef<ISD::ArgFlagsTy> Flags = ArrayRef<ISD::ArgFlagsTy>(), bool IsFixed = true) - : Regs(Regs.begin(), Regs.end()), Ty(Ty), - Flags(Flags.begin(), Flags.end()), IsFixed(IsFixed) { + : BaseArgInfo(Ty, Flags, IsFixed), Regs(Regs.begin(), Regs.end()) { if (!Regs.empty() && Flags.empty()) this->Flags.push_back(ISD::ArgFlagsTy()); // FIXME: We should have just one way of saying "no register". 
@@ -66,7 +76,7 @@ public: "only void types should have no register"); } - ArgInfo() : Ty(nullptr), IsFixed(false) {} + ArgInfo() : BaseArgInfo() {} }; struct CallLoweringInfo { @@ -102,6 +112,15 @@ public: /// True if the call is to a vararg function. bool IsVarArg = false; + + /// True if the function's return value can be lowered to registers. + bool CanLowerReturn = true; + + /// VReg to hold the hidden sret parameter. + Register DemoteRegister; + + /// The stack index for sret demotion. + int DemoteStackIndex; }; /// Argument handling is mostly uniform between the four places that @@ -111,15 +130,18 @@ public: /// argument should go, exactly what happens can vary slightly. This /// class abstracts the differences. struct ValueHandler { - ValueHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI, - CCAssignFn *AssignFn) - : MIRBuilder(MIRBuilder), MRI(MRI), AssignFn(AssignFn) {} + ValueHandler(bool IsIncoming, MachineIRBuilder &MIRBuilder, + MachineRegisterInfo &MRI, CCAssignFn *AssignFn) + : MIRBuilder(MIRBuilder), MRI(MRI), AssignFn(AssignFn), + IsIncomingArgumentHandler(IsIncoming) {} virtual ~ValueHandler() = default; /// Returns true if the handler is dealing with incoming arguments, /// i.e. those that move values from some physical location to vregs. - virtual bool isIncomingArgumentHandler() const = 0; + bool isIncomingArgumentHandler() const { + return IsIncomingArgumentHandler; + } /// Materialize a VReg containing the address of the specified /// stack-based object. This is either based on a FrameIndex or @@ -147,6 +169,7 @@ public: virtual void assignValueToAddress(const ArgInfo &Arg, Register Addr, uint64_t Size, MachinePointerInfo &MPO, CCValAssign &VA) { + assert(Arg.Regs.size() == 1); assignValueToAddress(Arg.Regs[0], Addr, Size, MPO, VA); } @@ -177,9 +200,22 @@ public: CCAssignFn *AssignFn; private: + bool IsIncomingArgumentHandler; virtual void anchor(); }; + struct IncomingValueHandler : public ValueHandler { + IncomingValueHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI, + CCAssignFn *AssignFn) + : ValueHandler(true, MIRBuilder, MRI, AssignFn) {} + }; + + struct OutgoingValueHandler : public ValueHandler { + OutgoingValueHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI, + CCAssignFn *AssignFn) + : ValueHandler(false, MIRBuilder, MRI, AssignFn) {} + }; + protected: /// Getter for generic TargetLowering class. const TargetLowering *getTLI() const { @@ -192,6 +228,17 @@ protected: return static_cast<const XXXTargetLowering *>(TLI); } + /// \returns Flags corresponding to the attributes on the \p ArgIdx-th + /// parameter of \p Call. + ISD::ArgFlagsTy getAttributesForArgIdx(const CallBase &Call, + unsigned ArgIdx) const; + + /// Adds flags to \p Flags based off of the attributes in \p Attrs. + /// \p OpIdx is the index in \p Attrs to add flags from. + void addArgFlagsFromAttributes(ISD::ArgFlagsTy &Flags, + const AttributeList &Attrs, + unsigned OpIdx) const; + template <typename FuncInfoTy> void setArgFlags(ArgInfo &Arg, unsigned OpIdx, const DataLayout &DL, const FuncInfoTy &FuncInfo) const; @@ -215,7 +262,7 @@ protected: MachineIRBuilder &MIRBuilder) const; /// Invoke Handler::assignArg on each of the given \p Args and then use - /// \p Callback to move them to the assigned locations. + /// \p Handler to move them to the assigned locations. /// /// \return True if everything has succeeded, false otherwise. 
bool handleAssignments(MachineIRBuilder &MIRBuilder, @@ -235,6 +282,14 @@ protected: CCAssignFn &AssignFnFixed, CCAssignFn &AssignFnVarArg) const; + /// Check whether parameters to a call that are passed in callee saved + /// registers are the same as from the calling function. This needs to be + /// checked for tail call eligibility. + bool parametersInCSRMatch(const MachineRegisterInfo &MRI, + const uint32_t *CallerPreservedMask, + const SmallVectorImpl<CCValAssign> &ArgLocs, + const SmallVectorImpl<ArgInfo> &OutVals) const; + /// \returns True if the calling convention for a callee and its caller pass /// results in the same way. Typically used for tail call eligibility checks. /// @@ -265,20 +320,73 @@ public: return false; } + /// Load the returned value from the stack into virtual registers in \p VRegs. + /// It uses the frame index \p FI and the start offset from \p DemoteReg. + /// The loaded data size will be determined from \p RetTy. + void insertSRetLoads(MachineIRBuilder &MIRBuilder, Type *RetTy, + ArrayRef<Register> VRegs, Register DemoteReg, + int FI) const; + + /// Store the return value given by \p VRegs into stack starting at the offset + /// specified in \p DemoteReg. + void insertSRetStores(MachineIRBuilder &MIRBuilder, Type *RetTy, + ArrayRef<Register> VRegs, Register DemoteReg) const; + + /// Insert the hidden sret ArgInfo to the beginning of \p SplitArgs. + /// This function should be called from the target specific + /// lowerFormalArguments when \p F requires the sret demotion. + void insertSRetIncomingArgument(const Function &F, + SmallVectorImpl<ArgInfo> &SplitArgs, + Register &DemoteReg, MachineRegisterInfo &MRI, + const DataLayout &DL) const; + + /// For the call-base described by \p CB, insert the hidden sret ArgInfo to + /// the OrigArgs field of \p Info. + void insertSRetOutgoingArgument(MachineIRBuilder &MIRBuilder, + const CallBase &CB, + CallLoweringInfo &Info) const; + + /// \return True if the return type described by \p Outs can be returned + /// without performing sret demotion. + bool checkReturn(CCState &CCInfo, SmallVectorImpl<BaseArgInfo> &Outs, + CCAssignFn *Fn) const; + + /// Get the type and the ArgFlags for the split components of \p RetTy as + /// returned by \c ComputeValueVTs. + void getReturnInfo(CallingConv::ID CallConv, Type *RetTy, AttributeList Attrs, + SmallVectorImpl<BaseArgInfo> &Outs, + const DataLayout &DL) const; + + /// Toplevel function to check the return type based on the target calling + /// convention. \return True if the return value of \p MF can be returned + /// without performing sret demotion. + bool checkReturnTypeForCallConv(MachineFunction &MF) const; + + /// This hook must be implemented to check whether the return values + /// described by \p Outs can fit into the return registers. If false + /// is returned, an sret-demotion is performed. + virtual bool canLowerReturn(MachineFunction &MF, CallingConv::ID CallConv, + SmallVectorImpl<BaseArgInfo> &Outs, + bool IsVarArg) const { + return true; + } + /// This hook must be implemented to lower outgoing return values, described /// by \p Val, into the specified virtual registers \p VRegs. /// This hook is used by GlobalISel. /// + /// \p FLI is required for sret demotion. + /// /// \p SwiftErrorVReg is non-zero if the function has a swifterror parameter /// that needs to be implicitly returned. /// /// \return True if the lowering succeeds, false otherwise. 
virtual bool lowerReturn(MachineIRBuilder &MIRBuilder, const Value *Val, - ArrayRef<Register> VRegs, + ArrayRef<Register> VRegs, FunctionLoweringInfo &FLI, Register SwiftErrorVReg) const { if (!supportSwiftError()) { assert(SwiftErrorVReg == 0 && "attempt to use unsupported swifterror"); - return lowerReturn(MIRBuilder, Val, VRegs); + return lowerReturn(MIRBuilder, Val, VRegs, FLI); } return false; } @@ -286,7 +394,8 @@ public: /// This hook behaves as the extended lowerReturn function, but for targets /// that do not support swifterror value promotion. virtual bool lowerReturn(MachineIRBuilder &MIRBuilder, const Value *Val, - ArrayRef<Register> VRegs) const { + ArrayRef<Register> VRegs, + FunctionLoweringInfo &FLI) const { return false; } @@ -299,12 +408,13 @@ public: /// the second in \c VRegs[1], and so on. For each argument, there will be one /// register for each non-aggregate type, as returned by \c computeValueLLTs. /// \p MIRBuilder is set to the proper insertion for the argument - /// lowering. + /// lowering. \p FLI is required for sret demotion. /// /// \return True if the lowering succeeded, false otherwise. virtual bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F, - ArrayRef<ArrayRef<Register>> VRegs) const { + ArrayRef<ArrayRef<Register>> VRegs, + FunctionLoweringInfo &FLI) const { return false; } diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h index c317b7ed4c54..8570f5ca5dd5 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h @@ -17,6 +17,8 @@ #ifndef LLVM_CODEGEN_GLOBALISEL_COMBINER_HELPER_H #define LLVM_CODEGEN_GLOBALISEL_COMBINER_HELPER_H +#include "llvm/ADT/APFloat.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/CodeGen/LowLevelType.h" #include "llvm/CodeGen/Register.h" #include "llvm/Support/Alignment.h" @@ -25,12 +27,15 @@ namespace llvm { class GISelChangeObserver; class MachineIRBuilder; +class MachineInstrBuilder; class MachineRegisterInfo; class MachineInstr; class MachineOperand; class GISelKnownBits; class MachineDominatorTree; class LegalizerInfo; +struct LegalityQuery; +class TargetLowering; struct PreferredTuple { LLT Ty; // The result type of the extend. @@ -50,6 +55,37 @@ struct PtrAddChain { Register Base; }; +struct RegisterImmPair { + Register Reg; + int64_t Imm; +}; + +struct ShiftOfShiftedLogic { + MachineInstr *Logic; + MachineInstr *Shift2; + Register LogicNonShiftReg; + uint64_t ValSum; +}; + +using OperandBuildSteps = + SmallVector<std::function<void(MachineInstrBuilder &)>, 4>; +struct InstructionBuildSteps { + unsigned Opcode = 0; /// The opcode for the produced instruction. + OperandBuildSteps OperandFns; /// Operands to be added to the instruction. + InstructionBuildSteps() = default; + InstructionBuildSteps(unsigned Opcode, const OperandBuildSteps &OperandFns) + : Opcode(Opcode), OperandFns(OperandFns) {} +}; + +struct InstructionStepsMatchInfo { + /// Describes instructions to be built during a combine. 
+ SmallVector<InstructionBuildSteps, 2> InstrsToBuild; + InstructionStepsMatchInfo() = default; + InstructionStepsMatchInfo( + std::initializer_list<InstructionBuildSteps> InstrsToBuild) + : InstrsToBuild(InstrsToBuild) {} +}; + class CombinerHelper { protected: MachineIRBuilder &Builder; @@ -69,6 +105,12 @@ public: return KB; } + const TargetLowering &getTargetLowering() const; + + /// \return true if the combine is running prior to legalization, or if \p + /// Query is legal on the target. + bool isLegalOrBeforeLegalizer(const LegalityQuery &Query) const; + /// MachineRegisterInfo::replaceRegWith() and inform the observer of the changes void replaceRegWith(MachineRegisterInfo &MRI, Register FromReg, Register ToReg) const; @@ -107,12 +149,17 @@ public: bool matchCombineIndexedLoadStore(MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo); void applyCombineIndexedLoadStore(MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo); - bool matchSextAlreadyExtended(MachineInstr &MI); - bool applySextAlreadyExtended(MachineInstr &MI); + bool matchSextTruncSextLoad(MachineInstr &MI); + bool applySextTruncSextLoad(MachineInstr &MI); - bool matchElideBrByInvertingCond(MachineInstr &MI); - void applyElideBrByInvertingCond(MachineInstr &MI); - bool tryElideBrByInvertingCond(MachineInstr &MI); + /// Match sext_inreg(load p), imm -> sextload p + bool matchSextInRegOfLoad(MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo); + bool applySextInRegOfLoad(MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo); + + /// If a brcond's true block is not the fallthrough, make it so by inverting + /// the condition and swapping operands. + bool matchOptBrCondByInvertingCond(MachineInstr &MI); + void applyOptBrCondByInvertingCond(MachineInstr &MI); /// If \p MI is G_CONCAT_VECTORS, try to combine it. /// Returns true if MI changed. @@ -189,10 +236,28 @@ public: bool matchPtrAddImmedChain(MachineInstr &MI, PtrAddChain &MatchInfo); bool applyPtrAddImmedChain(MachineInstr &MI, PtrAddChain &MatchInfo); + /// Fold (shift (shift base, x), y) -> (shift base (x+y)) + bool matchShiftImmedChain(MachineInstr &MI, RegisterImmPair &MatchInfo); + bool applyShiftImmedChain(MachineInstr &MI, RegisterImmPair &MatchInfo); + + /// If we have a shift-by-constant of a bitwise logic op that itself has a + /// shift-by-constant operand with identical opcode, we may be able to convert + /// that into 2 independent shifts followed by the logic op. + bool matchShiftOfShiftedLogic(MachineInstr &MI, + ShiftOfShiftedLogic &MatchInfo); + bool applyShiftOfShiftedLogic(MachineInstr &MI, + ShiftOfShiftedLogic &MatchInfo); + /// Transform a multiply by a power-of-2 value to a left shift. bool matchCombineMulToShl(MachineInstr &MI, unsigned &ShiftVal); bool applyCombineMulToShl(MachineInstr &MI, unsigned &ShiftVal); + // Transform a G_SHL with an extended source into a narrower shift if + // possible. + bool matchCombineShlOfExtend(MachineInstr &MI, RegisterImmPair &MatchData); + bool applyCombineShlOfExtend(MachineInstr &MI, + const RegisterImmPair &MatchData); + /// Reduce a shift by a constant to an unmerge and a shift on a half sized /// type. This will not produce a shift smaller than \p TargetShiftSize. 
bool matchCombineShiftToUnmerge(MachineInstr &MI, unsigned TargetShiftSize, @@ -200,6 +265,86 @@ public: bool applyCombineShiftToUnmerge(MachineInstr &MI, const unsigned &ShiftVal); bool tryCombineShiftToUnmerge(MachineInstr &MI, unsigned TargetShiftAmount); + /// Transform <ty,...> G_UNMERGE(G_MERGE ty X, Y, Z) -> ty X, Y, Z. + bool + matchCombineUnmergeMergeToPlainValues(MachineInstr &MI, + SmallVectorImpl<Register> &Operands); + bool + applyCombineUnmergeMergeToPlainValues(MachineInstr &MI, + SmallVectorImpl<Register> &Operands); + + /// Transform G_UNMERGE Constant -> Constant1, Constant2, ... + bool matchCombineUnmergeConstant(MachineInstr &MI, + SmallVectorImpl<APInt> &Csts); + bool applyCombineUnmergeConstant(MachineInstr &MI, + SmallVectorImpl<APInt> &Csts); + + /// Transform X, Y<dead> = G_UNMERGE Z -> X = G_TRUNC Z. + bool matchCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI); + bool applyCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI); + + /// Transform X, Y = G_UNMERGE(G_ZEXT(Z)) -> X = G_ZEXT(Z); Y = G_CONSTANT 0 + bool matchCombineUnmergeZExtToZExt(MachineInstr &MI); + bool applyCombineUnmergeZExtToZExt(MachineInstr &MI); + + /// Transform fp_instr(cst) to constant result of the fp operation. + bool matchCombineConstantFoldFpUnary(MachineInstr &MI, + Optional<APFloat> &Cst); + bool applyCombineConstantFoldFpUnary(MachineInstr &MI, + Optional<APFloat> &Cst); + + /// Transform IntToPtr(PtrToInt(x)) to x if cast is in the same address space. + bool matchCombineI2PToP2I(MachineInstr &MI, Register &Reg); + bool applyCombineI2PToP2I(MachineInstr &MI, Register &Reg); + + /// Transform PtrToInt(IntToPtr(x)) to x. + bool matchCombineP2IToI2P(MachineInstr &MI, Register &Reg); + bool applyCombineP2IToI2P(MachineInstr &MI, Register &Reg); + + /// Transform G_ADD (G_PTRTOINT x), y -> G_PTRTOINT (G_PTR_ADD x, y) + /// Transform G_ADD y, (G_PTRTOINT x) -> G_PTRTOINT (G_PTR_ADD x, y) + bool matchCombineAddP2IToPtrAdd(MachineInstr &MI, + std::pair<Register, bool> &PtrRegAndCommute); + bool applyCombineAddP2IToPtrAdd(MachineInstr &MI, + std::pair<Register, bool> &PtrRegAndCommute); + + // Transform G_PTR_ADD (G_PTRTOINT C1), C2 -> C1 + C2 + bool matchCombineConstPtrAddToI2P(MachineInstr &MI, int64_t &NewCst); + bool applyCombineConstPtrAddToI2P(MachineInstr &MI, int64_t &NewCst); + + /// Transform anyext(trunc(x)) to x. + bool matchCombineAnyExtTrunc(MachineInstr &MI, Register &Reg); + bool applyCombineAnyExtTrunc(MachineInstr &MI, Register &Reg); + + /// Transform [asz]ext([asz]ext(x)) to [asz]ext x. + bool matchCombineExtOfExt(MachineInstr &MI, + std::tuple<Register, unsigned> &MatchInfo); + bool applyCombineExtOfExt(MachineInstr &MI, + std::tuple<Register, unsigned> &MatchInfo); + + /// Transform fneg(fneg(x)) to x. + bool matchCombineFNegOfFNeg(MachineInstr &MI, Register &Reg); + + /// Match fabs(fabs(x)) to fabs(x). + bool matchCombineFAbsOfFAbs(MachineInstr &MI, Register &Src); + bool applyCombineFAbsOfFAbs(MachineInstr &MI, Register &Src); + + /// Transform trunc ([asz]ext x) to x or ([asz]ext x) or (trunc x). + bool matchCombineTruncOfExt(MachineInstr &MI, + std::pair<Register, unsigned> &MatchInfo); + bool applyCombineTruncOfExt(MachineInstr &MI, + std::pair<Register, unsigned> &MatchInfo); + + /// Transform trunc (shl x, K) to shl (trunc x), + /// K => K < VT.getScalarSizeInBits(). 
+ bool matchCombineTruncOfShl(MachineInstr &MI, + std::pair<Register, Register> &MatchInfo); + bool applyCombineTruncOfShl(MachineInstr &MI, + std::pair<Register, Register> &MatchInfo); + + /// Transform G_MUL(x, -1) to G_SUB(0, x) + bool applyCombineMulByNegativeOne(MachineInstr &MI); + /// Return true if any explicit use operand on \p MI is defined by a /// G_IMPLICIT_DEF. bool matchAnyExplicitUseIsUndef(MachineInstr &MI); @@ -214,6 +359,13 @@ public: /// Return true if a G_STORE instruction \p MI is storing an undef value. bool matchUndefStore(MachineInstr &MI); + /// Return true if a G_SELECT instruction \p MI has an undef comparison. + bool matchUndefSelectCmp(MachineInstr &MI); + + /// Return true if a G_SELECT instruction \p MI has a constant comparison. If + /// true, \p OpIdx will store the operand index of the known selected value. + bool matchConstantSelectCmp(MachineInstr &MI, unsigned &OpIdx); + /// Replace an instruction with a G_FCONSTANT with value \p C. bool replaceInstWithFConstant(MachineInstr &MI, double C); @@ -226,6 +378,9 @@ public: /// Delete \p MI and replace all of its uses with its \p OpIdx-th operand. bool replaceSingleDefInstWithOperand(MachineInstr &MI, unsigned OpIdx); + /// Delete \p MI and replace all of its uses with \p Replacement. + bool replaceSingleDefInstWithReg(MachineInstr &MI, Register Replacement); + /// Return true if \p MOP1 and \p MOP2 are register operands are defined by /// equivalent instructions. bool matchEqualDefs(const MachineOperand &MOP1, const MachineOperand &MOP2); @@ -243,6 +398,12 @@ public: /// Check if operand \p OpIdx is zero. bool matchOperandIsZero(MachineInstr &MI, unsigned OpIdx); + /// Check if operand \p OpIdx is undef. + bool matchOperandIsUndef(MachineInstr &MI, unsigned OpIdx); + + /// Check if operand \p OpIdx is known to be a power of 2. + bool matchOperandIsKnownToBeAPowerOfTwo(MachineInstr &MI, unsigned OpIdx); + /// Erase \p MI bool eraseInst(MachineInstr &MI); @@ -252,6 +413,79 @@ public: bool applySimplifyAddToSub(MachineInstr &MI, std::tuple<Register, Register> &MatchInfo); + /// Match (logic_op (op x...), (op y...)) -> (op (logic_op x, y)) + bool + matchHoistLogicOpWithSameOpcodeHands(MachineInstr &MI, + InstructionStepsMatchInfo &MatchInfo); + + /// Replace \p MI with a series of instructions described in \p MatchInfo. + bool applyBuildInstructionSteps(MachineInstr &MI, + InstructionStepsMatchInfo &MatchInfo); + + /// Match ashr (shl x, C), C -> sext_inreg (C) + bool matchAshrShlToSextInreg(MachineInstr &MI, + std::tuple<Register, int64_t> &MatchInfo); + bool applyAshShlToSextInreg(MachineInstr &MI, + std::tuple<Register, int64_t> &MatchInfo); + /// \return true if \p MI is a G_AND instruction whose operands are x and y + /// where x & y == x or x & y == y. (E.g., one of operands is all-ones value.) + /// + /// \param [in] MI - The G_AND instruction. + /// \param [out] Replacement - A register the G_AND should be replaced with on + /// success. + bool matchRedundantAnd(MachineInstr &MI, Register &Replacement); + + /// \return true if \p MI is a G_OR instruction whose operands are x and y + /// where x | y == x or x | y == y. (E.g., one of operands is all-zeros + /// value.) + /// + /// \param [in] MI - The G_OR instruction. + /// \param [out] Replacement - A register the G_OR should be replaced with on + /// success. + bool matchRedundantOr(MachineInstr &MI, Register &Replacement); + + /// \return true if \p MI is a G_SEXT_INREG that can be erased. 
+ bool matchRedundantSExtInReg(MachineInstr &MI); + + /// Combine inverting a result of a compare into the opposite cond code. + bool matchNotCmp(MachineInstr &MI, SmallVectorImpl<Register> &RegsToNegate); + bool applyNotCmp(MachineInstr &MI, SmallVectorImpl<Register> &RegsToNegate); + + /// Fold (xor (and x, y), y) -> (and (not x), y) + ///{ + bool matchXorOfAndWithSameReg(MachineInstr &MI, + std::pair<Register, Register> &MatchInfo); + bool applyXorOfAndWithSameReg(MachineInstr &MI, + std::pair<Register, Register> &MatchInfo); + ///} + + /// Combine G_PTR_ADD with nullptr to G_INTTOPTR + bool matchPtrAddZero(MachineInstr &MI); + bool applyPtrAddZero(MachineInstr &MI); + + /// Combine G_UREM x, (known power of 2) to an add and bitmasking. + bool applySimplifyURemByPow2(MachineInstr &MI); + + bool matchCombineInsertVecElts(MachineInstr &MI, + SmallVectorImpl<Register> &MatchInfo); + + bool applyCombineInsertVecElts(MachineInstr &MI, + SmallVectorImpl<Register> &MatchInfo); + + /// Match expression trees of the form + /// + /// \code + /// sN *a = ... + /// sM val = a[0] | (a[1] << N) | (a[2] << 2N) | (a[3] << 3N) ... + /// \endcode + /// + /// And check if the tree can be replaced with a M-bit load + possibly a + /// bswap. + bool matchLoadOrCombine(MachineInstr &MI, + std::function<void(MachineIRBuilder &)> &MatchInfo); + bool applyLoadOrCombine(MachineInstr &MI, + std::function<void(MachineIRBuilder &)> &MatchInfo); + /// Try to transform \p MI by using all of the above /// combine functions. Returns true if changed. bool tryCombine(MachineInstr &MI); @@ -280,6 +514,30 @@ private: /// \returns true if a candidate is found. bool findPreIndexCandidate(MachineInstr &MI, Register &Addr, Register &Base, Register &Offset); + + /// Helper function for matchLoadOrCombine. Searches for Registers + /// which may have been produced by a load instruction + some arithmetic. + /// + /// \param [in] Root - The search root. + /// + /// \returns The Registers found during the search. + Optional<SmallVector<Register, 8>> + findCandidatesForLoadOrCombine(const MachineInstr *Root) const; + + /// Helper function for matchLoadOrCombine. + /// + /// Checks if every register in \p RegsToVisit is defined by a load + /// instruction + some arithmetic. + /// + /// \param [out] MemOffset2Idx - Maps the byte positions each load ends up + /// at to the index of the load. + /// \param [in] MemSizeInBits - The number of bits each load should produce. + /// + /// \returns The lowest-index load found and the lowest index on success. + Optional<std::pair<MachineInstr *, int64_t>> findLoadOffsetsForLoadOrCombine( + SmallDenseMap<int64_t, int64_t, 8> &MemOffset2Idx, + const SmallVector<Register, 8> &RegsToVisit, + const unsigned MemSizeInBits); }; } // namespace llvm diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GISelChangeObserver.h b/llvm/include/llvm/CodeGen/GlobalISel/GISelChangeObserver.h index d8fe4b3103db..dd7f04a33f4b 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/GISelChangeObserver.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/GISelChangeObserver.h @@ -51,7 +51,7 @@ public: /// For convenience, finishedChangingAllUsesOfReg() will report the completion /// of the changes. The use list may change between this call and /// finishedChangingAllUsesOfReg(). - void changingAllUsesOfReg(const MachineRegisterInfo &MRI, unsigned Reg); + void changingAllUsesOfReg(const MachineRegisterInfo &MRI, Register Reg); /// All instructions reported as changing by changingAllUsesOfReg() have /// finished being changed. 
void finishedChangingAllUsesOfReg(); diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GISelKnownBits.h b/llvm/include/llvm/CodeGen/GlobalISel/GISelKnownBits.h index 55cf54d6e946..eafed3760738 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/GISelKnownBits.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/GISelKnownBits.h @@ -13,13 +13,11 @@ #ifndef LLVM_CODEGEN_GLOBALISEL_KNOWNBITSINFO_H #define LLVM_CODEGEN_GLOBALISEL_KNOWNBITSINFO_H -#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/Register.h" -#include "llvm/IR/PassManager.h" #include "llvm/InitializePasses.h" -#include "llvm/Pass.h" #include "llvm/Support/KnownBits.h" namespace llvm { @@ -36,10 +34,16 @@ class GISelKnownBits : public GISelChangeObserver { /// Cache maintained during a computeKnownBits request. SmallDenseMap<Register, KnownBits, 16> ComputeKnownBitsCache; + void computeKnownBitsMin(Register Src0, Register Src1, KnownBits &Known, + const APInt &DemandedElts, + unsigned Depth = 0); + + unsigned computeNumSignBitsMin(Register Src0, Register Src1, + const APInt &DemandedElts, unsigned Depth = 0); + public: GISelKnownBits(MachineFunction &MF, unsigned MaxDepth = 6); virtual ~GISelKnownBits() = default; - void setMF(MachineFunction &MF); const MachineFunction &getMachineFunction() const { return MF; diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GISelWorkList.h b/llvm/include/llvm/CodeGen/GlobalISel/GISelWorkList.h index b0bb519283b1..9e7ade3ee329 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/GISelWorkList.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/GISelWorkList.h @@ -11,9 +11,6 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/CodeGen/MachineInstr.h" -#include "llvm/Support/Debug.h" namespace llvm { diff --git a/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h b/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h index 751ab67c4e97..8eab8a5846a7 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h @@ -20,12 +20,14 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h" -#include "llvm/CodeGen/SwiftErrorValueTracking.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/SwiftErrorValueTracking.h" #include "llvm/CodeGen/SwitchLoweringUtils.h" #include "llvm/IR/Intrinsics.h" #include "llvm/Support/Allocator.h" +#include "llvm/Support/CodeGen.h" #include <memory> #include <utility> @@ -36,8 +38,8 @@ class BasicBlock; class CallInst; class CallLowering; class Constant; +class ConstrainedFPIntrinsic; class DataLayout; -class FunctionLoweringInfo; class Instruction; class MachineBasicBlock; class MachineFunction; @@ -217,12 +219,14 @@ private: /// Translate an LLVM string intrinsic (memcpy, memset, ...). bool translateMemFunc(const CallInst &CI, MachineIRBuilder &MIRBuilder, - Intrinsic::ID ID); + unsigned Opcode); void getStackGuard(Register DstReg, MachineIRBuilder &MIRBuilder); bool translateOverflowIntrinsic(const CallInst &CI, unsigned Op, MachineIRBuilder &MIRBuilder); + bool translateFixedPointIntrinsic(unsigned Op, const CallInst &CI, + MachineIRBuilder &MIRBuilder); /// Helper function for translateSimpleIntrinsic. 
/// \return The generic opcode for \p IntrinsicID if \p IntrinsicID is a @@ -256,6 +260,19 @@ private: /// \pre \p U is a call instruction. bool translateCall(const User &U, MachineIRBuilder &MIRBuilder); + /// When an invoke or a cleanupret unwinds to the next EH pad, there are + /// many places it could ultimately go. In the IR, we have a single unwind + /// destination, but in the machine CFG, we enumerate all the possible blocks. + /// This function skips over imaginary basic blocks that hold catchswitch + /// instructions, and finds all the "real" machine + /// basic block destinations. As those destinations may not be successors of + /// EHPadBB, here we also calculate the edge probability to those + /// destinations. The passed-in Prob is the edge probability to EHPadBB. + bool findUnwindDestinations( + const BasicBlock *EHPadBB, BranchProbability Prob, + SmallVectorImpl<std::pair<MachineBasicBlock *, BranchProbability>> + &UnwindDests); + bool translateInvoke(const User &U, MachineIRBuilder &MIRBuilder); bool translateCallBr(const User &U, MachineIRBuilder &MIRBuilder); @@ -287,11 +304,37 @@ private: /// MachineBasicBlocks for the function have been created. void finishPendingPhis(); + /// Translate \p Inst into a unary operation \p Opcode. + /// \pre \p U is a unary operation. + bool translateUnaryOp(unsigned Opcode, const User &U, + MachineIRBuilder &MIRBuilder); + /// Translate \p Inst into a binary operation \p Opcode. /// \pre \p U is a binary operation. bool translateBinaryOp(unsigned Opcode, const User &U, MachineIRBuilder &MIRBuilder); + /// If the set of cases should be emitted as a series of branches, return + /// true. If we should emit this as a bunch of and/or'd together conditions, + /// return false. + bool shouldEmitAsBranches(const std::vector<SwitchCG::CaseBlock> &Cases); + /// Helper method for findMergedConditions. + /// This function emits a branch and is used at the leaves of an OR or an + /// AND operator tree. + void emitBranchForMergedCondition(const Value *Cond, MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + MachineBasicBlock *CurBB, + MachineBasicBlock *SwitchBB, + BranchProbability TProb, + BranchProbability FProb, bool InvertCond); + /// Used during condbr translation to find trees of conditions that can be + /// optimized. + void findMergedConditions(const Value *Cond, MachineBasicBlock *TBB, + MachineBasicBlock *FBB, MachineBasicBlock *CurBB, + MachineBasicBlock *SwitchBB, + Instruction::BinaryOps Opc, BranchProbability TProb, + BranchProbability FProb, bool InvertCond); + /// Translate branch (br) instruction. /// \pre \p U is a branch instruction. bool translateBr(const User &U, MachineIRBuilder &MIRBuilder); @@ -305,19 +348,23 @@ private: void emitSwitchCase(SwitchCG::CaseBlock &CB, MachineBasicBlock *SwitchBB, MachineIRBuilder &MIB); - bool lowerJumpTableWorkItem(SwitchCG::SwitchWorkListItem W, - MachineBasicBlock *SwitchMBB, - MachineBasicBlock *CurMBB, - MachineBasicBlock *DefaultMBB, - MachineIRBuilder &MIB, - MachineFunction::iterator BBI, - BranchProbability UnhandledProbs, - SwitchCG::CaseClusterIt I, - MachineBasicBlock *Fallthrough, - bool FallthroughUnreachable); - - bool lowerSwitchRangeWorkItem(SwitchCG::CaseClusterIt I, - Value *Cond, + /// Generate code for the BitTest header block, which precedes each sequence of + /// BitTestCases. + void emitBitTestHeader(SwitchCG::BitTestBlock &BTB, + MachineBasicBlock *SwitchMBB); + /// Generate code to produce one "bit test" for a given BitTestCase \p B.
+ void emitBitTestCase(SwitchCG::BitTestBlock &BB, MachineBasicBlock *NextMBB, + BranchProbability BranchProbToNext, Register Reg, + SwitchCG::BitTestCase &B, MachineBasicBlock *SwitchBB); + + bool lowerJumpTableWorkItem( + SwitchCG::SwitchWorkListItem W, MachineBasicBlock *SwitchMBB, + MachineBasicBlock *CurMBB, MachineBasicBlock *DefaultMBB, + MachineIRBuilder &MIB, MachineFunction::iterator BBI, + BranchProbability UnhandledProbs, SwitchCG::CaseClusterIt I, + MachineBasicBlock *Fallthrough, bool FallthroughUnreachable); + + bool lowerSwitchRangeWorkItem(SwitchCG::CaseClusterIt I, Value *Cond, MachineBasicBlock *Fallthrough, bool FallthroughUnreachable, BranchProbability UnhandledProbs, @@ -325,6 +372,14 @@ private: MachineIRBuilder &MIB, MachineBasicBlock *SwitchMBB); + bool lowerBitTestWorkItem( + SwitchCG::SwitchWorkListItem W, MachineBasicBlock *SwitchMBB, + MachineBasicBlock *CurMBB, MachineBasicBlock *DefaultMBB, + MachineIRBuilder &MIB, MachineFunction::iterator BBI, + BranchProbability DefaultProb, BranchProbability UnhandledProbs, + SwitchCG::CaseClusterIt I, MachineBasicBlock *Fallthrough, + bool FallthroughUnreachable); + bool lowerSwitchWorkItem(SwitchCG::SwitchWorkListItem W, Value *Cond, MachineBasicBlock *SwitchMBB, MachineBasicBlock *DefaultMBB, @@ -351,8 +406,6 @@ private: /// \pre \p U is a return instruction. bool translateRet(const User &U, MachineIRBuilder &MIRBuilder); - bool translateFSub(const User &U, MachineIRBuilder &MIRBuilder); - bool translateFNeg(const User &U, MachineIRBuilder &MIRBuilder); bool translateAdd(const User &U, MachineIRBuilder &MIRBuilder) { @@ -437,6 +490,9 @@ private: bool translateFAdd(const User &U, MachineIRBuilder &MIRBuilder) { return translateBinaryOp(TargetOpcode::G_FADD, U, MIRBuilder); } + bool translateFSub(const User &U, MachineIRBuilder &MIRBuilder) { + return translateBinaryOp(TargetOpcode::G_FSUB, U, MIRBuilder); + } bool translateFMul(const User &U, MachineIRBuilder &MIRBuilder) { return translateBinaryOp(TargetOpcode::G_FMUL, U, MIRBuilder); } @@ -515,6 +571,8 @@ private: /// Current target configuration. Controls how the pass handles errors. const TargetPassConfig *TPC; + CodeGenOpt::Level OptLevel; + /// Current optimization remark emitter. Used to report failures. std::unique_ptr<OptimizationRemarkEmitter> ORE; @@ -614,12 +672,12 @@ private: BranchProbability getEdgeProbability(const MachineBasicBlock *Src, const MachineBasicBlock *Dst) const; - void addSuccessorWithProb(MachineBasicBlock *Src, MachineBasicBlock *Dst, - BranchProbability Prob); + void addSuccessorWithProb( + MachineBasicBlock *Src, MachineBasicBlock *Dst, + BranchProbability Prob = BranchProbability::getUnknown()); public: - // Ctor, nothing fancy. - IRTranslator(); + IRTranslator(CodeGenOpt::Level OptLevel = CodeGenOpt::None); StringRef getPassName() const override { return "IRTranslator"; } diff --git a/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelector.h b/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelector.h index 1af96cb4a9ee..5b8243a93e7f 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelector.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelector.h @@ -112,6 +112,14 @@ enum { /// - InsnID - Instruction ID /// - Expected opcode GIM_CheckOpcode, + + /// Check the opcode on the specified instruction, checking 2 acceptable + /// alternatives. 
+ /// - InsnID - Instruction ID + /// - Expected opcode + /// - Alternative expected opcode + GIM_CheckOpcodeIsEither, + /// Check the instruction has the right number of operands /// - InsnID - Instruction ID /// - Expected number of operands @@ -164,6 +172,15 @@ enum { GIM_CheckMemorySizeEqualToLLT, GIM_CheckMemorySizeLessThanLLT, GIM_CheckMemorySizeGreaterThanLLT, + + /// Check if this is a vector that can be treated as a vector splat + /// constant. This is valid for both G_BUILD_VECTOR as well as + /// G_BUILD_VECTOR_TRUNC. For AllOnes refers to individual bits, so a -1 + /// element. + /// - InsnID - Instruction ID + GIM_CheckIsBuildVectorAllOnes, + GIM_CheckIsBuildVectorAllZeros, + /// Check a generic C++ instruction predicate /// - InsnID - Instruction ID /// - PredicateID - The ID of the predicate function to call @@ -237,6 +254,15 @@ enum { /// - OtherOpIdx - Other operand index GIM_CheckIsSameOperand, + /// Predicates with 'let PredicateCodeUsesOperands = 1' need to examine some + /// named operands that will be recorded in RecordedOperands. Names of these + /// operands are referenced in predicate argument list. Emitter determines + /// StoreIdx(corresponds to the order in which names appear in argument list). + /// - InsnID - Instruction ID + /// - OpIdx - Operand index + /// - StoreIdx - Store location in RecordedOperands. + GIM_RecordNamedOperand, + /// Fail the current try-block, or completely fail to match if there is no /// current try-block. GIM_Reject, @@ -429,6 +455,11 @@ protected: std::vector<ComplexRendererFns::value_type> Renderers; RecordedMIVector MIs; DenseMap<unsigned, unsigned> TempRegisters; + /// Named operands that predicate with 'let PredicateCodeUsesOperands = 1' + /// referenced in its argument list. Operands are inserted at index set by + /// emitter, it corresponds to the order in which names appear in argument + /// list. Currently such predicates don't have more then 3 arguments. + std::array<const MachineOperand *, 3> RecordedOperands; MatcherState(unsigned MaxRenderers); }; @@ -489,21 +520,13 @@ protected: llvm_unreachable( "Subclasses must override this with a tablegen-erated function"); } - virtual bool testMIPredicate_MI(unsigned, const MachineInstr &) const { + virtual bool testMIPredicate_MI( + unsigned, const MachineInstr &, + const std::array<const MachineOperand *, 3> &Operands) const { llvm_unreachable( "Subclasses must override this with a tablegen-erated function"); } - /// Constrain a register operand of an instruction \p I to a specified - /// register class. This could involve inserting COPYs before (for uses) or - /// after (for defs) and may replace the operand of \p I. - /// \returns whether operand regclass constraining succeeded. 
- bool constrainOperandRegToRegClass(MachineInstr &I, unsigned OpIdx, - const TargetRegisterClass &RC, - const TargetInstrInfo &TII, - const TargetRegisterInfo &TRI, - const RegisterBankInfo &RBI) const; - bool isOperandImmEqual(const MachineOperand &MO, int64_t Value, const MachineRegisterInfo &MRI) const; diff --git a/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h b/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h index 73ac578d61be..82e26b0bc355 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h @@ -154,24 +154,31 @@ bool InstructionSelector::executeMatchTable( break; } - case GIM_CheckOpcode: { + case GIM_CheckOpcode: + case GIM_CheckOpcodeIsEither: { int64_t InsnID = MatchTable[CurrentIdx++]; - int64_t Expected = MatchTable[CurrentIdx++]; + int64_t Expected0 = MatchTable[CurrentIdx++]; + int64_t Expected1 = -1; + if (MatcherOpcode == GIM_CheckOpcodeIsEither) + Expected1 = MatchTable[CurrentIdx++]; assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); unsigned Opcode = State.MIs[InsnID]->getOpcode(); DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), - dbgs() << CurrentIdx << ": GIM_CheckOpcode(MIs[" << InsnID - << "], ExpectedOpcode=" << Expected - << ") // Got=" << Opcode << "\n"); - if (Opcode != Expected) { + dbgs() << CurrentIdx << ": GIM_CheckOpcode(MIs[" << InsnID + << "], ExpectedOpcode=" << Expected0; + if (MatcherOpcode == GIM_CheckOpcodeIsEither) + dbgs() << " || " << Expected1; + dbgs() << ") // Got=" << Opcode << "\n"; + ); + + if (Opcode != Expected0 && Opcode != Expected1) { if (handleReject() == RejectAndGiveUp) return false; } break; } - case GIM_SwitchOpcode: { int64_t InsnID = MatchTable[CurrentIdx++]; int64_t LowerBound = MatchTable[CurrentIdx++]; @@ -193,7 +200,7 @@ bool InstructionSelector::executeMatchTable( CurrentIdx = MatchTable[CurrentIdx + (Opcode - LowerBound)]; if (!CurrentIdx) { CurrentIdx = Default; - break; + break; } OnFailResumeAt.push_back(Default); break; @@ -321,6 +328,35 @@ bool InstructionSelector::executeMatchTable( return false; break; } + case GIM_CheckIsBuildVectorAllOnes: + case GIM_CheckIsBuildVectorAllZeros: { + int64_t InsnID = MatchTable[CurrentIdx++]; + + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() << CurrentIdx + << ": GIM_CheckBuildVectorAll{Zeros|Ones}(MIs[" + << InsnID << "])\n"); + assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); + + const MachineInstr *MI = State.MIs[InsnID]; + assert((MI->getOpcode() == TargetOpcode::G_BUILD_VECTOR || + MI->getOpcode() == TargetOpcode::G_BUILD_VECTOR_TRUNC) && + "Expected G_BUILD_VECTOR or G_BUILD_VECTOR_TRUNC"); + + if (MatcherOpcode == GIM_CheckIsBuildVectorAllOnes) { + if (!isBuildVectorAllOnes(*MI, MRI)) { + if (handleReject() == RejectAndGiveUp) + return false; + } + } else { + if (!isBuildVectorAllZeros(*MI, MRI)) { + if (handleReject() == RejectAndGiveUp) + return false; + } + } + + break; + } case GIM_CheckCxxInsnPredicate: { int64_t InsnID = MatchTable[CurrentIdx++]; int64_t Predicate = MatchTable[CurrentIdx++]; @@ -331,7 +367,8 @@ bool InstructionSelector::executeMatchTable( assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); assert(Predicate > GIPFP_MI_Invalid && "Expected a valid predicate"); - if (!testMIPredicate_MI(Predicate, *State.MIs[InsnID])) + if (!testMIPredicate_MI(Predicate, *State.MIs[InsnID], + State.RecordedOperands)) if (handleReject() == RejectAndGiveUp) return false; break; @@ 
-581,6 +618,20 @@ bool InstructionSelector::executeMatchTable( break; } + case GIM_RecordNamedOperand: { + int64_t InsnID = MatchTable[CurrentIdx++]; + int64_t OpIdx = MatchTable[CurrentIdx++]; + uint64_t StoreIdx = MatchTable[CurrentIdx++]; + + DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), + dbgs() << CurrentIdx << ": GIM_RecordNamedOperand(MIs[" + << InsnID << "]->getOperand(" << OpIdx + << "), StoreIdx=" << StoreIdx << ")\n"); + assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); + assert(StoreIdx < State.RecordedOperands.size() && "Index out of range"); + State.RecordedOperands[StoreIdx] = &State.MIs[InsnID]->getOperand(OpIdx); + break; + } case GIM_CheckRegBankForClass: { int64_t InsnID = MatchTable[CurrentIdx++]; int64_t OpIdx = MatchTable[CurrentIdx++]; @@ -1007,8 +1058,12 @@ bool InstructionSelector::executeMatchTable( int64_t OpIdx = MatchTable[CurrentIdx++]; int64_t RCEnum = MatchTable[CurrentIdx++]; assert(OutMIs[InsnID] && "Attempted to add to undefined instruction"); - constrainOperandRegToRegClass(*OutMIs[InsnID].getInstr(), OpIdx, - *TRI.getRegClass(RCEnum), TII, TRI, RBI); + MachineInstr &I = *OutMIs[InsnID].getInstr(); + MachineFunction &MF = *I.getParent()->getParent(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + const TargetRegisterClass &RC = *TRI.getRegClass(RCEnum); + MachineOperand &MO = I.getOperand(OpIdx); + constrainOperandRegClass(MF, TRI, MRI, TII, RBI, I, RC, MO); DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), dbgs() << CurrentIdx << ": GIR_ConstrainOperandRC(OutMIs[" << InsnID << "], " << OpIdx << ", " << RCEnum diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h index 016b0bacab85..e7bda3b4bd97 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h @@ -105,19 +105,23 @@ public: Register SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg()); // zext(trunc x) - > and (aext/copy/trunc x), mask + // zext(sext x) -> and (sext x), mask Register TruncSrc; - if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc)))) { + Register SextSrc; + if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc))) || + mi_match(SrcReg, MRI, m_GSExt(m_Reg(SextSrc)))) { LLT DstTy = MRI.getType(DstReg); if (isInstUnsupported({TargetOpcode::G_AND, {DstTy}}) || isConstantUnsupported(DstTy)) return false; LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI;); LLT SrcTy = MRI.getType(SrcReg); - APInt Mask = APInt::getAllOnesValue(SrcTy.getScalarSizeInBits()); - auto MIBMask = Builder.buildConstant( - DstTy, Mask.zext(DstTy.getScalarSizeInBits())); - Builder.buildAnd(DstReg, Builder.buildAnyExtOrTrunc(DstTy, TruncSrc), - MIBMask); + APInt MaskVal = APInt::getAllOnesValue(SrcTy.getScalarSizeInBits()); + auto Mask = Builder.buildConstant( + DstTy, MaskVal.zext(DstTy.getScalarSizeInBits())); + auto Extended = SextSrc ? 
Builder.buildSExtOrTrunc(DstTy, SextSrc) : + Builder.buildAnyExtOrTrunc(DstTy, TruncSrc); + Builder.buildAnd(DstReg, Extended, Mask); markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts); return true; } @@ -482,7 +486,7 @@ public: MachineRegisterInfo &MRI, MachineIRBuilder &Builder, SmallVectorImpl<Register> &UpdatedDefs, - GISelObserverWrapper &Observer) { + GISelChangeObserver &Observer) { if (!llvm::canReplaceReg(DstReg, SrcReg, MRI)) { Builder.buildCopy(DstReg, SrcReg); UpdatedDefs.push_back(DstReg); @@ -502,20 +506,78 @@ public: Observer.changedInstr(*UseMI); } - bool tryCombineMerges(MachineInstr &MI, - SmallVectorImpl<MachineInstr *> &DeadInsts, - SmallVectorImpl<Register> &UpdatedDefs, - GISelObserverWrapper &Observer) { + /// Return the operand index in \p MI that defines \p Def + static unsigned getDefIndex(const MachineInstr &MI, Register SearchDef) { + unsigned DefIdx = 0; + for (const MachineOperand &Def : MI.defs()) { + if (Def.getReg() == SearchDef) + break; + ++DefIdx; + } + + return DefIdx; + } + + bool tryCombineUnmergeValues(MachineInstr &MI, + SmallVectorImpl<MachineInstr *> &DeadInsts, + SmallVectorImpl<Register> &UpdatedDefs, + GISelChangeObserver &Observer) { assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES); unsigned NumDefs = MI.getNumOperands() - 1; - MachineInstr *SrcDef = - getDefIgnoringCopies(MI.getOperand(NumDefs).getReg(), MRI); + Register SrcReg = MI.getOperand(NumDefs).getReg(); + MachineInstr *SrcDef = getDefIgnoringCopies(SrcReg, MRI); if (!SrcDef) return false; LLT OpTy = MRI.getType(MI.getOperand(NumDefs).getReg()); LLT DestTy = MRI.getType(MI.getOperand(0).getReg()); + + if (SrcDef->getOpcode() == TargetOpcode::G_UNMERGE_VALUES) { + // %0:_(<4 x s16>) = G_FOO + // %1:_(<2 x s16>), %2:_(<2 x s16>) = G_UNMERGE_VALUES %0 + // %3:_(s16), %4:_(s16) = G_UNMERGE_VALUES %1 + // + // %3:_(s16), %4:_(s16), %5:_(s16), %6:_(s16) = G_UNMERGE_VALUES %0 + const unsigned NumSrcOps = SrcDef->getNumOperands(); + Register SrcUnmergeSrc = SrcDef->getOperand(NumSrcOps - 1).getReg(); + LLT SrcUnmergeSrcTy = MRI.getType(SrcUnmergeSrc); + + // If we need to decrease the number of vector elements in the result type + // of an unmerge, this would involve the creation of an equivalent unmerge + // to copy back to the original result registers. + LegalizeActionStep ActionStep = LI.getAction( + {TargetOpcode::G_UNMERGE_VALUES, {OpTy, SrcUnmergeSrcTy}}); + switch (ActionStep.Action) { + case LegalizeActions::Lower: + case LegalizeActions::Unsupported: + break; + case LegalizeActions::FewerElements: + case LegalizeActions::NarrowScalar: + if (ActionStep.TypeIdx == 1) + return false; + break; + default: + return false; + } + + Builder.setInstrAndDebugLoc(MI); + auto NewUnmerge = Builder.buildUnmerge(DestTy, SrcUnmergeSrc); + + // TODO: Should we try to process out the other defs now? If the other + // defs of the source unmerge are also unmerged, we end up with a separate + // unmerge for each one. 
+ unsigned SrcDefIdx = getDefIndex(*SrcDef, SrcReg); + for (unsigned I = 0; I != NumDefs; ++I) { + Register Def = MI.getOperand(I).getReg(); + replaceRegOrBuildCopy(Def, NewUnmerge.getReg(SrcDefIdx * NumDefs + I), + MRI, Builder, UpdatedDefs, Observer); + } + + markInstAndDefDead(MI, *SrcDef, DeadInsts, SrcDefIdx); + return true; + } + MachineInstr *MergeI = SrcDef; unsigned ConvertOp = 0; @@ -743,9 +805,12 @@ public: Changed = tryCombineSExt(MI, DeadInsts, UpdatedDefs); break; case TargetOpcode::G_UNMERGE_VALUES: - Changed = tryCombineMerges(MI, DeadInsts, UpdatedDefs, WrapperObserver); + Changed = + tryCombineUnmergeValues(MI, DeadInsts, UpdatedDefs, WrapperObserver); break; case TargetOpcode::G_MERGE_VALUES: + case TargetOpcode::G_BUILD_VECTOR: + case TargetOpcode::G_CONCAT_VECTORS: // If any of the users of this merge are an unmerge, then add them to the // artifact worklist in case there's folding that can be done looking up. for (MachineInstr &U : MRI.use_instructions(MI.getOperand(0).getReg())) { @@ -829,7 +894,8 @@ private: /// dead. /// MI is not marked dead. void markDefDead(MachineInstr &MI, MachineInstr &DefMI, - SmallVectorImpl<MachineInstr *> &DeadInsts) { + SmallVectorImpl<MachineInstr *> &DeadInsts, + unsigned DefIdx = 0) { // Collect all the copy instructions that are made dead, due to deleting // this instruction. Collect all of them until the Trunc(DefMI). // Eg, @@ -856,8 +922,27 @@ private: break; PrevMI = TmpDef; } - if (PrevMI == &DefMI && MRI.hasOneUse(DefMI.getOperand(0).getReg())) - DeadInsts.push_back(&DefMI); + + if (PrevMI == &DefMI) { + unsigned I = 0; + bool IsDead = true; + for (MachineOperand &Def : DefMI.defs()) { + if (I != DefIdx) { + if (!MRI.use_empty(Def.getReg())) { + IsDead = false; + break; + } + } else { + if (!MRI.hasOneUse(DefMI.getOperand(DefIdx).getReg())) + break; + } + + ++I; + } + + if (IsDead) + DeadInsts.push_back(&DefMI); + } } /// Mark MI as dead. If a def of one of MI's operands, DefMI, would also be @@ -866,9 +951,10 @@ private: /// copies in between the extends and the truncs, and this attempts to collect /// the in between copies if they're dead. void markInstAndDefDead(MachineInstr &MI, MachineInstr &DefMI, - SmallVectorImpl<MachineInstr *> &DeadInsts) { + SmallVectorImpl<MachineInstr *> &DeadInsts, + unsigned DefIdx = 0) { DeadInsts.push_back(&MI); - markDefDead(MI, DefMI, DeadInsts); + markDefDead(MI, DefMI, DeadInsts, DefIdx); } /// Erase the dead instructions in the list and call the observer hooks. 
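The zext combine earlier in this file's diff now also matches zext(sext x), not just zext(trunc x). A minimal, self-contained sketch of the rewrite is below; it assumes only MachineIRBuilder calls already used in the patch (buildConstant, buildSExtOrTrunc, buildAnyExtOrTrunc, buildAnd), and the free-function form and its name are illustrative, not the in-tree combiner.

#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"

using namespace llvm;

// Sketch of the rewrite in isolation. For
//   %1:_(s16) = G_TRUNC %0:_(s32)
//   %2:_(s32) = G_ZEXT %1
// it emits
//   %m:_(s32) = G_CONSTANT i32 65535
//   %e:_(s32) = COPY %0            ; buildAnyExtOrTrunc folds to a copy here
//   %2:_(s32) = G_AND %e, %m
// and the same masking now also applies when the inner operation is a G_SEXT.
static void buildMaskedZExt(MachineIRBuilder &B, MachineRegisterInfo &MRI,
                            Register Dst, Register ZExtSrc, Register Inner,
                            bool InnerIsSExt) {
  LLT DstTy = MRI.getType(Dst);        // wider destination type of the G_ZEXT
  LLT NarrowTy = MRI.getType(ZExtSrc); // width whose high bits must be cleared
  APInt MaskVal = APInt::getAllOnesValue(NarrowTy.getScalarSizeInBits());
  auto Mask =
      B.buildConstant(DstTy, MaskVal.zext(DstTy.getScalarSizeInBits()));
  auto Ext = InnerIsSExt ? B.buildSExtOrTrunc(DstTy, Inner)
                         : B.buildAnyExtOrTrunc(DstTy, Inner);
  B.buildAnd(Dst, Ext, Mask);
}

The G_AND against an all-ones mask of the narrow width makes the zero-extension explicit, which is what lets the original extension artifacts be marked dead.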
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/Legalizer.h b/llvm/include/llvm/CodeGen/GlobalISel/Legalizer.h index e59bf1b91262..690e84f79a6b 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/Legalizer.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/Legalizer.h @@ -64,9 +64,6 @@ public: MachineFunctionProperties::Property::NoPHIs); } - bool combineExtracts(MachineInstr &MI, MachineRegisterInfo &MRI, - const TargetInstrInfo &TII); - bool runOnMachineFunction(MachineFunction &MF) override; static MFResult diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h index 058aacf38634..c3b494e94ff1 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h @@ -32,6 +32,7 @@ class LegalizerInfo; class Legalizer; class MachineRegisterInfo; class GISelChangeObserver; +class TargetLowering; class LegalizerHelper { public: @@ -45,6 +46,7 @@ public: private: MachineRegisterInfo &MRI; const LegalizerInfo &LI; + const TargetLowering &TLI; public: enum LegalizeResult { @@ -62,6 +64,7 @@ public: /// Expose LegalizerInfo so the clients can re-use. const LegalizerInfo &getLegalizerInfo() const { return LI; } + const TargetLowering &getTargetLowering() const { return TLI; } LegalizerHelper(MachineFunction &MF, GISelChangeObserver &Observer, MachineIRBuilder &B); @@ -154,6 +157,10 @@ public: /// def by inserting a G_BITCAST from \p CastTy void bitcastDst(MachineInstr &MI, LLT CastTy, unsigned OpIdx); + /// Widen \p OrigReg to \p WideTy by merging to a wider type, padding with + /// G_IMPLICIT_DEF, and producing dead results. + Register widenWithUnmerge(LLT WideTy, Register OrigReg); + private: LegalizeResult widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx, LLT WideTy); @@ -163,8 +170,10 @@ private: widenScalarExtract(MachineInstr &MI, unsigned TypeIdx, LLT WideTy); LegalizeResult widenScalarInsert(MachineInstr &MI, unsigned TypeIdx, LLT WideTy); - LegalizeResult widenScalarAddSubSat(MachineInstr &MI, unsigned TypeIdx, - LLT WideTy); + LegalizeResult widenScalarAddoSubo(MachineInstr &MI, unsigned TypeIdx, + LLT WideTy); + LegalizeResult widenScalarAddSubShlSat(MachineInstr &MI, unsigned TypeIdx, + LLT WideTy); /// Helper function to split a wide generic register into bitwise blocks with /// the given Type (which implies the number of blocks needed). The generic @@ -191,11 +200,19 @@ private: LLT PartTy, ArrayRef<Register> PartRegs, LLT LeftoverTy = LLT(), ArrayRef<Register> LeftoverRegs = {}); - /// Unmerge \p SrcReg into \p Parts with the greatest common divisor type with - /// \p DstTy and \p NarrowTy. Returns the GCD type. + /// Unmerge \p SrcReg into smaller sized values, and append them to \p + /// Parts. The elements of \p Parts will be the greatest common divisor type + /// of \p DstTy, \p NarrowTy and the type of \p SrcReg. This will compute and + /// return the GCD type. LLT extractGCDType(SmallVectorImpl<Register> &Parts, LLT DstTy, LLT NarrowTy, Register SrcReg); + /// Unmerge \p SrcReg into \p GCDTy typed registers. This will append all of + /// the unpacked registers to \p Parts. This version is if the common unmerge + /// type is already known. + void extractGCDType(SmallVectorImpl<Register> &Parts, LLT GCDTy, + Register SrcReg); + /// Produce a merge of values in \p VRegs to define \p DstReg. Perform a merge /// from the least common multiple type, and convert as appropriate to \p /// DstReg. 
@@ -228,7 +245,23 @@ private: ArrayRef<Register> Src1Regs, ArrayRef<Register> Src2Regs, LLT NarrowTy); + void changeOpcode(MachineInstr &MI, unsigned NewOpcode); + public: + /// Return the alignment to use for a stack temporary object with the given + /// type. + Align getStackTemporaryAlignment(LLT Type, Align MinAlign = Align()) const; + + /// Create a stack temporary based on the size in bytes and the alignment + MachineInstrBuilder createStackTemporary(TypeSize Bytes, Align Alignment, + MachinePointerInfo &PtrInfo); + + /// Get a pointer to vector element \p Index located in memory for a vector of + /// type \p VecTy starting at a base address of \p VecPtr. If \p Index is out + /// of bounds the returned pointer is unspecified, but will be within the + /// vector bounds. + Register getVectorElementPointer(Register VecPtr, LLT VecTy, Register Index); + LegalizeResult fewerElementsVectorImplicitDef(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy); @@ -256,9 +289,11 @@ public: LegalizeResult fewerElementsVectorUnmergeValues(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy); - LegalizeResult fewerElementsVectorBuildVector(MachineInstr &MI, - unsigned TypeIdx, - LLT NarrowTy); + LegalizeResult fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx, + LLT NarrowTy); + LegalizeResult fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI, + unsigned TypeIdx, + LLT NarrowTy); LegalizeResult reduceLoadStoreWidth(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy); @@ -291,34 +326,52 @@ public: LegalizeResult narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx, LLT Ty); LegalizeResult narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx, LLT Ty); + /// Perform Bitcast legalize action on G_EXTRACT_VECTOR_ELT. + LegalizeResult bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx, + LLT CastTy); + + /// Perform Bitcast legalize action on G_INSERT_VECTOR_ELT. 
+ LegalizeResult bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx, + LLT CastTy); + LegalizeResult lowerBitcast(MachineInstr &MI); - LegalizeResult lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty); + LegalizeResult lowerLoad(MachineInstr &MI); + LegalizeResult lowerStore(MachineInstr &MI); + LegalizeResult lowerBitCount(MachineInstr &MI); LegalizeResult lowerU64ToF32BitOps(MachineInstr &MI); - LegalizeResult lowerUITOFP(MachineInstr &MI, unsigned TypeIdx, LLT Ty); - LegalizeResult lowerSITOFP(MachineInstr &MI, unsigned TypeIdx, LLT Ty); - LegalizeResult lowerFPTOUI(MachineInstr &MI, unsigned TypeIdx, LLT Ty); + LegalizeResult lowerUITOFP(MachineInstr &MI); + LegalizeResult lowerSITOFP(MachineInstr &MI); + LegalizeResult lowerFPTOUI(MachineInstr &MI); LegalizeResult lowerFPTOSI(MachineInstr &MI); LegalizeResult lowerFPTRUNC_F64_TO_F16(MachineInstr &MI); - LegalizeResult lowerFPTRUNC(MachineInstr &MI, unsigned TypeIdx, LLT Ty); + LegalizeResult lowerFPTRUNC(MachineInstr &MI); + LegalizeResult lowerFPOWI(MachineInstr &MI); - LegalizeResult lowerMinMax(MachineInstr &MI, unsigned TypeIdx, LLT Ty); - LegalizeResult lowerFCopySign(MachineInstr &MI, unsigned TypeIdx, LLT Ty); + LegalizeResult lowerMinMax(MachineInstr &MI); + LegalizeResult lowerFCopySign(MachineInstr &MI); LegalizeResult lowerFMinNumMaxNum(MachineInstr &MI); LegalizeResult lowerFMad(MachineInstr &MI); LegalizeResult lowerIntrinsicRound(MachineInstr &MI); LegalizeResult lowerFFloor(MachineInstr &MI); LegalizeResult lowerMergeValues(MachineInstr &MI); LegalizeResult lowerUnmergeValues(MachineInstr &MI); + LegalizeResult lowerExtractInsertVectorElt(MachineInstr &MI); LegalizeResult lowerShuffleVector(MachineInstr &MI); LegalizeResult lowerDynStackAlloc(MachineInstr &MI); LegalizeResult lowerExtract(MachineInstr &MI); LegalizeResult lowerInsert(MachineInstr &MI); LegalizeResult lowerSADDO_SSUBO(MachineInstr &MI); + LegalizeResult lowerAddSubSatToMinMax(MachineInstr &MI); + LegalizeResult lowerAddSubSatToAddoSubo(MachineInstr &MI); + LegalizeResult lowerShlSat(MachineInstr &MI); LegalizeResult lowerBswap(MachineInstr &MI); LegalizeResult lowerBitreverse(MachineInstr &MI); LegalizeResult lowerReadWriteRegister(MachineInstr &MI); + LegalizeResult lowerSMULH_UMULH(MachineInstr &MI); + LegalizeResult lowerSelect(MachineInstr &MI); + }; /// Helper function that creates a libcall to the given \p Name using the given diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h index 61e0418757bc..c0a89b6ae619 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h @@ -37,7 +37,6 @@ extern cl::opt<bool> DisableGISelLegalityCheck; class LegalizerHelper; class MachineInstr; -class MachineIRBuilder; class MachineRegisterInfo; class MCInstrInfo; class GISelChangeObserver; @@ -183,7 +182,7 @@ struct TypePairAndMemDesc { MemSize == Other.MemSize; } - /// \returns true if this memory access is legal with for the acecss described + /// \returns true if this memory access is legal with for the access described /// by \p Other (The alignment is sufficient for the size and result type). bool isCompatible(const TypePairAndMemDesc &Other) const { return Type0 == Other.Type0 && Type1 == Other.Type1 && @@ -218,11 +217,19 @@ Predicate any(Predicate P0, Predicate P1, Args... args) { return any(any(P0, P1), args...); } -/// True iff the given type index is the specified types. 
+/// True iff the given type index is the specified type. LegalityPredicate typeIs(unsigned TypeIdx, LLT TypesInit); /// True iff the given type index is one of the specified types. LegalityPredicate typeInSet(unsigned TypeIdx, std::initializer_list<LLT> TypesInit); + +/// True iff the given type index is not the specified type. +inline LegalityPredicate typeIsNot(unsigned TypeIdx, LLT Type) { + return [=](const LegalityQuery &Query) { + return Query.Types[TypeIdx] != Type; + }; +} + /// True iff the given types for the given pair of type indexes is one of the /// specified type pairs. LegalityPredicate @@ -308,6 +315,11 @@ LegalizeMutation changeElementTo(unsigned TypeIdx, unsigned FromTypeIdx); /// Keep the same scalar or element type as the given type. LegalizeMutation changeElementTo(unsigned TypeIdx, LLT Ty); +/// Change the scalar size or element size to have the same scalar size as type +/// index \p FromIndex. Unlike changeElementTo, this discards pointer types and +/// only changes the size. +LegalizeMutation changeElementSizeTo(unsigned TypeIdx, unsigned FromTypeIdx); + /// Widen the scalar type or vector element type for the given type index to the /// next power of 2. LegalizeMutation widenScalarOrEltToNextPow2(unsigned TypeIdx, unsigned Min = 0); @@ -616,8 +628,7 @@ public: /// The instruction is lowered when type index 0 is any type in the given /// list. Keep type index 0 as the same type. LegalizeRuleSet &lowerFor(std::initializer_list<LLT> Types) { - return actionFor(LegalizeAction::Lower, Types, - LegalizeMutations::changeTo(0, 0)); + return actionFor(LegalizeAction::Lower, Types); } /// The instruction is lowered when type index 0 is any type in the given /// list. @@ -628,8 +639,7 @@ public: /// The instruction is lowered when type indexes 0 and 1 is any type pair in /// the given list. Keep type index 0 as the same type. LegalizeRuleSet &lowerFor(std::initializer_list<std::pair<LLT, LLT>> Types) { - return actionFor(LegalizeAction::Lower, Types, - LegalizeMutations::changeTo(0, 0)); + return actionFor(LegalizeAction::Lower, Types); } /// The instruction is lowered when type indexes 0 and 1 is any type pair in /// the given list. @@ -654,6 +664,15 @@ public: Types2); } + /// The instruction is emitted as a library call. + LegalizeRuleSet &libcall() { + using namespace LegalizeMutations; + // We have no choice but conservatively assume that predicate-less lowering + // properly handles all type indices by design: + markAllIdxsAsCovered(); + return actionIf(LegalizeAction::Libcall, always); + } + /// Like legalIf, but for the Libcall action. LegalizeRuleSet &libcallIf(LegalityPredicate Predicate) { // We have no choice but conservatively assume that a libcall with a @@ -696,6 +715,13 @@ public: markAllIdxsAsCovered(); return actionIf(LegalizeAction::NarrowScalar, Predicate, Mutation); } + /// Narrow the scalar, specified in mutation, when type indexes 0 and 1 is any + /// type pair in the given list. + LegalizeRuleSet & + narrowScalarFor(std::initializer_list<std::pair<LLT, LLT>> Types, + LegalizeMutation Mutation) { + return actionFor(LegalizeAction::NarrowScalar, Types, Mutation); + } /// Add more elements to reach the type selected by the mutation if the /// predicate is true. 
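To make the LegalizeRuleSet additions above concrete, here is a minimal sketch, not part of this import, of how a hypothetical target's LegalizerInfo constructor might combine the new libcall(), typeIsNot() and narrowScalarFor() helpers. All opcode and type choices below are illustrative assumptions rather than code from the patch:

  // Inside a hypothetical MyTargetLegalizerInfo constructor (sketch only).
  using namespace TargetOpcode;
  using namespace LegalityPredicates;
  using namespace LegalizeMutations;

  const LLT S32 = LLT::scalar(32);
  const LLT S64 = LLT::scalar(64);
  const LLT S128 = LLT::scalar(128);

  // Assume no native FP remainder: always emit a runtime library call.
  getActionDefinitionsBuilder(G_FREM).libcall();

  // Assume saturating add is only legal for s32; lower every other type.
  getActionDefinitionsBuilder(G_SADDSAT)
      .legalFor({S32})
      .lowerIf(typeIsNot(0, S32));

  // Narrow an s128 zero-extension result to s64 first (type index 0 is the
  // destination type, index 1 the source type).
  getActionDefinitionsBuilder(G_ZEXT)
      .legalFor({{S64, S32}})
      .narrowScalarFor({{S128, S32}}, changeTo(0, S64));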
@@ -800,6 +826,13 @@ public: LegalizeMutations::scalarize(TypeIdx)); } + LegalizeRuleSet &scalarizeIf(LegalityPredicate Predicate, unsigned TypeIdx) { + using namespace LegalityPredicates; + return actionIf(LegalizeAction::FewerElements, + all(Predicate, isVector(typeIdx(TypeIdx))), + LegalizeMutations::scalarize(TypeIdx)); + } + /// Ensure the scalar or element is at least as wide as Ty. LegalizeRuleSet &minScalarOrElt(unsigned TypeIdx, const LLT Ty) { using namespace LegalityPredicates; @@ -857,7 +890,10 @@ public: return actionIf( LegalizeAction::NarrowScalar, [=](const LegalityQuery &Query) { - return scalarWiderThan(TypeIdx, Ty.getSizeInBits()) && Predicate(Query); + const LLT QueryTy = Query.Types[TypeIdx]; + return QueryTy.isScalar() && + QueryTy.getSizeInBits() > Ty.getSizeInBits() && + Predicate(Query); }, changeElementTo(typeIdx(TypeIdx), Ty)); } @@ -883,11 +919,25 @@ public: return Query.Types[LargeTypeIdx].getScalarSizeInBits() > Query.Types[TypeIdx].getSizeInBits(); }, + LegalizeMutations::changeElementSizeTo(TypeIdx, LargeTypeIdx)); + } + + /// Narrow the scalar to match the size of another. + LegalizeRuleSet &maxScalarSameAs(unsigned TypeIdx, unsigned NarrowTypeIdx) { + typeIdx(TypeIdx); + return narrowScalarIf( [=](const LegalityQuery &Query) { - LLT T = Query.Types[LargeTypeIdx]; - return std::make_pair(TypeIdx, - T.isVector() ? T.getElementType() : T); - }); + return Query.Types[NarrowTypeIdx].getScalarSizeInBits() < + Query.Types[TypeIdx].getSizeInBits(); + }, + LegalizeMutations::changeElementSizeTo(TypeIdx, NarrowTypeIdx)); + } + + /// Change the type \p TypeIdx to have the same scalar size as type \p + /// SameSizeIdx. + LegalizeRuleSet &scalarSameSizeAs(unsigned TypeIdx, unsigned SameSizeIdx) { + return minScalarSameAs(TypeIdx, SameSizeIdx) + .maxScalarSameAs(TypeIdx, SameSizeIdx); } /// Conditionally widen the scalar or elt to match the size of another. @@ -1207,6 +1257,12 @@ public: bool isLegal(const LegalityQuery &Query) const { return getAction(Query).Action == LegalizeAction::Legal; } + + bool isLegalOrCustom(const LegalityQuery &Query) const { + auto Action = getAction(Query).Action; + return Action == LegalizeAction::Legal || Action == LegalizeAction::Custom; + } + bool isLegal(const MachineInstr &MI, const MachineRegisterInfo &MRI) const; bool isLegalOrCustom(const MachineInstr &MI, const MachineRegisterInfo &MRI) const; diff --git a/llvm/include/llvm/CodeGen/GlobalISel/Localizer.h b/llvm/include/llvm/CodeGen/GlobalISel/Localizer.h index 67e450641eaf..1d1afff7f934 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/Localizer.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/Localizer.h @@ -52,9 +52,6 @@ private: /// TTI used for getting remat costs for instructions. TargetTransformInfo *TTI; - /// Check whether or not \p MI needs to be moved close to its uses. - bool shouldLocalize(const MachineInstr &MI); - /// Check if \p MOUse is used in the same basic block as \p Def. /// If the use is in the same block, we say it is local. /// When the use is not local, \p InsertMBB will contain the basic @@ -67,6 +64,11 @@ private: typedef SmallSetVector<MachineInstr *, 32> LocalizedSetVecT; + /// If \p Op is a phi operand and not unique in that phi, that is, + /// there are other operands in the phi with the same register, + /// return true. + bool isNonUniquePhiValue(MachineOperand &Op) const; + /// Do inter-block localization from the entry block. 
bool localizeInterBlock(MachineFunction &MF, LocalizedSetVecT &LocalizedInstrs); diff --git a/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h b/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h index 043be086ff41..55d6d365fbb4 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h @@ -39,11 +39,25 @@ inline OneUse_match<SubPat> m_OneUse(const SubPat &SP) { return SP; } +template <typename SubPatternT> struct OneNonDBGUse_match { + SubPatternT SubPat; + OneNonDBGUse_match(const SubPatternT &SP) : SubPat(SP) {} + + bool match(const MachineRegisterInfo &MRI, Register Reg) { + return MRI.hasOneNonDBGUse(Reg) && SubPat.match(MRI, Reg); + } +}; + +template <typename SubPat> +inline OneNonDBGUse_match<SubPat> m_OneNonDBGUse(const SubPat &SP) { + return SP; +} + struct ConstantMatch { int64_t &CR; ConstantMatch(int64_t &C) : CR(C) {} bool match(const MachineRegisterInfo &MRI, Register Reg) { - if (auto MaybeCst = getConstantVRegVal(Reg, MRI)) { + if (auto MaybeCst = getConstantVRegSExtVal(Reg, MRI)) { CR = *MaybeCst; return true; } @@ -53,6 +67,29 @@ struct ConstantMatch { inline ConstantMatch m_ICst(int64_t &Cst) { return ConstantMatch(Cst); } +/// Matcher for a specific constant value. +struct SpecificConstantMatch { + int64_t RequestedVal; + SpecificConstantMatch(int64_t RequestedVal) : RequestedVal(RequestedVal) {} + bool match(const MachineRegisterInfo &MRI, Register Reg) { + int64_t MatchedVal; + return mi_match(Reg, MRI, m_ICst(MatchedVal)) && MatchedVal == RequestedVal; + } +}; + +/// Matches a constant equal to \p RequestedValue. +inline SpecificConstantMatch m_SpecificICst(int64_t RequestedValue) { + return SpecificConstantMatch(RequestedValue); +} + +///{ +/// Convenience matchers for specific integer values. +inline SpecificConstantMatch m_ZeroInt() { return SpecificConstantMatch(0); } +inline SpecificConstantMatch m_AllOnesInt() { + return SpecificConstantMatch(-1); +} +///} + // TODO: Rework this for different kinds of MachineOperand. // Currently assumes the Src for a match is a register. 
// We might want to support taking in some MachineOperands and call getReg on @@ -198,6 +235,12 @@ m_GAdd(const LHS &L, const RHS &R) { } template <typename LHS, typename RHS> +inline BinaryOp_match<LHS, RHS, TargetOpcode::G_PTR_ADD, true> +m_GPtrAdd(const LHS &L, const RHS &R) { + return BinaryOp_match<LHS, RHS, TargetOpcode::G_PTR_ADD, true>(L, R); +} + +template <typename LHS, typename RHS> inline BinaryOp_match<LHS, RHS, TargetOpcode::G_SUB> m_GSub(const LHS &L, const RHS &R) { return BinaryOp_match<LHS, RHS, TargetOpcode::G_SUB>(L, R); @@ -234,6 +277,12 @@ m_GAnd(const LHS &L, const RHS &R) { } template <typename LHS, typename RHS> +inline BinaryOp_match<LHS, RHS, TargetOpcode::G_XOR, true> +m_GXor(const LHS &L, const RHS &R) { + return BinaryOp_match<LHS, RHS, TargetOpcode::G_XOR, true>(L, R); +} + +template <typename LHS, typename RHS> inline BinaryOp_match<LHS, RHS, TargetOpcode::G_OR, true> m_GOr(const LHS &L, const RHS &R) { return BinaryOp_match<LHS, RHS, TargetOpcode::G_OR, true>(L, R); @@ -251,6 +300,12 @@ m_GLShr(const LHS &L, const RHS &R) { return BinaryOp_match<LHS, RHS, TargetOpcode::G_LSHR, false>(L, R); } +template <typename LHS, typename RHS> +inline BinaryOp_match<LHS, RHS, TargetOpcode::G_ASHR, false> +m_GAShr(const LHS &L, const RHS &R) { + return BinaryOp_match<LHS, RHS, TargetOpcode::G_ASHR, false>(L, R); +} + // Helper for unary instructions (G_[ZSA]EXT/G_TRUNC) etc template <typename SrcTy, unsigned Opcode> struct UnaryOp_match { SrcTy L; @@ -384,6 +439,51 @@ struct CheckType { inline CheckType m_SpecificType(LLT Ty) { return Ty; } +template <typename Src0Ty, typename Src1Ty, typename Src2Ty, unsigned Opcode> +struct TernaryOp_match { + Src0Ty Src0; + Src1Ty Src1; + Src2Ty Src2; + + TernaryOp_match(const Src0Ty &Src0, const Src1Ty &Src1, const Src2Ty &Src2) + : Src0(Src0), Src1(Src1), Src2(Src2) {} + template <typename OpTy> + bool match(const MachineRegisterInfo &MRI, OpTy &&Op) { + MachineInstr *TmpMI; + if (mi_match(Op, MRI, m_MInstr(TmpMI))) { + if (TmpMI->getOpcode() == Opcode && TmpMI->getNumOperands() == 4) { + return (Src0.match(MRI, TmpMI->getOperand(1).getReg()) && + Src1.match(MRI, TmpMI->getOperand(2).getReg()) && + Src2.match(MRI, TmpMI->getOperand(3).getReg())); + } + } + return false; + } +}; +template <typename Src0Ty, typename Src1Ty, typename Src2Ty> +inline TernaryOp_match<Src0Ty, Src1Ty, Src2Ty, + TargetOpcode::G_INSERT_VECTOR_ELT> +m_GInsertVecElt(const Src0Ty &Src0, const Src1Ty &Src1, const Src2Ty &Src2) { + return TernaryOp_match<Src0Ty, Src1Ty, Src2Ty, + TargetOpcode::G_INSERT_VECTOR_ELT>(Src0, Src1, Src2); +} + +/// Matches a register negated by a G_SUB. +/// G_SUB 0, %negated_reg +template <typename SrcTy> +inline BinaryOp_match<SpecificConstantMatch, SrcTy, TargetOpcode::G_SUB> +m_Neg(const SrcTy &&Src) { + return m_GSub(m_ZeroInt(), Src); +} + +/// Matches a register not-ed by a G_XOR. 
+/// G_XOR %not_reg, -1 +template <typename SrcTy> +inline BinaryOp_match<SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true> +m_Not(const SrcTy &&Src) { + return m_GXor(Src, m_AllOnesInt()); +} + } // namespace GMIPatternMatch } // namespace llvm diff --git a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h index d6498345f25c..1ab4cd704824 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h @@ -18,9 +18,10 @@ #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DebugLoc.h" - +#include "llvm/IR/Module.h" namespace llvm { @@ -223,6 +224,7 @@ class MachineIRBuilder { protected: void validateTruncExt(const LLT Dst, const LLT Src, bool IsExtend); + void validateUnaryOp(const LLT Res, const LLT Op0); void validateBinaryOp(const LLT Res, const LLT Op0, const LLT Op1); void validateShiftOp(const LLT Res, const LLT Op0, const LLT Op1); @@ -250,6 +252,11 @@ public: setDebugLoc(MI.getDebugLoc()); } + MachineIRBuilder(MachineInstr &MI, GISelChangeObserver &Observer) : + MachineIRBuilder(MI) { + setChangeObserver(Observer); + } + virtual ~MachineIRBuilder() = default; MachineIRBuilder(const MachineIRBuilderState &BState) : State(BState) {} @@ -729,7 +736,7 @@ public: /// depend on bit 0 (for now). /// /// \return The newly created instruction. - MachineInstrBuilder buildBrCond(Register Tst, MachineBasicBlock &Dest); + MachineInstrBuilder buildBrCond(const SrcOp &Tst, MachineBasicBlock &Dest); /// Build and insert G_BRINDIRECT \p Tgt /// @@ -813,7 +820,17 @@ public: /// /// \return a MachineInstrBuilder for the newly created instruction. MachineInstrBuilder buildLoad(const DstOp &Res, const SrcOp &Addr, - MachineMemOperand &MMO); + MachineMemOperand &MMO) { + return buildLoadInstr(TargetOpcode::G_LOAD, Res, Addr, MMO); + } + + /// Build and insert a G_LOAD instruction, while constructing the + /// MachineMemOperand. + MachineInstrBuilder + buildLoad(const DstOp &Res, const SrcOp &Addr, MachinePointerInfo PtrInfo, + Align Alignment, + MachineMemOperand::Flags MMOFlags = MachineMemOperand::MONone, + const AAMDNodes &AAInfo = AAMDNodes()); /// Build and insert `Res = <opcode> Addr, MMO`. /// @@ -847,6 +864,14 @@ public: MachineInstrBuilder buildStore(const SrcOp &Val, const SrcOp &Addr, MachineMemOperand &MMO); + /// Build and insert a G_STORE instruction, while constructing the + /// MachineMemOperand. + MachineInstrBuilder + buildStore(const SrcOp &Val, const SrcOp &Addr, MachinePointerInfo PtrInfo, + Align Alignment, + MachineMemOperand::Flags MMOFlags = MachineMemOperand::MONone, + const AAMDNodes &AAInfo = AAMDNodes()); + /// Build and insert `Res0, ... = G_EXTRACT Src, Idx0`. /// /// \pre setBasicBlock or setMI must have been called. @@ -938,6 +963,23 @@ public: MachineInstrBuilder buildBuildVectorTrunc(const DstOp &Res, ArrayRef<Register> Ops); + /// Build and insert a vector splat of a scalar \p Src using a + /// G_INSERT_VECTOR_ELT and G_SHUFFLE_VECTOR idiom. + /// + /// \pre setBasicBlock or setMI must have been called. + /// \pre \p Src must have the same type as the element type of \p Dst + /// + /// \return a MachineInstrBuilder for the newly created instruction. 
+ MachineInstrBuilder buildShuffleSplat(const DstOp &Res, const SrcOp &Src); + + /// Build and insert \p Res = G_SHUFFLE_VECTOR \p Src1, \p Src2, \p Mask + /// + /// \pre setBasicBlock or setMI must have been called. + /// + /// \return a MachineInstrBuilder for the newly created instruction. + MachineInstrBuilder buildShuffleVector(const DstOp &Res, const SrcOp &Src1, + const SrcOp &Src2, ArrayRef<int> Mask); + /// Build and insert \p Res = G_CONCAT_VECTORS \p Op0, ... /// /// G_CONCAT_VECTORS creates a vector from the concatenation of 2 or more @@ -1521,6 +1563,13 @@ public: return buildInstr(TargetOpcode::G_FSUB, {Dst}, {Src0, Src1}, Flags); } + /// Build and insert \p Res = G_FDIV \p Op0, \p Op1 + MachineInstrBuilder buildFDiv(const DstOp &Dst, const SrcOp &Src0, + const SrcOp &Src1, + Optional<unsigned> Flags = None) { + return buildInstr(TargetOpcode::G_FDIV, {Dst}, {Src0, Src1}, Flags); + } + /// Build and insert \p Res = G_FMA \p Op0, \p Op1, \p Op2 MachineInstrBuilder buildFMA(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, const SrcOp &Src2, @@ -1583,6 +1632,13 @@ public: return buildInstr(TargetOpcode::G_FEXP2, {Dst}, {Src}, Flags); } + /// Build and insert \p Dst = G_FPOW \p Src0, \p Src1 + MachineInstrBuilder buildFPow(const DstOp &Dst, const SrcOp &Src0, + const SrcOp &Src1, + Optional<unsigned> Flags = None) { + return buildInstr(TargetOpcode::G_FPOW, {Dst}, {Src0, Src1}, Flags); + } + /// Build and insert \p Res = G_FCOPYSIGN \p Op0, \p Op1 MachineInstrBuilder buildFCopysign(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1) { @@ -1633,6 +1689,11 @@ public: return buildInstr(TargetOpcode::G_UMAX, {Dst}, {Src0, Src1}); } + /// Build and insert \p Dst = G_ABS \p Src + MachineInstrBuilder buildAbs(const DstOp &Dst, const SrcOp &Src) { + return buildInstr(TargetOpcode::G_ABS, {Dst}, {Src}); + } + /// Build and insert \p Res = G_JUMP_TABLE \p JTI /// /// G_JUMP_TABLE sets \p Res to the address of the jump table specified by @@ -1641,6 +1702,101 @@ public: /// \return a MachineInstrBuilder for the newly created instruction. MachineInstrBuilder buildJumpTable(const LLT PtrTy, unsigned JTI); + /// Build and insert \p Res = G_VECREDUCE_SEQ_FADD \p ScalarIn, \p VecIn + /// + /// \p ScalarIn is the scalar accumulator input to start the sequential + /// reduction operation of \p VecIn. + MachineInstrBuilder buildVecReduceSeqFAdd(const DstOp &Dst, + const SrcOp &ScalarIn, + const SrcOp &VecIn) { + return buildInstr(TargetOpcode::G_VECREDUCE_SEQ_FADD, {Dst}, + {ScalarIn, {VecIn}}); + } + + /// Build and insert \p Res = G_VECREDUCE_SEQ_FMUL \p ScalarIn, \p VecIn + /// + /// \p ScalarIn is the scalar accumulator input to start the sequential + /// reduction operation of \p VecIn. + MachineInstrBuilder buildVecReduceSeqFMul(const DstOp &Dst, + const SrcOp &ScalarIn, + const SrcOp &VecIn) { + return buildInstr(TargetOpcode::G_VECREDUCE_SEQ_FMUL, {Dst}, + {ScalarIn, {VecIn}}); + } + + /// Build and insert \p Res = G_VECREDUCE_FADD \p Src + /// + /// \p ScalarIn is the scalar accumulator input to the reduction operation of + /// \p VecIn. + MachineInstrBuilder buildVecReduceFAdd(const DstOp &Dst, + const SrcOp &ScalarIn, + const SrcOp &VecIn) { + return buildInstr(TargetOpcode::G_VECREDUCE_FADD, {Dst}, {ScalarIn, VecIn}); + } + + /// Build and insert \p Res = G_VECREDUCE_FMUL \p Src + /// + /// \p ScalarIn is the scalar accumulator input to the reduction operation of + /// \p VecIn. 
+ MachineInstrBuilder buildVecReduceFMul(const DstOp &Dst, + const SrcOp &ScalarIn, + const SrcOp &VecIn) { + return buildInstr(TargetOpcode::G_VECREDUCE_FMUL, {Dst}, {ScalarIn, VecIn}); + } + + /// Build and insert \p Res = G_VECREDUCE_FMAX \p Src + MachineInstrBuilder buildVecReduceFMax(const DstOp &Dst, const SrcOp &Src) { + return buildInstr(TargetOpcode::G_VECREDUCE_FMAX, {Dst}, {Src}); + } + + /// Build and insert \p Res = G_VECREDUCE_FMIN \p Src + MachineInstrBuilder buildVecReduceFMin(const DstOp &Dst, const SrcOp &Src) { + return buildInstr(TargetOpcode::G_VECREDUCE_FMIN, {Dst}, {Src}); + } + /// Build and insert \p Res = G_VECREDUCE_ADD \p Src + MachineInstrBuilder buildVecReduceAdd(const DstOp &Dst, const SrcOp &Src) { + return buildInstr(TargetOpcode::G_VECREDUCE_ADD, {Dst}, {Src}); + } + + /// Build and insert \p Res = G_VECREDUCE_MUL \p Src + MachineInstrBuilder buildVecReduceMul(const DstOp &Dst, const SrcOp &Src) { + return buildInstr(TargetOpcode::G_VECREDUCE_MUL, {Dst}, {Src}); + } + + /// Build and insert \p Res = G_VECREDUCE_AND \p Src + MachineInstrBuilder buildVecReduceAnd(const DstOp &Dst, const SrcOp &Src) { + return buildInstr(TargetOpcode::G_VECREDUCE_AND, {Dst}, {Src}); + } + + /// Build and insert \p Res = G_VECREDUCE_OR \p Src + MachineInstrBuilder buildVecReduceOr(const DstOp &Dst, const SrcOp &Src) { + return buildInstr(TargetOpcode::G_VECREDUCE_OR, {Dst}, {Src}); + } + + /// Build and insert \p Res = G_VECREDUCE_XOR \p Src + MachineInstrBuilder buildVecReduceXor(const DstOp &Dst, const SrcOp &Src) { + return buildInstr(TargetOpcode::G_VECREDUCE_XOR, {Dst}, {Src}); + } + + /// Build and insert \p Res = G_VECREDUCE_SMAX \p Src + MachineInstrBuilder buildVecReduceSMax(const DstOp &Dst, const SrcOp &Src) { + return buildInstr(TargetOpcode::G_VECREDUCE_SMAX, {Dst}, {Src}); + } + + /// Build and insert \p Res = G_VECREDUCE_SMIN \p Src + MachineInstrBuilder buildVecReduceSMin(const DstOp &Dst, const SrcOp &Src) { + return buildInstr(TargetOpcode::G_VECREDUCE_SMIN, {Dst}, {Src}); + } + + /// Build and insert \p Res = G_VECREDUCE_UMAX \p Src + MachineInstrBuilder buildVecReduceUMax(const DstOp &Dst, const SrcOp &Src) { + return buildInstr(TargetOpcode::G_VECREDUCE_UMAX, {Dst}, {Src}); + } + + /// Build and insert \p Res = G_VECREDUCE_UMIN \p Src + MachineInstrBuilder buildVecReduceUMin(const DstOp &Dst, const SrcOp &Src) { + return buildInstr(TargetOpcode::G_VECREDUCE_UMIN, {Dst}, {Src}); + } virtual MachineInstrBuilder buildInstr(unsigned Opc, ArrayRef<DstOp> DstOps, ArrayRef<SrcOp> SrcOps, Optional<unsigned> Flags = None); diff --git a/llvm/include/llvm/CodeGen/GlobalISel/RegisterBankInfo.h b/llvm/include/llvm/CodeGen/GlobalISel/RegisterBankInfo.h index 8725d96efd82..da785406bc31 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/RegisterBankInfo.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/RegisterBankInfo.h @@ -104,36 +104,37 @@ public: /// Currently the TableGen-like file would look like: /// \code /// PartialMapping[] = { - /// /*32-bit add*/ {0, 32, GPR}, // Scalar entry repeated for first vec elt. - /// /*2x32-bit add*/ {0, 32, GPR}, {32, 32, GPR}, - /// /*<2x32-bit> vadd {0, 64, VPR} + /// /*32-bit add*/ {0, 32, GPR}, // Scalar entry repeated for first + /// // vec elt. + /// /*2x32-bit add*/ {0, 32, GPR}, {32, 32, GPR}, + /// /*<2x32-bit> vadd*/ {0, 64, VPR} /// }; // PartialMapping duplicated. 
/// /// ValueMapping[] { - /// /*plain 32-bit add*/ {&PartialMapping[0], 1}, + /// /*plain 32-bit add*/ {&PartialMapping[0], 1}, /// /*expanded vadd on 2xadd*/ {&PartialMapping[1], 2}, - /// /*plain <2x32-bit> vadd*/ {&PartialMapping[3], 1} + /// /*plain <2x32-bit> vadd*/ {&PartialMapping[3], 1} /// }; /// \endcode /// /// With the array of pointer, we would have: /// \code /// PartialMapping[] = { - /// /*32-bit add lower */ {0, 32, GPR}, + /// /*32-bit add lower */ { 0, 32, GPR}, /// /*32-bit add upper */ {32, 32, GPR}, - /// /*<2x32-bit> vadd {0, 64, VPR} + /// /*<2x32-bit> vadd */ { 0, 64, VPR} /// }; // No more duplication. /// /// BreakDowns[] = { - /// /*AddBreakDown*/ &PartialMapping[0], + /// /*AddBreakDown*/ &PartialMapping[0], /// /*2xAddBreakDown*/ &PartialMapping[0], &PartialMapping[1], - /// /*VAddBreakDown*/ &PartialMapping[2] + /// /*VAddBreakDown*/ &PartialMapping[2] /// }; // Addresses of PartialMapping duplicated (smaller). /// /// ValueMapping[] { - /// /*plain 32-bit add*/ {&BreakDowns[0], 1}, + /// /*plain 32-bit add*/ {&BreakDowns[0], 1}, /// /*expanded vadd on 2xadd*/ {&BreakDowns[1], 2}, - /// /*plain <2x32-bit> vadd*/ {&BreakDowns[3], 1} + /// /*plain <2x32-bit> vadd*/ {&BreakDowns[3], 1} /// }; /// \endcode /// diff --git a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h index 42d86917721a..68553ab5b1a8 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h @@ -18,11 +18,12 @@ #include "llvm/CodeGen/Register.h" #include "llvm/Support/Alignment.h" #include "llvm/Support/LowLevelTypeImpl.h" -#include "llvm/Support/MachineValueType.h" +#include <cstdint> namespace llvm { class AnalysisUsage; +class GISelKnownBits; class MachineFunction; class MachineInstr; class MachineOperand; @@ -33,10 +34,10 @@ class MachineRegisterInfo; class MCInstrDesc; class RegisterBankInfo; class TargetInstrInfo; +class TargetLowering; class TargetPassConfig; class TargetRegisterInfo; class TargetRegisterClass; -class Twine; class ConstantFP; class APFloat; @@ -51,9 +52,10 @@ Register constrainRegToClass(MachineRegisterInfo &MRI, /// Constrain the Register operand OpIdx, so that it is now constrained to the /// TargetRegisterClass passed as an argument (RegClass). -/// If this fails, create a new virtual register in the correct class and -/// insert a COPY before \p InsertPt if it is a use or after if it is a -/// definition. The debug location of \p InsertPt is used for the new copy. +/// If this fails, create a new virtual register in the correct class and insert +/// a COPY before \p InsertPt if it is a use or after if it is a definition. +/// In both cases, the function also updates the register of RegMo. The debug +/// location of \p InsertPt is used for the new copy. /// /// \return The virtual register constrained to the right register class. Register constrainOperandRegClass(const MachineFunction &MF, @@ -63,12 +65,13 @@ Register constrainOperandRegClass(const MachineFunction &MF, const RegisterBankInfo &RBI, MachineInstr &InsertPt, const TargetRegisterClass &RegClass, - const MachineOperand &RegMO); + MachineOperand &RegMO); -/// Try to constrain Reg so that it is usable by argument OpIdx of the -/// provided MCInstrDesc \p II. If this fails, create a new virtual -/// register in the correct class and insert a COPY before \p InsertPt -/// if it is a use or after if it is a definition. 
+/// Try to constrain Reg so that it is usable by argument OpIdx of the provided +/// MCInstrDesc \p II. If this fails, create a new virtual register in the +/// correct class and insert a COPY before \p InsertPt if it is a use or after +/// if it is a definition. In both cases, the function also updates the register +/// of RegMo. /// This is equivalent to constrainOperandRegClass(..., RegClass, ...) /// with RegClass obtained from the MCInstrDesc. The debug location of \p /// InsertPt is used for the new copy. @@ -80,7 +83,7 @@ Register constrainOperandRegClass(const MachineFunction &MF, const TargetInstrInfo &TII, const RegisterBankInfo &RBI, MachineInstr &InsertPt, const MCInstrDesc &II, - const MachineOperand &RegMO, unsigned OpIdx); + MachineOperand &RegMO, unsigned OpIdx); /// Mutate the newly-selected instruction \p I to constrain its (possibly /// generic) virtual register operands to the instruction's register class. @@ -121,14 +124,19 @@ void reportGISelWarning(MachineFunction &MF, const TargetPassConfig &TPC, MachineOptimizationRemarkEmitter &MORE, MachineOptimizationRemarkMissed &R); +/// If \p VReg is defined by a G_CONSTANT, return the corresponding value. +Optional<APInt> getConstantVRegVal(Register VReg, + const MachineRegisterInfo &MRI); + /// If \p VReg is defined by a G_CONSTANT fits in int64_t /// returns it. -Optional<int64_t> getConstantVRegVal(Register VReg, - const MachineRegisterInfo &MRI); +Optional<int64_t> getConstantVRegSExtVal(Register VReg, + const MachineRegisterInfo &MRI); + /// Simple struct used to hold a constant integer value and a virtual /// register. struct ValueAndVReg { - int64_t Value; + APInt Value; Register VReg; }; /// If \p VReg is defined by a statically evaluable chain of @@ -138,10 +146,13 @@ struct ValueAndVReg { /// When \p LookThroughInstrs == false this function behaves like /// getConstantVRegVal. /// When \p HandleFConstants == false the function bails on G_FCONSTANTs. +/// When \p LookThroughAnyExt == true the function treats G_ANYEXT same as +/// G_SEXT. Optional<ValueAndVReg> getConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs = true, - bool HandleFConstants = true); + bool HandleFConstants = true, + bool LookThroughAnyExt = false); const ConstantFP* getConstantFPVRegVal(Register VReg, const MachineRegisterInfo &MRI); @@ -151,9 +162,20 @@ const ConstantFP* getConstantFPVRegVal(Register VReg, MachineInstr *getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI); -/// Find the def instruction for \p Reg, folding away any trivial copies. Note -/// it may still return a COPY, if it changes the type. May return nullptr if \p -/// Reg is not a generic virtual register. +/// Simple struct used to hold a Register value and the instruction which +/// defines it. +struct DefinitionAndSourceRegister { + MachineInstr *MI; + Register Reg; +}; + +/// Find the def instruction for \p Reg, and underlying value Register folding +/// away any copies. +Optional<DefinitionAndSourceRegister> +getDefSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI); + +/// Find the def instruction for \p Reg, folding away any trivial copies. May +/// return nullptr if \p Reg is not a generic virtual register. 
MachineInstr *getDefIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI); @@ -178,6 +200,12 @@ Optional<APInt> ConstantFoldBinOp(unsigned Opcode, const Register Op1, Optional<APInt> ConstantFoldExtOp(unsigned Opcode, const Register Op1, uint64_t Imm, const MachineRegisterInfo &MRI); +/// Test if the given value is known to have exactly one bit set. This differs +/// from computeKnownBits in that it doesn't necessarily determine which bit is +/// set. +bool isKnownToBeAPowerOfTwo(Register Val, const MachineRegisterInfo &MRI, + GISelKnownBits *KnownBits = nullptr); + /// Returns true if \p Val can be assumed to never be a NaN. If \p SNaN is true, /// this returns if \p Val can be assumed to never be a signaling NaN. bool isKnownNeverNaN(Register Val, const MachineRegisterInfo &MRI, @@ -190,17 +218,65 @@ inline bool isKnownNeverSNaN(Register Val, const MachineRegisterInfo &MRI) { Align inferAlignFromPtrInfo(MachineFunction &MF, const MachinePointerInfo &MPO); -/// Return the least common multiple type of \p Ty0 and \p Ty1, by changing -/// the number of vector elements or scalar bitwidth. The intent is a -/// G_MERGE_VALUES can be constructed from \p Ty0 elements, and unmerged into -/// \p Ty1. -LLT getLCMType(LLT Ty0, LLT Ty1); +/// Return a virtual register corresponding to the incoming argument register \p +/// PhysReg. This register is expected to have class \p RC, and optional type \p +/// RegTy. This assumes all references to the register will use the same type. +/// +/// If there is an existing live-in argument register, it will be returned. +/// This will also ensure there is a valid copy +Register getFunctionLiveInPhysReg(MachineFunction &MF, const TargetInstrInfo &TII, + MCRegister PhysReg, + const TargetRegisterClass &RC, + LLT RegTy = LLT()); + +/// Return the least common multiple type of \p OrigTy and \p TargetTy, by changing the +/// number of vector elements or scalar bitwidth. The intent is a +/// G_MERGE_VALUES, G_BUILD_VECTOR, or G_CONCAT_VECTORS can be constructed from +/// \p OrigTy elements, and unmerged into \p TargetTy +LLVM_READNONE +LLT getLCMType(LLT OrigTy, LLT TargetTy); -/// Return a type that is greatest common divisor of \p OrigTy and \p -/// TargetTy. This will either change the number of vector elements, or -/// bitwidth of scalars. The intent is the result type can be used as the -/// result of a G_UNMERGE_VALUES from \p OrigTy. +/// Return a type where the total size is the greatest common divisor of \p +/// OrigTy and \p TargetTy. This will try to either change the number of vector +/// elements, or bitwidth of scalars. The intent is the result type can be used +/// as the result of a G_UNMERGE_VALUES from \p OrigTy, and then some +/// combination of G_MERGE_VALUES, G_BUILD_VECTOR and G_CONCAT_VECTORS (possibly +/// with intermediate casts) can re-form \p TargetTy. +/// +/// If these are vectors with different element types, this will try to produce +/// a vector with a compatible total size, but the element type of \p OrigTy. If +/// this can't be satisfied, this will produce a scalar smaller than the +/// original vector elements. +/// +/// In the worst case, this returns LLT::scalar(1) +LLVM_READNONE LLT getGCDType(LLT OrigTy, LLT TargetTy); +/// \returns The splat index of a G_SHUFFLE_VECTOR \p MI when \p MI is a splat. +/// If \p MI is not a splat, returns None. +Optional<int> getSplatIndex(MachineInstr &MI); + +/// Returns a scalar constant of a G_BUILD_VECTOR splat if it exists. 
+Optional<int64_t> getBuildVectorConstantSplat(const MachineInstr &MI, + const MachineRegisterInfo &MRI); + +/// Return true if the specified instruction is a G_BUILD_VECTOR or +/// G_BUILD_VECTOR_TRUNC where all of the elements are 0 or undef. +bool isBuildVectorAllZeros(const MachineInstr &MI, + const MachineRegisterInfo &MRI); + +/// Return true if the specified instruction is a G_BUILD_VECTOR or +/// G_BUILD_VECTOR_TRUNC where all of the elements are ~0 or undef. +bool isBuildVectorAllOnes(const MachineInstr &MI, + const MachineRegisterInfo &MRI); + +/// Returns true if given the TargetLowering's boolean contents information, +/// the value \p Val contains a true value. +bool isConstTrueVal(const TargetLowering &TLI, int64_t Val, bool IsVector, + bool IsFP); + +/// Returns an integer representing true, as defined by the +/// TargetBooleanContents. +int64_t getICmpTrueVal(const TargetLowering &TLI, bool IsVector, bool IsFP); } // End namespace llvm. #endif diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h index 534f988c5e96..1974e2f842c9 100644 --- a/llvm/include/llvm/CodeGen/ISDOpcodes.h +++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h @@ -86,7 +86,16 @@ enum NodeType { /// the parent's frame or return address, and so on. FRAMEADDR, RETURNADDR, + + /// ADDROFRETURNADDR - Represents the llvm.addressofreturnaddress intrinsic. + /// This node takes no operand, returns a target-specific pointer to the + /// place in the stack frame where the return address of the current + /// function is stored. ADDROFRETURNADDR, + + /// SPONENTRY - Represents the llvm.sponentry intrinsic. Takes no argument + /// and returns the stack pointer value at the entry of the current + /// function calling this intrinsic. SPONENTRY, /// LOCAL_RECOVER - Represents the llvm.localrecover intrinsic. @@ -274,6 +283,16 @@ enum NodeType { ADDCARRY, SUBCARRY, + /// Carry-using overflow-aware nodes for multiple precision addition and + /// subtraction. These nodes take three operands: The first two are normal lhs + /// and rhs to the add or sub, and the third is a boolean indicating if there + /// is an incoming carry. They produce two results: the normal result of the + /// add or sub, and a boolean that indicates if an overflow occured (*not* + /// flag, because it may be a store to memory, etc.). If the type of the + /// boolean is not i1 then the high bits conform to getBooleanContents. + SADDO_CARRY, + SSUBO_CARRY, + /// RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition. /// These nodes take two operands: the normal LHS and RHS to the add. They /// produce two results: the normal result of the add, and a boolean that @@ -310,6 +329,16 @@ enum NodeType { SSUBSAT, USUBSAT, + /// RESULT = [US]SHLSAT(LHS, RHS) - Perform saturation left shift. The first + /// operand is the value to be shifted, and the second argument is the amount + /// to shift by. Both must be integers of the same bit width (W). If the true + /// value of LHS << RHS exceeds the largest value that can be represented by + /// W bits, the resulting value is this maximum value, Otherwise, if this + /// value is less than the smallest value that can be represented by W bits, + /// the resulting value is this minimum value. + SSHLSAT, + USHLSAT, + /// RESULT = [US]MULFIX(LHS, RHS, SCALE) - Perform fixed point multiplication /// on /// 2 integers with the same width and scale. SCALE represents the scale of @@ -504,7 +533,8 @@ enum NodeType { /// IDX is first scaled by the runtime scaling factor of T. 
Elements IDX /// through (IDX + num_elements(T) - 1) must be valid VECTOR indices. If this /// condition cannot be determined statically but is false at runtime, then - /// the result vector is undefined. + /// the result vector is undefined. The IDX parameter must be a vector index + /// constant type, which for most targets will be an integer pointer type. /// /// This operation supports extracting a fixed-width vector from a scalable /// vector, but not the other way around. @@ -587,6 +617,7 @@ enum NodeType { CTLZ, CTPOP, BITREVERSE, + PARITY, /// Bit counting operators with an undefined result for zero inputs. CTTZ_ZERO_UNDEF, @@ -703,6 +734,21 @@ enum NodeType { FP_TO_SINT, FP_TO_UINT, + /// FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a + /// signed or unsigned integer type with the bit width given in operand 1 with + /// the following semantics: + /// + /// * If the value is NaN, zero is returned. + /// * If the value is larger/smaller than the largest/smallest integer, + /// the largest/smallest integer is returned (saturation). + /// * Otherwise the result of rounding the value towards zero is returned. + /// + /// The width given in operand 1 must be equal to, or smaller than, the scalar + /// result type width. It may end up being smaller than the result witdh as a + /// result of integer type legalization. + FP_TO_SINT_SAT, + FP_TO_UINT_SAT, + /// X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type /// down to the precision of the destination VT. TRUNC is a flag, which is /// always an integer that is zero or one. If TRUNC is 0, this is a @@ -844,13 +890,18 @@ enum NodeType { /// BRCOND - Conditional branch. The first operand is the chain, the /// second is the condition, the third is the block to branch to if the /// condition is true. If the type of the condition is not i1, then the - /// high bits must conform to getBooleanContents. + /// high bits must conform to getBooleanContents. If the condition is undef, + /// it nondeterministically jumps to the block. + /// TODO: Its semantics w.r.t undef requires further discussion; we need to + /// make it sure that it is consistent with optimizations in MIR & the + /// meaning of IMPLICIT_DEF. See https://reviews.llvm.org/D92015 BRCOND, /// BR_CC - Conditional branch. The behavior is like that of SELECT_CC, in /// that the condition is represented as condition code, and two nodes to /// compare, rather than as a combined SetCC node. The operands in order - /// are chain, cc, lhs, rhs, block to branch to if condition is true. + /// are chain, cc, lhs, rhs, block to branch to if condition is true. If + /// condition is undef, it nondeterministically jumps to the block. BR_CC, /// INLINEASM - Represents an inline asm block. This node always has two @@ -981,6 +1032,9 @@ enum NodeType { /// DEBUGTRAP - Trap intended to get the attention of a debugger. DEBUGTRAP, + /// UBSANTRAP - Trap with an immediate describing the kind of sanitizer failure. + UBSANTRAP, + /// PREFETCH - This corresponds to a prefetch intrinsic. The first operand /// is the chain. The other operands are the address to prefetch, /// read / write specifier, locality specifier and instruction / data cache @@ -1075,6 +1129,10 @@ enum NodeType { /// known nonzero constant. The only operand here is the chain. GET_DYNAMIC_AREA_OFFSET, + /// Pseudo probe for AutoFDO, as a place holder in a basic block to improve + /// the sample counts quality. 
+ PSEUDO_PROBE, + /// VSCALE(IMM) - Returns the runtime scaling factor used to calculate the /// number of elements within a scalable vector. IMM is a constant integer /// multiplier that is applied to the runtime value. @@ -1082,12 +1140,25 @@ enum NodeType { /// Generic reduction nodes. These nodes represent horizontal vector /// reduction operations, producing a scalar result. - /// The STRICT variants perform reductions in sequential order. The first + /// The SEQ variants perform reductions in sequential order. The first /// operand is an initial scalar accumulator value, and the second operand /// is the vector to reduce. - VECREDUCE_STRICT_FADD, - VECREDUCE_STRICT_FMUL, - /// These reductions are non-strict, and have a single vector operand. + /// E.g. RES = VECREDUCE_SEQ_FADD f32 ACC, <4 x f32> SRC_VEC + /// ... is equivalent to + /// RES = (((ACC + SRC_VEC[0]) + SRC_VEC[1]) + SRC_VEC[2]) + SRC_VEC[3] + VECREDUCE_SEQ_FADD, + VECREDUCE_SEQ_FMUL, + + /// These reductions have relaxed evaluation order semantics, and have a + /// single vector operand. The order of evaluation is unspecified. For + /// pow-of-2 vectors, one valid legalizer expansion is to use a tree + /// reduction, i.e.: + /// For RES = VECREDUCE_FADD <8 x f16> SRC_VEC + /// PART_RDX = FADD SRC_VEC[0:3], SRC_VEC[4:7] + /// PART_RDX2 = FADD PART_RDX[0:1], PART_RDX[2:3] + /// RES = FADD PART_RDX2[0], PART_RDX2[1] + /// For non-pow-2 vectors, this can be computed by extracting each element + /// and performing the operation as if it were scalarized. VECREDUCE_FADD, VECREDUCE_FMUL, /// FMIN/FMAX nodes can have flags, for NaN/NoNaN variants. @@ -1106,6 +1177,10 @@ enum NodeType { VECREDUCE_UMAX, VECREDUCE_UMIN, +// Vector Predication +#define BEGIN_REGISTER_VP_SDNODE(VPSDID, ...) VPSDID, +#include "llvm/IR/VPIntrinsics.def" + /// BUILTIN_OP_END - This must be the last enum value in this list. /// The target-specific pre-isel opcode values start here. BUILTIN_OP_END @@ -1122,6 +1197,19 @@ static const int FIRST_TARGET_STRICTFP_OPCODE = BUILTIN_OP_END + 400; /// be used with SelectionDAG::getMemIntrinsicNode. static const int FIRST_TARGET_MEMORY_OPCODE = BUILTIN_OP_END + 500; +/// Get underlying scalar opcode for VECREDUCE opcode. +/// For example ISD::AND for ISD::VECREDUCE_AND. +NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode); + +/// Whether this is a vector-predicated Opcode. +bool isVPOpcode(unsigned Opcode); + +/// The operand position of the vector mask. +Optional<unsigned> getVPMaskIdx(unsigned Opcode); + +/// The operand position of the explicit vector length parameter. +Optional<unsigned> getVPExplicitVectorLengthIdx(unsigned Opcode); + //===--------------------------------------------------------------------===// /// MemIndexedMode enum - This enum defines the load / store indexed /// addressing modes. @@ -1244,6 +1332,12 @@ inline bool isUnsignedIntSetCC(CondCode Code) { return Code == SETUGT || Code == SETUGE || Code == SETULT || Code == SETULE; } +/// Return true if this is a setcc instruction that performs an equality +/// comparison when used with integer operands. +inline bool isIntEqualitySetCC(CondCode Code) { + return Code == SETEQ || Code == SETNE; +} + /// Return true if the specified condition returns true if the two operands to /// the condition are equal. Note that if one of the two operands is a NaN, /// this value is meaningless. 
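To connect the SEQ and relaxed-order reduction semantics documented above with the GlobalISel builders added to MachineIRBuilder.h earlier in this diff, here is a small illustrative sketch, not part of this import. The names MIRBuilder, Acc and Vec are assumptions:

  // Assumed to already exist: a MachineIRBuilder MIRBuilder, a scalar s32
  // accumulator register Acc, and a <4 x s32> vector register Vec whose
  // elements are interpreted as floats by the reduction opcodes.
  const LLT S32 = LLT::scalar(32);

  // Strictly ordered reduction:
  //   (((Acc + Vec[0]) + Vec[1]) + Vec[2]) + Vec[3]
  Register SeqSum =
      MIRBuilder.buildVecReduceSeqFAdd(S32, Acc, Vec).getReg(0);

  // Relaxed-order reduction; a legalizer may expand this as a tree.
  Register FastSum =
      MIRBuilder.buildVecReduceFAdd(S32, Acc, Vec).getReg(0);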
diff --git a/llvm/include/llvm/CodeGen/LexicalScopes.h b/llvm/include/llvm/CodeGen/LexicalScopes.h index bac850d327ef..9617ba80c138 100644 --- a/llvm/include/llvm/CodeGen/LexicalScopes.h +++ b/llvm/include/llvm/CodeGen/LexicalScopes.h @@ -194,9 +194,6 @@ public: return I != LexicalScopeMap.end() ? &I->second : nullptr; } - /// dump - Print data structures to dbgs(). - void dump() const; - /// getOrCreateAbstractScope - Find or create an abstract lexical scope. LexicalScope *getOrCreateAbstractScope(const DILocalScope *Scope); diff --git a/llvm/include/llvm/CodeGen/LiveInterval.h b/llvm/include/llvm/CodeGen/LiveInterval.h index 0764257125e6..c2b158ac1b7f 100644 --- a/llvm/include/llvm/CodeGen/LiveInterval.h +++ b/llvm/include/llvm/CodeGen/LiveInterval.h @@ -25,6 +25,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/iterator_range.h" +#include "llvm/CodeGen/Register.h" #include "llvm/CodeGen/SlotIndexes.h" #include "llvm/MC/LaneBitmask.h" #include "llvm/Support/Allocator.h" @@ -597,10 +598,9 @@ namespace llvm { /// @p End. bool isUndefIn(ArrayRef<SlotIndex> Undefs, SlotIndex Begin, SlotIndex End) const { - return std::any_of(Undefs.begin(), Undefs.end(), - [Begin,End] (SlotIndex Idx) -> bool { - return Begin <= Idx && Idx < End; - }); + return llvm::any_of(Undefs, [Begin, End](SlotIndex Idx) -> bool { + return Begin <= Idx && Idx < End; + }); } /// Flush segment set into the regular segment vector. @@ -704,12 +704,16 @@ namespace llvm { private: SubRange *SubRanges = nullptr; ///< Single linked list of subregister live /// ranges. + const Register Reg; // the register or stack slot of this interval. + float Weight = 0.0; // weight of this interval public: - const unsigned reg; // the register or stack slot of this interval. - float weight; // weight of this interval + Register reg() const { return Reg; } + float weight() const { return Weight; } + void incrementWeight(float Inc) { Weight += Inc; } + void setWeight(float Value) { Weight = Value; } - LiveInterval(unsigned Reg, float Weight) : reg(Reg), weight(Weight) {} + LiveInterval(unsigned Reg, float Weight) : Reg(Reg), Weight(Weight) {} ~LiveInterval() { clearSubRanges(); @@ -731,10 +735,10 @@ namespace llvm { ++*this; return res; } - bool operator!=(const SingleLinkedListIterator<T> &Other) { + bool operator!=(const SingleLinkedListIterator<T> &Other) const { return P != Other.operator->(); } - bool operator==(const SingleLinkedListIterator<T> &Other) { + bool operator==(const SingleLinkedListIterator<T> &Other) const { return P == Other.operator->(); } T &operator*() const { @@ -806,14 +810,10 @@ namespace llvm { unsigned getSize() const; /// isSpillable - Can this interval be spilled? - bool isSpillable() const { - return weight != huge_valf; - } + bool isSpillable() const { return Weight != huge_valf; } /// markNotSpillable - Mark interval as not spillable - void markNotSpillable() { - weight = huge_valf; - } + void markNotSpillable() { Weight = huge_valf; } /// For a given lane mask @p LaneMask, compute indexes at which the /// lane is marked undefined by subregister <def,read-undef> definitions. @@ -834,7 +834,7 @@ namespace llvm { /// function will be applied to the L0010 and L0008 subranges. /// /// \p Indexes and \p TRI are required to clean up the VNIs that - /// don't defne the related lane masks after they get shrunk. E.g., + /// don't define the related lane masks after they get shrunk. 
E.g., /// when L000F gets split into L0007 and L0008 maybe only a subset /// of the VNIs that defined L000F defines L0007. /// @@ -870,7 +870,7 @@ namespace llvm { bool operator<(const LiveInterval& other) const { const SlotIndex &thisIndex = beginIndex(); const SlotIndex &otherIndex = other.beginIndex(); - return std::tie(thisIndex, reg) < std::tie(otherIndex, other.reg); + return std::tie(thisIndex, Reg) < std::tie(otherIndex, other.Reg); } void print(raw_ostream &OS) const; diff --git a/llvm/include/llvm/CodeGen/LiveIntervalUnion.h b/llvm/include/llvm/CodeGen/LiveIntervalUnion.h index c555763a4ec2..ad9e06d2bcf0 100644 --- a/llvm/include/llvm/CodeGen/LiveIntervalUnion.h +++ b/llvm/include/llvm/CodeGen/LiveIntervalUnion.h @@ -104,6 +104,9 @@ public: void verify(LiveVirtRegBitSet& VisitedVRegs); #endif + // Get any virtual register that is assign to this physical unit + LiveInterval *getOneVReg() const; + /// Query interferences between a single live virtual register and a live /// interval union. class Query { diff --git a/llvm/include/llvm/CodeGen/LiveIntervals.h b/llvm/include/llvm/CodeGen/LiveIntervals.h index 945a40829714..fa08166791b0 100644 --- a/llvm/include/llvm/CodeGen/LiveIntervals.h +++ b/llvm/include/llvm/CodeGen/LiveIntervals.h @@ -114,8 +114,8 @@ class VirtRegMap; LiveInterval &getInterval(Register Reg) { if (hasInterval(Reg)) return *VirtRegIntervals[Reg.id()]; - else - return createAndComputeVirtRegInterval(Reg); + + return createAndComputeVirtRegInterval(Reg); } const LiveInterval &getInterval(Register Reg) const { @@ -142,14 +142,14 @@ class VirtRegMap; } /// Interval removal. - void removeInterval(unsigned Reg) { + void removeInterval(Register Reg) { delete VirtRegIntervals[Reg]; VirtRegIntervals[Reg] = nullptr; } /// Given a register and an instruction, adds a live segment from that /// instruction to the end of its MBB. - LiveInterval::Segment addSegmentToEndOfBlock(unsigned reg, + LiveInterval::Segment addSegmentToEndOfBlock(Register Reg, MachineInstr &startInst); /// After removing some uses of a register, shrink its live range to just @@ -167,7 +167,7 @@ class VirtRegMap; /// the lane mask of the subregister range. /// This may leave the subrange empty which needs to be cleaned up with /// LiveInterval::removeEmptySubranges() afterwards. - void shrinkToUses(LiveInterval::SubRange &SR, unsigned Reg); + void shrinkToUses(LiveInterval::SubRange &SR, Register Reg); /// Extend the live range \p LR to reach all points in \p Indices. The /// points in the \p Indices array must be jointly dominated by the union @@ -256,9 +256,8 @@ class VirtRegMap; return Indexes->getMBBFromIndex(index); } - void insertMBBInMaps(MachineBasicBlock *MBB, - MachineInstr *InsertionPoint = nullptr) { - Indexes->insertMBBInMaps(MBB, InsertionPoint); + void insertMBBInMaps(MachineBasicBlock *MBB) { + Indexes->insertMBBInMaps(MBB); assert(unsigned(MBB->getNumber()) == RegMaskBlocks.size() && "Blocks must be added in order."); RegMaskBlocks.push_back(std::make_pair(RegMaskSlots.size(), 0)); @@ -423,7 +422,7 @@ class VirtRegMap; /// Reg. Subsequent uses should rely on on-demand recomputation. \note This /// method can result in inconsistent liveness tracking if multiple phyical /// registers share a regunit, and should be used cautiously. 
- void removeAllRegUnitsForPhysReg(unsigned Reg) { + void removeAllRegUnitsForPhysReg(MCRegister Reg) { for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) removeRegUnit(*Units); } @@ -431,7 +430,7 @@ class VirtRegMap; /// Remove value numbers and related live segments starting at position /// \p Pos that are part of any liverange of physical register \p Reg or one /// of its subregisters. - void removePhysRegDefAt(unsigned Reg, SlotIndex Pos); + void removePhysRegDefAt(MCRegister Reg, SlotIndex Pos); /// Remove value number and related live segments of \p LI and its subranges /// that start at position \p Pos. @@ -463,7 +462,7 @@ class VirtRegMap; bool computeDeadValues(LiveInterval &LI, SmallVectorImpl<MachineInstr*> *dead); - static LiveInterval* createInterval(unsigned Reg); + static LiveInterval *createInterval(Register Reg); void printInstrs(raw_ostream &O) const; void dumpInstrs() const; @@ -474,7 +473,7 @@ class VirtRegMap; using ShrinkToUsesWorkList = SmallVector<std::pair<SlotIndex, VNInfo*>, 16>; void extendSegmentsToUses(LiveRange &Segments, - ShrinkToUsesWorkList &WorkList, unsigned Reg, + ShrinkToUsesWorkList &WorkList, Register Reg, LaneBitmask LaneMask); /// Helper function for repairIntervalsInRange(), walks backwards and @@ -484,7 +483,7 @@ class VirtRegMap; void repairOldRegInRange(MachineBasicBlock::iterator Begin, MachineBasicBlock::iterator End, const SlotIndex endIdx, LiveRange &LR, - unsigned Reg, + Register Reg, LaneBitmask LaneMask = LaneBitmask::getAll()); class HMEditor; diff --git a/llvm/include/llvm/CodeGen/LiveRangeEdit.h b/llvm/include/llvm/CodeGen/LiveRangeEdit.h index 3c4273130ab2..87d48adc7f27 100644 --- a/llvm/include/llvm/CodeGen/LiveRangeEdit.h +++ b/llvm/include/llvm/CodeGen/LiveRangeEdit.h @@ -56,14 +56,14 @@ public: /// Called when a virtual register is no longer used. Return false to defer /// its deletion from LiveIntervals. - virtual bool LRE_CanEraseVirtReg(unsigned) { return true; } + virtual bool LRE_CanEraseVirtReg(Register) { return true; } /// Called before shrinking the live range of a virtual register. - virtual void LRE_WillShrinkVirtReg(unsigned) {} + virtual void LRE_WillShrinkVirtReg(Register) {} /// Called after cloning a virtual register. /// This is used for new registers representing connected components of Old. - virtual void LRE_DidCloneVirtReg(unsigned New, unsigned Old) {} + virtual void LRE_DidCloneVirtReg(Register New, Register Old) {} }; private: @@ -152,7 +152,7 @@ public: return *Parent; } - Register getReg() const { return getParent().reg; } + Register getReg() const { return getParent().reg(); } /// Iterator for accessing the new registers added by this edit. using iterator = SmallVectorImpl<Register>::const_iterator; diff --git a/llvm/include/llvm/CodeGen/LiveRegMatrix.h b/llvm/include/llvm/CodeGen/LiveRegMatrix.h index ab4d44f9a611..fc67bce329ab 100644 --- a/llvm/include/llvm/CodeGen/LiveRegMatrix.h +++ b/llvm/include/llvm/CodeGen/LiveRegMatrix.h @@ -104,19 +104,19 @@ public: /// If this function returns IK_Free, it is legal to assign(VirtReg, PhysReg). /// When there is more than one kind of interference, the InterferenceKind /// with the highest enum value is returned. - InterferenceKind checkInterference(LiveInterval &VirtReg, unsigned PhysReg); + InterferenceKind checkInterference(LiveInterval &VirtReg, MCRegister PhysReg); /// Check for interference in the segment [Start, End) that may prevent /// assignment to PhysReg. 
If this function returns true, there is /// interference in the segment [Start, End) of some other interval already /// assigned to PhysReg. If this function returns false, PhysReg is free at /// the segment [Start, End). - bool checkInterference(SlotIndex Start, SlotIndex End, unsigned PhysReg); + bool checkInterference(SlotIndex Start, SlotIndex End, MCRegister PhysReg); /// Assign VirtReg to PhysReg. /// This will mark VirtReg's live range as occupied in the LiveRegMatrix and /// update VirtRegMap. The live range is expected to be available in PhysReg. - void assign(LiveInterval &VirtReg, unsigned PhysReg); + void assign(LiveInterval &VirtReg, MCRegister PhysReg); /// Unassign VirtReg from its PhysReg. /// Assuming that VirtReg was previously assigned to a PhysReg, this undoes @@ -124,7 +124,7 @@ public: void unassign(LiveInterval &VirtReg); /// Returns true if the given \p PhysReg has any live intervals assigned. - bool isPhysRegUsed(unsigned PhysReg) const; + bool isPhysRegUsed(MCRegister PhysReg) const; //===--------------------------------------------------------------------===// // Low-level interface. @@ -136,22 +136,25 @@ public: /// Check for regmask interference only. /// Return true if VirtReg crosses a regmask operand that clobbers PhysReg. /// If PhysReg is null, check if VirtReg crosses any regmask operands. - bool checkRegMaskInterference(LiveInterval &VirtReg, unsigned PhysReg = 0); + bool checkRegMaskInterference(LiveInterval &VirtReg, + MCRegister PhysReg = MCRegister::NoRegister); /// Check for regunit interference only. /// Return true if VirtReg overlaps a fixed assignment of one of PhysRegs's /// register units. - bool checkRegUnitInterference(LiveInterval &VirtReg, unsigned PhysReg); + bool checkRegUnitInterference(LiveInterval &VirtReg, MCRegister PhysReg); /// Query a line of the assigned virtual register matrix directly. /// Use MCRegUnitIterator to enumerate all regunits in the desired PhysReg. /// This returns a reference to an internal Query data structure that is only /// valid until the next query() call. - LiveIntervalUnion::Query &query(const LiveRange &LR, unsigned RegUnit); + LiveIntervalUnion::Query &query(const LiveRange &LR, MCRegister RegUnit); /// Directly access the live interval unions per regunit. /// This returns an array indexed by the regunit number. LiveIntervalUnion *getLiveUnions() { return &Matrix[0]; } + + Register getOneVReg(unsigned PhysReg) const; }; } // end namespace llvm diff --git a/llvm/include/llvm/CodeGen/LiveRegUnits.h b/llvm/include/llvm/CodeGen/LiveRegUnits.h index 1ed091e3bb5e..39a1ec461ef6 100644 --- a/llvm/include/llvm/CodeGen/LiveRegUnits.h +++ b/llvm/include/llvm/CodeGen/LiveRegUnits.h @@ -15,7 +15,7 @@ #define LLVM_CODEGEN_LIVEREGUNITS_H #include "llvm/ADT/BitVector.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MachineInstrBundle.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/MC/LaneBitmask.h" #include "llvm/MC/MCRegisterInfo.h" @@ -67,7 +67,6 @@ public: UsedRegUnits.addReg(Reg); } } - return; } /// Initialize and clear the set. diff --git a/llvm/include/llvm/CodeGen/LiveVariables.h b/llvm/include/llvm/CodeGen/LiveVariables.h index efb0fa85a0fe..9b0667bbbeb0 100644 --- a/llvm/include/llvm/CodeGen/LiveVariables.h +++ b/llvm/include/llvm/CodeGen/LiveVariables.h @@ -105,8 +105,7 @@ public: /// isLiveIn - Is Reg live in to MBB? This means that Reg is live through /// MBB, or it is killed in MBB. If Reg is only used by PHI instructions in /// MBB, it is not considered live in. 
- bool isLiveIn(const MachineBasicBlock &MBB, - unsigned Reg, + bool isLiveIn(const MachineBasicBlock &MBB, Register Reg, MachineRegisterInfo &MRI); void dump() const; @@ -149,25 +148,25 @@ private: // Intermediate data structures /// HandlePhysRegKill - Add kills of Reg and its sub-registers to the /// uses. Pay special attention to the sub-register uses which may come below /// the last use of the whole register. - bool HandlePhysRegKill(unsigned Reg, MachineInstr *MI); + bool HandlePhysRegKill(Register Reg, MachineInstr *MI); /// HandleRegMask - Call HandlePhysRegKill for all registers clobbered by Mask. void HandleRegMask(const MachineOperand&); - void HandlePhysRegUse(unsigned Reg, MachineInstr &MI); - void HandlePhysRegDef(unsigned Reg, MachineInstr *MI, + void HandlePhysRegUse(Register Reg, MachineInstr &MI); + void HandlePhysRegDef(Register Reg, MachineInstr *MI, SmallVectorImpl<unsigned> &Defs); void UpdatePhysRegDefs(MachineInstr &MI, SmallVectorImpl<unsigned> &Defs); /// FindLastRefOrPartRef - Return the last reference or partial reference of /// the specified register. - MachineInstr *FindLastRefOrPartRef(unsigned Reg); + MachineInstr *FindLastRefOrPartRef(Register Reg); /// FindLastPartialDef - Return the last partial def of the specified /// register. Also returns the sub-registers that're defined by the /// instruction. - MachineInstr *FindLastPartialDef(unsigned Reg, - SmallSet<unsigned,4> &PartDefRegs); + MachineInstr *FindLastPartialDef(Register Reg, + SmallSet<unsigned, 4> &PartDefRegs); /// analyzePHINodes - Gather information about the PHI nodes in here. In /// particular, we want to map the variable information of a virtual @@ -184,21 +183,21 @@ public: /// RegisterDefIsDead - Return true if the specified instruction defines the /// specified register, but that definition is dead. - bool RegisterDefIsDead(MachineInstr &MI, unsigned Reg) const; + bool RegisterDefIsDead(MachineInstr &MI, Register Reg) const; //===--------------------------------------------------------------------===// // API to update live variable information /// replaceKillInstruction - Update register kill info by replacing a kill /// instruction with a new one. - void replaceKillInstruction(unsigned Reg, MachineInstr &OldMI, + void replaceKillInstruction(Register Reg, MachineInstr &OldMI, MachineInstr &NewMI); /// addVirtualRegisterKilled - Add information about the fact that the /// specified register is killed after being used by the specified /// instruction. If AddIfNotFound is true, add a implicit operand if it's /// not found. - void addVirtualRegisterKilled(unsigned IncomingReg, MachineInstr &MI, + void addVirtualRegisterKilled(Register IncomingReg, MachineInstr &MI, bool AddIfNotFound = false) { if (MI.addRegisterKilled(IncomingReg, TRI, AddIfNotFound)) getVarInfo(IncomingReg).Kills.push_back(&MI); @@ -208,14 +207,14 @@ public: /// register from the live variable information. Returns true if the /// variable was marked as killed by the specified instruction, /// false otherwise. 
- bool removeVirtualRegisterKilled(unsigned reg, MachineInstr &MI) { - if (!getVarInfo(reg).removeKill(MI)) + bool removeVirtualRegisterKilled(Register Reg, MachineInstr &MI) { + if (!getVarInfo(Reg).removeKill(MI)) return false; bool Removed = false; for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { MachineOperand &MO = MI.getOperand(i); - if (MO.isReg() && MO.isKill() && MO.getReg() == reg) { + if (MO.isReg() && MO.isKill() && MO.getReg() == Reg) { MO.setIsKill(false); Removed = true; break; @@ -234,7 +233,7 @@ public: /// addVirtualRegisterDead - Add information about the fact that the specified /// register is dead after being used by the specified instruction. If /// AddIfNotFound is true, add a implicit operand if it's not found. - void addVirtualRegisterDead(unsigned IncomingReg, MachineInstr &MI, + void addVirtualRegisterDead(Register IncomingReg, MachineInstr &MI, bool AddIfNotFound = false) { if (MI.addRegisterDead(IncomingReg, TRI, AddIfNotFound)) getVarInfo(IncomingReg).Kills.push_back(&MI); @@ -244,14 +243,14 @@ public: /// register from the live variable information. Returns true if the /// variable was marked dead at the specified instruction, false /// otherwise. - bool removeVirtualRegisterDead(unsigned reg, MachineInstr &MI) { - if (!getVarInfo(reg).removeKill(MI)) + bool removeVirtualRegisterDead(Register Reg, MachineInstr &MI) { + if (!getVarInfo(Reg).removeKill(MI)) return false; bool Removed = false; for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { MachineOperand &MO = MI.getOperand(i); - if (MO.isReg() && MO.isDef() && MO.getReg() == reg) { + if (MO.isReg() && MO.isDef() && MO.getReg() == Reg) { MO.setIsDead(false); Removed = true; break; @@ -270,24 +269,25 @@ public: /// getVarInfo - Return the VarInfo structure for the specified VIRTUAL /// register. - VarInfo &getVarInfo(unsigned RegIdx); + VarInfo &getVarInfo(Register Reg); void MarkVirtRegAliveInBlock(VarInfo& VRInfo, MachineBasicBlock* DefBlock, MachineBasicBlock *BB); - void MarkVirtRegAliveInBlock(VarInfo& VRInfo, MachineBasicBlock* DefBlock, + void MarkVirtRegAliveInBlock(VarInfo &VRInfo, MachineBasicBlock *DefBlock, MachineBasicBlock *BB, - std::vector<MachineBasicBlock*> &WorkList); - void HandleVirtRegDef(unsigned reg, MachineInstr &MI); - void HandleVirtRegUse(unsigned reg, MachineBasicBlock *MBB, MachineInstr &MI); + SmallVectorImpl<MachineBasicBlock *> &WorkList); + + void HandleVirtRegDef(Register reg, MachineInstr &MI); + void HandleVirtRegUse(Register reg, MachineBasicBlock *MBB, MachineInstr &MI); - bool isLiveIn(unsigned Reg, const MachineBasicBlock &MBB) { + bool isLiveIn(Register Reg, const MachineBasicBlock &MBB) { return getVarInfo(Reg).isLiveIn(MBB, Reg, *MRI); } /// isLiveOut - Determine if Reg is live out from MBB, when not considering /// PHI nodes. This means that Reg is either killed by a successor block or /// passed through one. - bool isLiveOut(unsigned Reg, const MachineBasicBlock &MBB); + bool isLiveOut(Register Reg, const MachineBasicBlock &MBB); /// addNewBlock - Add a new basic block BB between DomBB and SuccBB. All /// variables that are live out of DomBB and live into SuccBB will be marked @@ -303,10 +303,10 @@ public: std::vector<SparseBitVector<>> &LiveInSets); /// isPHIJoin - Return true if Reg is a phi join register. - bool isPHIJoin(unsigned Reg) { return PHIJoins.test(Reg); } + bool isPHIJoin(Register Reg) { return PHIJoins.test(Reg.id()); } /// setPHIJoin - Mark Reg as a phi join register. 
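A short, hedged sketch of the kill-list maintenance these helpers perform (the function and its caller are hypothetical). In-tree code would normally just call replaceKillInstruction shown further up; this only spells out the same bookkeeping by hand.

#include "llvm/CodeGen/LiveVariables.h"
using namespace llvm;

// When NewMI takes over the last use of Reg from OldMI, move the kill marker
// so the per-register Kills list stays accurate.
static void moveKill(LiveVariables &LV, Register Reg,
                     MachineInstr &OldMI, MachineInstr &NewMI) {
  if (LV.removeVirtualRegisterKilled(Reg, OldMI))        // OldMI no longer kills Reg
    LV.addVirtualRegisterKilled(Reg, NewMI,
                                /*AddIfNotFound=*/true); // record it on NewMI
}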
- void setPHIJoin(unsigned Reg) { PHIJoins.set(Reg); } + void setPHIJoin(Register Reg) { PHIJoins.set(Reg.id()); } }; } // End llvm namespace diff --git a/llvm/include/llvm/CodeGen/LowLevelType.h b/llvm/include/llvm/CodeGen/LowLevelType.h index 6295d86f749c..402fa2ce61e7 100644 --- a/llvm/include/llvm/CodeGen/LowLevelType.h +++ b/llvm/include/llvm/CodeGen/LowLevelType.h @@ -23,6 +23,7 @@ namespace llvm { class DataLayout; class Type; +struct fltSemantics; /// Construct a low-level type based on an LLVM type. LLT getLLTForType(Type &Ty, const DataLayout &DL); @@ -35,6 +36,9 @@ MVT getMVTForLLT(LLT Ty); /// scalarable vector types, and will assert if used. LLT getLLTForMVT(MVT Ty); +/// Get the appropriate floating point arithmetic semantic based on the bit size +/// of the given scalar LLT. +const llvm::fltSemantics &getFltSemanticForLLT(LLT Ty); } #endif // LLVM_CODEGEN_LOWLEVELTYPE_H diff --git a/llvm/include/llvm/CodeGen/MBFIWrapper.h b/llvm/include/llvm/CodeGen/MBFIWrapper.h index 062431a6f96b..bcbf3eedf59d 100644 --- a/llvm/include/llvm/CodeGen/MBFIWrapper.h +++ b/llvm/include/llvm/CodeGen/MBFIWrapper.h @@ -28,6 +28,8 @@ class MBFIWrapper { BlockFrequency getBlockFreq(const MachineBasicBlock *MBB) const; void setBlockFreq(const MachineBasicBlock *MBB, BlockFrequency F); + Optional<uint64_t> getBlockProfileCount(const MachineBasicBlock *MBB) const; + raw_ostream &printBlockFreq(raw_ostream &OS, const MachineBasicBlock *MBB) const; raw_ostream &printBlockFreq(raw_ostream &OS, diff --git a/llvm/include/llvm/CodeGen/MIRFormatter.h b/llvm/include/llvm/CodeGen/MIRFormatter.h index e57c32c5ae61..9cb92091db50 100644 --- a/llvm/include/llvm/CodeGen/MIRFormatter.h +++ b/llvm/include/llvm/CodeGen/MIRFormatter.h @@ -14,11 +14,15 @@ #ifndef LLVM_CODEGEN_MIRFORMATTER_H #define LLVM_CODEGEN_MIRFORMATTER_H -#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/ADT/Optional.h" #include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/Support/raw_ostream.h" +#include <cstdint> namespace llvm { +class MachineFunction; +class MachineInstr; struct PerFunctionMIParsingState; struct SlotMapping; diff --git a/llvm/include/llvm/CodeGen/MIRYamlMapping.h b/llvm/include/llvm/CodeGen/MIRYamlMapping.h index c68b073ebb8c..4a7406473b11 100644 --- a/llvm/include/llvm/CodeGen/MIRYamlMapping.h +++ b/llvm/include/llvm/CodeGen/MIRYamlMapping.h @@ -159,6 +159,22 @@ template <> struct ScalarTraits<MaybeAlign> { static QuotingType mustQuote(StringRef) { return QuotingType::None; } }; +template <> struct ScalarTraits<Align> { + static void output(const Align &Alignment, void *, llvm::raw_ostream &OS) { + OS << Alignment.value(); + } + static StringRef input(StringRef Scalar, void *, Align &Alignment) { + unsigned long long N; + if (getAsUnsignedInteger(Scalar, 10, N)) + return "invalid number"; + if (!isPowerOf2_64(N)) + return "must be a power of two"; + Alignment = Align(N); + return StringRef(); + } + static QuotingType mustQuote(StringRef) { return QuotingType::None; } +}; + } // end namespace yaml } // end namespace llvm @@ -331,7 +347,7 @@ struct ScalarEnumerationTraits<TargetStackID::Value> { static void enumeration(yaml::IO &IO, TargetStackID::Value &ID) { IO.enumCase(ID, "default", TargetStackID::Default); IO.enumCase(ID, "sgpr-spill", TargetStackID::SGPRSpill); - IO.enumCase(ID, "sve-vec", TargetStackID::SVEVector); + IO.enumCase(ID, "scalable-vector", TargetStackID::ScalableVector); IO.enumCase(ID, "noalloc", TargetStackID::NoAlloc); } }; @@ -425,6 +441,36 @@ template <> struct MappingTraits<CallSiteInfo> { 
static const bool flow = true; }; +/// Serializable representation of debug value substitutions. +struct DebugValueSubstitution { + unsigned SrcInst; + unsigned SrcOp; + unsigned DstInst; + unsigned DstOp; + + bool operator==(const DebugValueSubstitution &Other) const { + return std::tie(SrcInst, SrcOp, DstInst, DstOp) == + std::tie(Other.SrcInst, Other.SrcOp, Other.DstInst, Other.DstOp); + } +}; + +template <> struct MappingTraits<DebugValueSubstitution> { + static void mapping(IO &YamlIO, DebugValueSubstitution &Sub) { + YamlIO.mapRequired("srcinst", Sub.SrcInst); + YamlIO.mapRequired("srcop", Sub.SrcOp); + YamlIO.mapRequired("dstinst", Sub.DstInst); + YamlIO.mapRequired("dstop", Sub.DstOp); + } + + static const bool flow = true; +}; +} // namespace yaml +} // namespace llvm + +LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::yaml::DebugValueSubstitution) + +namespace llvm { +namespace yaml { struct MachineConstantPoolValue { UnsignedValue ID; StringValue Value; @@ -609,6 +655,7 @@ struct MachineFunction { std::vector<MachineConstantPoolValue> Constants; /// Constant pool. std::unique_ptr<MachineFunctionInfo> MachineFuncInfo; std::vector<CallSiteInfo> CallSitesInfo; + std::vector<DebugValueSubstitution> DebugValueSubstitutions; MachineJumpTable JumpTableInfo; BlockStringValue Body; }; @@ -637,6 +684,8 @@ template <> struct MappingTraits<MachineFunction> { std::vector<MachineStackObject>()); YamlIO.mapOptional("callSites", MF.CallSitesInfo, std::vector<CallSiteInfo>()); + YamlIO.mapOptional("debugValueSubstitutions", MF.DebugValueSubstitutions, + std::vector<DebugValueSubstitution>()); YamlIO.mapOptional("constants", MF.Constants, std::vector<MachineConstantPoolValue>()); YamlIO.mapOptional("machineFunctionInfo", MF.MachineFuncInfo); diff --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/llvm/include/llvm/CodeGen/MachineBasicBlock.h index d6cb7211cf70..2bad64c6cc2e 100644 --- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h +++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h @@ -40,6 +40,7 @@ class Printable; class SlotIndexes; class StringRef; class raw_ostream; +class LiveIntervals; class TargetRegisterClass; class TargetRegisterInfo; @@ -174,8 +175,9 @@ private: /// is only computed once and is cached. mutable MCSymbol *CachedMCSymbol = nullptr; - /// Used during basic block sections to mark the end of a basic block. - MCSymbol *EndMCSymbol = nullptr; + /// Marks the end of the basic block. Used during basic block sections to + /// calculate the size of the basic block, or the BB section ending with it. + mutable MCSymbol *CachedEndMCSymbol = nullptr; // Intrusive list support MachineBasicBlock() = default; @@ -432,6 +434,9 @@ public: bool hasEHPadSuccessor() const; + /// Returns true if this is the entry block of the function. + bool isEntryBlock() const; + /// Returns true if this is the entry block of an EH scope, i.e., the block /// that used to have a catchpad or cleanuppad instruction in the LLVM IR. bool isEHScopeEntry() const { return IsEHScopeEntry; } @@ -474,6 +479,9 @@ public: /// Sets the section ID for this basic block. void setSectionID(MBBSectionID V) { SectionID = V; } + /// Returns the MCSymbol marking the end of this basic block. + MCSymbol *getEndSymbol() const; + /// Returns true if this block may have an INLINEASM_BR (overestimate, by /// checking if any of the successors are indirect targets of any inlineasm_br /// in the function). @@ -671,6 +679,17 @@ public: return !empty() && back().isEHScopeReturn(); } + /// Split a basic block into 2 pieces at \p SplitPoint. 
A new block will be + /// inserted after this block, and all instructions after \p SplitInst moved + /// to it (\p SplitInst will be in the original block). If \p LIS is provided, + /// LiveIntervals will be appropriately updated. \return the newly inserted + /// block. + /// + /// If \p UpdateLiveIns is true, this will ensure the live ins list is + /// accurate, including for physreg uses/defs in the original block. + MachineBasicBlock *splitAt(MachineInstr &SplitInst, bool UpdateLiveIns = true, + LiveIntervals *LIS = nullptr); + /// Split the critical edge from this block to the given successor block, and /// return the newly created block, or null if splitting is not possible. /// @@ -872,6 +891,14 @@ public: void print(raw_ostream &OS, ModuleSlotTracker &MST, const SlotIndexes * = nullptr, bool IsStandalone = true) const; + enum PrintNameFlag { + PrintNameIr = (1 << 0), ///< Add IR name where available + PrintNameAttributes = (1 << 1), ///< Print attributes + }; + + void printName(raw_ostream &os, unsigned printNameFlags = PrintNameIr, + ModuleSlotTracker *moduleSlotTracker = nullptr) const; + // Printing method used by LoopInfo. void printAsOperand(raw_ostream &OS, bool PrintType = true) const; diff --git a/llvm/include/llvm/CodeGen/MachineBlockFrequencyInfo.h b/llvm/include/llvm/CodeGen/MachineBlockFrequencyInfo.h index 0f8d69ebd7da..6c442d3d07bd 100644 --- a/llvm/include/llvm/CodeGen/MachineBlockFrequencyInfo.h +++ b/llvm/include/llvm/CodeGen/MachineBlockFrequencyInfo.h @@ -58,18 +58,33 @@ public: /// information. Please note that initial frequency is equal to 1024. It means /// that we should not rely on the value itself, but only on the comparison to /// the other block frequencies. We do this to avoid using of floating points. - /// + /// For example, to get the frequency of a block relative to the entry block, + /// divide the integral value returned by this function (the + /// BlockFrequency::getFrequency() value) by getEntryFreq(). BlockFrequency getBlockFreq(const MachineBasicBlock *MBB) const; + /// Compute the frequency of the block, relative to the entry block. + /// This API assumes getEntryFreq() is non-zero. + float getBlockFreqRelativeToEntryBlock(const MachineBasicBlock *MBB) const { + return getBlockFreq(MBB).getFrequency() * (1.0f / getEntryFreq()); + } + Optional<uint64_t> getBlockProfileCount(const MachineBasicBlock *MBB) const; Optional<uint64_t> getProfileCountFromFreq(uint64_t Freq) const; - bool isIrrLoopHeader(const MachineBasicBlock *MBB); + bool isIrrLoopHeader(const MachineBasicBlock *MBB) const; - void setBlockFreq(const MachineBasicBlock *MBB, uint64_t Freq); + /// incrementally calculate block frequencies when we split edges, to avoid + /// full CFG traversal. + void onEdgeSplit(const MachineBasicBlock &NewPredecessor, + const MachineBasicBlock &NewSuccessor, + const MachineBranchProbabilityInfo &MBPI); const MachineFunction *getFunction() const; const MachineBranchProbabilityInfo *getMBPI() const; + + /// Pop up a ghostview window with the current block frequency propagation + /// rendered using dot. void view(const Twine &Name, bool isSimple = true) const; // Print the block frequency Freq to OS using the current functions entry @@ -81,6 +96,8 @@ public: raw_ostream &printBlockFreq(raw_ostream &OS, const MachineBasicBlock *MBB) const; + /// Divide a block's BlockFrequency::getFrequency() value by this value to + /// obtain the entry block - relative frequency of said block. 
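A hedged usage note for the entry-relative frequency helper introduced above; the 1% threshold and the helper name are arbitrary choices for illustration, not part of this patch.

#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
using namespace llvm;

// Treat a block as cold when it executes less than 1% as often as the entry
// block, using getBlockFreqRelativeToEntryBlock added above.
static bool isColdBlock(const MachineBlockFrequencyInfo &MBFI,
                        const MachineBasicBlock *MBB) {
  return MBFI.getBlockFreqRelativeToEntryBlock(MBB) < 0.01f;
}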
uint64_t getEntryFreq() const; }; diff --git a/llvm/include/llvm/CodeGen/MachineCombinerPattern.h b/llvm/include/llvm/CodeGen/MachineCombinerPattern.h index e9f52fb064e1..ac0cc70744d1 100644 --- a/llvm/include/llvm/CodeGen/MachineCombinerPattern.h +++ b/llvm/include/llvm/CodeGen/MachineCombinerPattern.h @@ -29,6 +29,11 @@ enum class MachineCombinerPattern { REASSOC_XY_AMM_BMM, REASSOC_XMM_AMM_BMM, + // These are patterns matched by the PowerPC to reassociate FMA and FSUB to + // reduce register pressure. + REASSOC_XY_BCA, + REASSOC_XY_BAC, + // These are multiply-add patterns matched by the AArch64 machine combiner. MULADDW_OP1, MULADDW_OP2, diff --git a/llvm/include/llvm/CodeGen/MachineConstantPool.h b/llvm/include/llvm/CodeGen/MachineConstantPool.h index cfc9ca88c976..a9bc0ce300b2 100644 --- a/llvm/include/llvm/CodeGen/MachineConstantPool.h +++ b/llvm/include/llvm/CodeGen/MachineConstantPool.h @@ -41,10 +41,10 @@ public: explicit MachineConstantPoolValue(Type *ty) : Ty(ty) {} virtual ~MachineConstantPoolValue() = default; - /// getType - get type of this MachineConstantPoolValue. - /// Type *getType() const { return Ty; } + virtual unsigned getSizeInBytes(const DataLayout &DL) const; + virtual int getExistingMachineCPValue(MachineConstantPool *CP, Align Alignment) = 0; @@ -94,7 +94,7 @@ public: Align getAlign() const { return Alignment; } - Type *getType() const; + unsigned getSizeInBytes(const DataLayout &DL) const; /// This method classifies the entry according to whether or not it may /// generate a relocation entry. This must be conservative, so if it might diff --git a/llvm/include/llvm/CodeGen/MachineDominanceFrontier.h b/llvm/include/llvm/CodeGen/MachineDominanceFrontier.h index f7bbd07a63ab..e3e679608784 100644 --- a/llvm/include/llvm/CodeGen/MachineDominanceFrontier.h +++ b/llvm/include/llvm/CodeGen/MachineDominanceFrontier.h @@ -14,7 +14,6 @@ #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/Support/GenericDomTree.h" -#include <vector> namespace llvm { diff --git a/llvm/include/llvm/CodeGen/MachineDominators.h b/llvm/include/llvm/CodeGen/MachineDominators.h index cf3af4d38223..46bf73cdd7b6 100644 --- a/llvm/include/llvm/CodeGen/MachineDominators.h +++ b/llvm/include/llvm/CodeGen/MachineDominators.h @@ -23,7 +23,6 @@ #include "llvm/Support/GenericDomTreeConstruction.h" #include <cassert> #include <memory> -#include <vector> namespace llvm { diff --git a/llvm/include/llvm/CodeGen/MachineFrameInfo.h b/llvm/include/llvm/CodeGen/MachineFrameInfo.h index 5cd7f9cde674..7f0ec0df57c5 100644 --- a/llvm/include/llvm/CodeGen/MachineFrameInfo.h +++ b/llvm/include/llvm/CodeGen/MachineFrameInfo.h @@ -14,6 +14,7 @@ #define LLVM_CODEGEN_MACHINEFRAMEINFO_H #include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/Register.h" #include "llvm/Support/Alignment.h" #include "llvm/Support/DataTypes.h" #include <cassert> @@ -31,7 +32,7 @@ class AllocaInst; /// Callee saved reg can also be saved to a different register rather than /// on the stack by setting DstReg instead of FrameIdx. class CalleeSavedInfo { - unsigned Reg; + Register Reg; union { int FrameIdx; unsigned DstReg; @@ -58,14 +59,14 @@ public: : Reg(R), FrameIdx(FI), Restored(true), SpilledToReg(false) {} // Accessors. 
- unsigned getReg() const { return Reg; } + Register getReg() const { return Reg; } int getFrameIdx() const { return FrameIdx; } unsigned getDstReg() const { return DstReg; } void setFrameIdx(int FI) { FrameIdx = FI; SpilledToReg = false; } - void setDstReg(unsigned SpillReg) { + void setDstReg(Register SpillReg) { DstReg = SpillReg; SpilledToReg = true; } diff --git a/llvm/include/llvm/CodeGen/MachineFunction.h b/llvm/include/llvm/CodeGen/MachineFunction.h index 809c21dd26fc..e9979c788ce0 100644 --- a/llvm/include/llvm/CodeGen/MachineFunction.h +++ b/llvm/include/llvm/CodeGen/MachineFunction.h @@ -431,6 +431,39 @@ public: using VariableDbgInfoMapTy = SmallVector<VariableDbgInfo, 4>; VariableDbgInfoMapTy VariableDbgInfos; + /// A count of how many instructions in the function have had numbers + /// assigned to them. Used for debug value tracking, to determine the + /// next instruction number. + unsigned DebugInstrNumberingCount = 0; + + /// Set value of DebugInstrNumberingCount field. Avoid using this unless + /// you're deserializing this data. + void setDebugInstrNumberingCount(unsigned Num); + + /// Pair of instruction number and operand number. + using DebugInstrOperandPair = std::pair<unsigned, unsigned>; + + /// Substitution map: from one <inst,operand> pair to another. Used to + /// record changes in where a value is defined, so that debug variable + /// locations can find it later. + std::map<DebugInstrOperandPair, DebugInstrOperandPair> + DebugValueSubstitutions; + + /// Create a substitution between one <instr,operand> value to a different, + /// new value. + void makeDebugValueSubstitution(DebugInstrOperandPair, DebugInstrOperandPair); + + /// Create substitutions for any tracked values in \p Old, to point at + /// \p New. Needed when we re-create an instruction during optimization, + /// which has the same signature (i.e., def operands in the same place) but + /// a modified instruction type, flags, or otherwise. An example: X86 moves + /// are sometimes transformed into equivalent LEAs. + /// If the two instructions are not the same opcode, limit which operands to + /// examine for substitutions to the first N operands by setting + /// \p MaxOperand. + void substituteDebugValuesForInst(const MachineInstr &Old, MachineInstr &New, + unsigned MaxOperand = UINT_MAX); + MachineFunction(Function &F, const LLVMTargetMachine &Target, const TargetSubtargetInfo &STI, unsigned FunctionNum, MachineModuleInfo &MMI); @@ -494,7 +527,8 @@ public: /// Returns true if this function has basic block sections enabled. bool hasBBSections() const { return (BBSectionsType == BasicBlockSection::All || - BBSectionsType == BasicBlockSection::List); + BBSectionsType == BasicBlockSection::List || + BBSectionsType == BasicBlockSection::Preset); } /// Returns true if basic block labels are to be generated for this function. @@ -504,9 +538,6 @@ public: void setBBSectionsType(BasicBlockSection V) { BBSectionsType = V; } - /// Creates basic block Labels for this function. - void createBBLabels(); - /// Assign IsBeginSection IsEndSection fields for basic blocks in this /// function. void assignBeginEndSections(); @@ -769,7 +800,7 @@ public: /// CreateMachineInstr - Allocate a new MachineInstr. Use this instead /// of `new MachineInstr'. MachineInstr *CreateMachineInstr(const MCInstrDesc &MCID, const DebugLoc &DL, - bool NoImp = false); + bool NoImplicit = false); /// Create a new MachineInstr which is a copy of \p Orig, identical in all /// ways except the instruction has no parent, prev, or next. 
Bundling flags @@ -815,6 +846,14 @@ public: MachineMemOperand *getMachineMemOperand(const MachineMemOperand *MMO, int64_t Offset, uint64_t Size); + /// getMachineMemOperand - Allocate a new MachineMemOperand by copying + /// an existing one, replacing only the MachinePointerInfo and size. + /// MachineMemOperands are owned by the MachineFunction and need not be + /// explicitly deallocated. + MachineMemOperand *getMachineMemOperand(const MachineMemOperand *MMO, + MachinePointerInfo &PtrInfo, + uint64_t Size); + /// Allocate a new MachineMemOperand by copying an existing one, /// replacing only AliasAnalysis information. MachineMemOperands are owned /// by the MachineFunction and need not be explicitly deallocated. @@ -1067,6 +1106,10 @@ public: /// the same callee. void moveCallSiteInfo(const MachineInstr *Old, const MachineInstr *New); + + unsigned getNewDebugInstrNum() { + return ++DebugInstrNumberingCount; + } }; //===--------------------------------------------------------------------===// @@ -1133,6 +1176,11 @@ template <> struct GraphTraits<Inverse<const MachineFunction*>> : } }; +class MachineFunctionAnalysisManager; +void verifyMachineFunction(MachineFunctionAnalysisManager *, + const std::string &Banner, + const MachineFunction &MF); + } // end namespace llvm #endif // LLVM_CODEGEN_MACHINEFUNCTION_H diff --git a/llvm/include/llvm/CodeGen/MachineInstr.h b/llvm/include/llvm/CodeGen/MachineInstr.h index 970d6d7db334..6bbe2d03f9e5 100644 --- a/llvm/include/llvm/CodeGen/MachineInstr.h +++ b/llvm/include/llvm/CodeGen/MachineInstr.h @@ -249,6 +249,10 @@ private: DebugLoc debugLoc; // Source line information. + /// Unique instruction number. Used by DBG_INSTR_REFs to refer to the values + /// defined by this instruction. + unsigned DebugInstrNum; + // Intrusive list support friend struct ilist_traits<MachineInstr>; friend struct ilist_callback_traits<MachineBasicBlock>; @@ -280,6 +284,9 @@ public: const MachineBasicBlock* getParent() const { return Parent; } MachineBasicBlock* getParent() { return Parent; } + /// Move the instruction before \p MovePos. + void moveBefore(MachineInstr *MovePos); + /// Return the function that contains the basic block that this instruction /// belongs to. /// @@ -441,6 +448,18 @@ public: /// this DBG_LABEL instruction. const DILabel *getDebugLabel() const; + /// Fetch the instruction number of this MachineInstr. If it does not have + /// one already, a new and unique number will be assigned. + unsigned getDebugInstrNum(); + + /// Examine the instruction number of this MachineInstr. May be zero if + /// it hasn't been assigned a number yet. + unsigned peekDebugInstrNum() const { return DebugInstrNum; } + + /// Set instruction number of this MachineInstr. Avoid using unless you're + /// deserializing this information. + void setDebugInstrNum(unsigned Num) { DebugInstrNum = Num; } + /// Emit an error referring to the source location of this instruction. /// This should only be used for inline assembly that is somehow /// impossible to compile. 
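To illustrate the instruction-numbering hooks above, a hedged sketch (the surrounding rewrite and all names are invented): when an optimization replaces OldMI with NewMI and the old instruction already carries a debug instruction number, the function-level substitution map can redirect later DBG_INSTR_REF users to the new definition.

#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
using namespace llvm;

// Record that operand 0 of OldMI is now produced by operand 0 of NewMI.
// MF.substituteDebugValuesForInst(OldMI, NewMI) covers the general case; this
// spells out the single-operand variant by hand.
static void redirectDebugUsers(MachineFunction &MF, MachineInstr &OldMI,
                               MachineInstr &NewMI) {
  if (unsigned OldNum = OldMI.peekDebugInstrNum()) { // zero means never tracked
    unsigned NewNum = NewMI.getDebugInstrNum();      // assigns a number if needed
    MF.makeDebugValueSubstitution({OldNum, 0u}, {NewNum, 0u});
  }
}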
Other errors should have been handled much @@ -1142,7 +1161,10 @@ public: bool isDebugValue() const { return getOpcode() == TargetOpcode::DBG_VALUE; } bool isDebugLabel() const { return getOpcode() == TargetOpcode::DBG_LABEL; } - bool isDebugInstr() const { return isDebugValue() || isDebugLabel(); } + bool isDebugRef() const { return getOpcode() == TargetOpcode::DBG_INSTR_REF; } + bool isDebugInstr() const { + return isDebugValue() || isDebugLabel() || isDebugRef(); + } bool isDebugOffsetImm() const { return getDebugOffset().isImm(); } @@ -1235,9 +1257,11 @@ public: case TargetOpcode::EH_LABEL: case TargetOpcode::GC_LABEL: case TargetOpcode::DBG_VALUE: + case TargetOpcode::DBG_INSTR_REF: case TargetOpcode::DBG_LABEL: case TargetOpcode::LIFETIME_START: case TargetOpcode::LIFETIME_END: + case TargetOpcode::PSEUDO_PROBE: return true; } } @@ -1310,7 +1334,8 @@ public: /// Return true if the MachineInstr modifies (fully define or partially /// define) the specified register. /// NOTE: It's ignoring subreg indices on virtual registers. - bool modifiesRegister(Register Reg, const TargetRegisterInfo *TRI) const { + bool modifiesRegister(Register Reg, + const TargetRegisterInfo *TRI = nullptr) const { return findRegisterDefOperandIdx(Reg, false, true, TRI) != -1; } @@ -1761,8 +1786,10 @@ public: void setDebugValueUndef() { assert(isDebugValue() && "Must be a debug value instruction."); for (MachineOperand &MO : debug_operands()) { - if (MO.isReg()) + if (MO.isReg()) { MO.setReg(0); + MO.setSubReg(0); + } } } diff --git a/llvm/include/llvm/CodeGen/MachineInstrBuilder.h b/llvm/include/llvm/CodeGen/MachineInstrBuilder.h index cabb9f1c97c9..115c50175604 100644 --- a/llvm/include/llvm/CodeGen/MachineInstrBuilder.h +++ b/llvm/include/llvm/CodeGen/MachineInstrBuilder.h @@ -40,20 +40,30 @@ class MDNode; namespace RegState { - enum { - Define = 0x2, - Implicit = 0x4, - Kill = 0x8, - Dead = 0x10, - Undef = 0x20, - EarlyClobber = 0x40, - Debug = 0x80, - InternalRead = 0x100, - Renamable = 0x200, - DefineNoRead = Define | Undef, - ImplicitDefine = Implicit | Define, - ImplicitKill = Implicit | Kill - }; +enum { + /// Register definition. + Define = 0x2, + /// Not emitted register (e.g. carry, or temporary result). + Implicit = 0x4, + /// The last use of a register. + Kill = 0x8, + /// Unused definition. + Dead = 0x10, + /// Value of the register doesn't matter. + Undef = 0x20, + /// Register definition happens before uses. + EarlyClobber = 0x40, + /// Register 'use' is for debugging purpose. + Debug = 0x80, + /// Register reads a value that is defined inside the same instruction or + /// bundle. + InternalRead = 0x100, + /// Register that may be renamed. 
+ Renamable = 0x200, + DefineNoRead = Define | Undef, + ImplicitDefine = Implicit | Define, + ImplicitKill = Implicit | Kill +}; } // end namespace RegState @@ -295,6 +305,9 @@ public: case MachineOperand::MO_BlockAddress: return addBlockAddress(Disp.getBlockAddress(), Disp.getOffset() + off, TargetFlags); + case MachineOperand::MO_JumpTableIndex: + assert(off == 0 && "cannot create offset into jump tables"); + return addJumpTableIndex(Disp.getIndex(), TargetFlags); } } diff --git a/llvm/include/llvm/CodeGen/MachineJumpTableInfo.h b/llvm/include/llvm/CodeGen/MachineJumpTableInfo.h index 11781145b378..1d082bd03e5b 100644 --- a/llvm/include/llvm/CodeGen/MachineJumpTableInfo.h +++ b/llvm/include/llvm/CodeGen/MachineJumpTableInfo.h @@ -106,6 +106,9 @@ public: JumpTables[Idx].MBBs.clear(); } + /// RemoveMBBFromJumpTables - If MBB is present in any jump tables, remove it. + bool RemoveMBBFromJumpTables(MachineBasicBlock *MBB); + /// ReplaceMBBInJumpTables - If Old is the target of any jump tables, update /// the jump tables to branch to New instead. bool ReplaceMBBInJumpTables(MachineBasicBlock *Old, MachineBasicBlock *New); diff --git a/llvm/include/llvm/CodeGen/MachineLoopInfo.h b/llvm/include/llvm/CodeGen/MachineLoopInfo.h index 8a93f91ae54d..c7491d4191de 100644 --- a/llvm/include/llvm/CodeGen/MachineLoopInfo.h +++ b/llvm/include/llvm/CodeGen/MachineLoopInfo.h @@ -67,6 +67,12 @@ public: /// it returns an unknown location. DebugLoc getStartLoc() const; + /// Returns true if the instruction is loop invariant. + /// I.e., all virtual register operands are defined outside of the loop, + /// physical registers aren't accessed explicitly, and there are no side + /// effects that aren't captured by the operands or other flags. + bool isLoopInvariant(MachineInstr &I) const; + void dump() const; private: diff --git a/llvm/include/llvm/CodeGen/MachineLoopUtils.h b/llvm/include/llvm/CodeGen/MachineLoopUtils.h index 2cb0134ca848..ec0b3529c0d6 100644 --- a/llvm/include/llvm/CodeGen/MachineLoopUtils.h +++ b/llvm/include/llvm/CodeGen/MachineLoopUtils.h @@ -37,10 +37,6 @@ MachineBasicBlock *PeelSingleBlockLoop(LoopPeelDirection Direction, MachineRegisterInfo &MRI, const TargetInstrInfo *TII); -/// Return true if PhysReg is live outside the loop, i.e. determine if it -/// is live in the loop exit blocks, and false otherwise. -bool isRegLiveInExitBlocks(MachineLoop *Loop, int PhysReg); - } // namespace llvm #endif // LLVM_LIB_CODEGEN_MACHINELOOPUTILS_H diff --git a/llvm/include/llvm/CodeGen/MachineModuleInfo.h b/llvm/include/llvm/CodeGen/MachineModuleInfo.h index 0ee595b5b5ce..fa900affb214 100644 --- a/llvm/include/llvm/CodeGen/MachineModuleInfo.h +++ b/llvm/include/llvm/CodeGen/MachineModuleInfo.h @@ -54,8 +54,8 @@ class Module; //===----------------------------------------------------------------------===// /// This class can be derived from and used by targets to hold private /// target-specific information for each Module. Objects of type are -/// accessed/created with MMI::getInfo and destroyed when the MachineModuleInfo -/// is destroyed. +/// accessed/created with MachineModuleInfo::getObjFileInfo and destroyed when +/// the MachineModuleInfo is destroyed. /// class MachineModuleInfoImpl { public: @@ -83,6 +83,9 @@ class MachineModuleInfo { /// This is the MCContext used for the entire code generator. MCContext Context; + // This is an external context, that if assigned, will be used instead of the + // internal context. 
+ MCContext *ExternalContext = nullptr; /// This is the LLVM Module being worked on. const Module *TheModule; @@ -149,6 +152,9 @@ class MachineModuleInfo { public: explicit MachineModuleInfo(const LLVMTargetMachine *TM = nullptr); + explicit MachineModuleInfo(const LLVMTargetMachine *TM, + MCContext *ExtContext); + MachineModuleInfo(MachineModuleInfo &&MMII); ~MachineModuleInfo(); @@ -158,8 +164,12 @@ public: const LLVMTargetMachine &getTarget() const { return TM; } - const MCContext &getContext() const { return Context; } - MCContext &getContext() { return Context; } + const MCContext &getContext() const { + return ExternalContext ? *ExternalContext : Context; + } + MCContext &getContext() { + return ExternalContext ? *ExternalContext : Context; + } const Module *getModule() const { return TheModule; } @@ -251,6 +261,12 @@ public: return Personalities; } /// \} + + // MMI owes MCContext. It should never be invalidated. + bool invalidate(Module &, const PreservedAnalyses &, + ModuleAnalysisManager::Invalidator &) { + return false; + } }; // End class MachineModuleInfo class MachineModuleInfoWrapperPass : public ImmutablePass { @@ -260,6 +276,9 @@ public: static char ID; // Pass identification, replacement for typeid explicit MachineModuleInfoWrapperPass(const LLVMTargetMachine *TM = nullptr); + explicit MachineModuleInfoWrapperPass(const LLVMTargetMachine *TM, + MCContext *ExtContext); + // Initialization and Finalization bool doInitialization(Module &) override; bool doFinalization(Module &) override; diff --git a/llvm/include/llvm/CodeGen/MachineOperand.h b/llvm/include/llvm/CodeGen/MachineOperand.h index 0f252137364c..b12351b8a702 100644 --- a/llvm/include/llvm/CodeGen/MachineOperand.h +++ b/llvm/include/llvm/CodeGen/MachineOperand.h @@ -33,7 +33,6 @@ class MachineRegisterInfo; class MCCFIInstruction; class MDNode; class ModuleSlotTracker; -class TargetMachine; class TargetIntrinsicInfo; class TargetRegisterInfo; class hash_code; @@ -728,12 +727,12 @@ public: /// ChangeToImmediate - Replace this operand with a new immediate operand of /// the specified value. If an operand is known to be an immediate already, /// the setImm method should be used. - void ChangeToImmediate(int64_t ImmVal); + void ChangeToImmediate(int64_t ImmVal, unsigned TargetFlags = 0); /// ChangeToFPImmediate - Replace this operand with a new FP immediate operand /// of the specified value. If an operand is known to be an FP immediate /// already, the setFPImm method should be used. - void ChangeToFPImmediate(const ConstantFP *FPImm); + void ChangeToFPImmediate(const ConstantFP *FPImm, unsigned TargetFlags = 0); /// ChangeToES - Replace this operand with a new external symbol operand. void ChangeToES(const char *SymName, unsigned TargetFlags = 0); @@ -743,10 +742,10 @@ public: unsigned TargetFlags = 0); /// ChangeToMCSymbol - Replace this operand with a new MC symbol operand. - void ChangeToMCSymbol(MCSymbol *Sym); + void ChangeToMCSymbol(MCSymbol *Sym, unsigned TargetFlags = 0); /// Replace this operand with a frame index. - void ChangeToFrameIndex(int Idx); + void ChangeToFrameIndex(int Idx, unsigned TargetFlags = 0); /// Replace this operand with a target index. void ChangeToTargetIndex(unsigned Idx, int64_t Offset, @@ -759,6 +758,11 @@ public: bool isKill = false, bool isDead = false, bool isUndef = false, bool isDebug = false); + /// getTargetIndexName - If this MachineOperand is a TargetIndex that has a + /// name, attempt to get the name. Returns nullptr if the TargetIndex does not + /// have a name. 
Asserts if MO is not a TargetIndex. + const char *getTargetIndexName() const; + //===--------------------------------------------------------------------===// // Construction methods. //===--------------------------------------------------------------------===// diff --git a/llvm/include/llvm/CodeGen/MachineOutliner.h b/llvm/include/llvm/CodeGen/MachineOutliner.h index 4a1b04ab3e88..a5dbbdb4fdcd 100644 --- a/llvm/include/llvm/CodeGen/MachineOutliner.h +++ b/llvm/include/llvm/CodeGen/MachineOutliner.h @@ -15,10 +15,11 @@ #ifndef LLVM_MACHINEOUTLINER_H #define LLVM_MACHINEOUTLINER_H +#include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/LiveRegUnits.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" -#include "llvm/CodeGen/LivePhysRegs.h" namespace llvm { namespace outliner { diff --git a/llvm/include/llvm/CodeGen/MachinePassManager.h b/llvm/include/llvm/CodeGen/MachinePassManager.h new file mode 100644 index 000000000000..1489177d9668 --- /dev/null +++ b/llvm/include/llvm/CodeGen/MachinePassManager.h @@ -0,0 +1,256 @@ +//===- PassManager.h --- Pass management for CodeGen ------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This header defines the pass manager interface for codegen. The codegen +// pipeline consists of only machine function passes. There is no container +// relationship between IR module/function and machine function in terms of pass +// manager organization. So there is no need for adaptor classes (for example +// ModuleToMachineFunctionAdaptor). Since invalidation could only happen among +// machine function passes, there is no proxy classes to handle cross-IR-unit +// invalidation. IR analysis results are provided for machine function passes by +// their respective analysis managers such as ModuleAnalysisManager and +// FunctionAnalysisManager. +// +// TODO: Add MachineFunctionProperties support. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_MACHINEPASSMANAGER_H +#define LLVM_CODEGEN_MACHINEPASSMANAGER_H + +#include "llvm/ADT/FunctionExtras.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/IR/PassManager.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/type_traits.h" + +namespace llvm { +class Module; + +extern template class AnalysisManager<MachineFunction>; + +/// An AnalysisManager<MachineFunction> that also exposes IR analysis results. +class MachineFunctionAnalysisManager : public AnalysisManager<MachineFunction> { +public: + using Base = AnalysisManager<MachineFunction>; + + MachineFunctionAnalysisManager() : Base(false), FAM(nullptr), MAM(nullptr) {} + MachineFunctionAnalysisManager(FunctionAnalysisManager &FAM, + ModuleAnalysisManager &MAM, + bool DebugLogging = false) + : Base(DebugLogging), FAM(&FAM), MAM(&MAM) {} + MachineFunctionAnalysisManager(MachineFunctionAnalysisManager &&) = default; + MachineFunctionAnalysisManager & + operator=(MachineFunctionAnalysisManager &&) = default; + + /// Get the result of an analysis pass for a Function. + /// + /// Runs the analysis if a cached result is not available. 
+ template <typename PassT> typename PassT::Result &getResult(Function &F) { + return FAM->getResult<PassT>(F); + } + + /// Get the cached result of an analysis pass for a Function. + /// + /// This method never runs the analysis. + /// + /// \returns null if there is no cached result. + template <typename PassT> + typename PassT::Result *getCachedResult(Function &F) { + return FAM->getCachedResult<PassT>(F); + } + + /// Get the result of an analysis pass for a Module. + /// + /// Runs the analysis if a cached result is not available. + template <typename PassT> typename PassT::Result &getResult(Module &M) { + return MAM->getResult<PassT>(M); + } + + /// Get the cached result of an analysis pass for a Module. + /// + /// This method never runs the analysis. + /// + /// \returns null if there is no cached result. + template <typename PassT> typename PassT::Result *getCachedResult(Module &M) { + return MAM->getCachedResult<PassT>(M); + } + + /// Get the result of an analysis pass for a MachineFunction. + /// + /// Runs the analysis if a cached result is not available. + using Base::getResult; + + /// Get the cached result of an analysis pass for a MachineFunction. + /// + /// This method never runs the analysis. + /// + /// returns null if there is no cached result. + using Base::getCachedResult; + + // FIXME: Add LoopAnalysisManager or CGSCCAnalysisManager if needed. + FunctionAnalysisManager *FAM; + ModuleAnalysisManager *MAM; +}; + +extern template class PassManager<MachineFunction>; + +/// MachineFunctionPassManager adds/removes below features to/from the base +/// PassManager template instantiation. +/// +/// - Support passes that implement doInitialization/doFinalization. This is for +/// machine function passes to work on module level constructs. One such pass +/// is AsmPrinter. +/// +/// - Support machine module pass which runs over the module (for example, +/// MachineOutliner). A machine module pass needs to define the method: +/// +/// ```Error run(Module &, MachineFunctionAnalysisManager &)``` +/// +/// FIXME: machine module passes still need to define the usual machine +/// function pass interface, namely, +/// `PreservedAnalyses run(MachineFunction &, +/// MachineFunctionAnalysisManager &)` +/// But this interface wouldn't be executed. It is just a placeholder +/// to satisfy the pass manager type-erased inteface. This +/// special-casing of machine module pass is due to its limited use +/// cases and the unnecessary complexity it may bring to the machine +/// pass manager. +/// +/// - The base class `run` method is replaced by an alternative `run` method. +/// See details below. +/// +/// - Support codegening in the SCC order. Users include interprocedural +/// register allocation (IPRA). +class MachineFunctionPassManager + : public PassManager<MachineFunction, MachineFunctionAnalysisManager> { + using Base = PassManager<MachineFunction, MachineFunctionAnalysisManager>; + +public: + MachineFunctionPassManager(bool DebugLogging = false, + bool RequireCodeGenSCCOrder = false, + bool VerifyMachineFunction = false) + : Base(DebugLogging), RequireCodeGenSCCOrder(RequireCodeGenSCCOrder), + VerifyMachineFunction(VerifyMachineFunction) {} + MachineFunctionPassManager(MachineFunctionPassManager &&) = default; + MachineFunctionPassManager & + operator=(MachineFunctionPassManager &&) = default; + + /// Run machine passes for a Module. + /// + /// The intended use is to start the codegen pipeline for a Module. 
The base + /// class's `run` method is deliberately hidden by this due to the observation + /// that we don't yet have the use cases of compositing two instances of + /// machine pass managers, or compositing machine pass managers with other + /// types of pass managers. + Error run(Module &M, MachineFunctionAnalysisManager &MFAM); + + template <typename PassT> void addPass(PassT &&Pass) { + Base::addPass(std::forward<PassT>(Pass)); + PassConceptT *P = Passes.back().get(); + addDoInitialization<PassT>(P); + addDoFinalization<PassT>(P); + + // Add machine module pass. + addRunOnModule<PassT>(P); + } + +private: + template <typename PassT> + using has_init_t = decltype(std::declval<PassT &>().doInitialization( + std::declval<Module &>(), + std::declval<MachineFunctionAnalysisManager &>())); + + template <typename PassT> + std::enable_if_t<!is_detected<has_init_t, PassT>::value> + addDoInitialization(PassConceptT *Pass) {} + + template <typename PassT> + std::enable_if_t<is_detected<has_init_t, PassT>::value> + addDoInitialization(PassConceptT *Pass) { + using PassModelT = + detail::PassModel<MachineFunction, PassT, PreservedAnalyses, + MachineFunctionAnalysisManager>; + auto *P = static_cast<PassModelT *>(Pass); + InitializationFuncs.emplace_back( + [=](Module &M, MachineFunctionAnalysisManager &MFAM) { + return P->Pass.doInitialization(M, MFAM); + }); + } + + template <typename PassT> + using has_fini_t = decltype(std::declval<PassT &>().doFinalization( + std::declval<Module &>(), + std::declval<MachineFunctionAnalysisManager &>())); + + template <typename PassT> + std::enable_if_t<!is_detected<has_fini_t, PassT>::value> + addDoFinalization(PassConceptT *Pass) {} + + template <typename PassT> + std::enable_if_t<is_detected<has_fini_t, PassT>::value> + addDoFinalization(PassConceptT *Pass) { + using PassModelT = + detail::PassModel<MachineFunction, PassT, PreservedAnalyses, + MachineFunctionAnalysisManager>; + auto *P = static_cast<PassModelT *>(Pass); + FinalizationFuncs.emplace_back( + [=](Module &M, MachineFunctionAnalysisManager &MFAM) { + return P->Pass.doFinalization(M, MFAM); + }); + } + + template <typename PassT> + using is_machine_module_pass_t = decltype(std::declval<PassT &>().run( + std::declval<Module &>(), + std::declval<MachineFunctionAnalysisManager &>())); + + template <typename PassT> + using is_machine_function_pass_t = decltype(std::declval<PassT &>().run( + std::declval<MachineFunction &>(), + std::declval<MachineFunctionAnalysisManager &>())); + + template <typename PassT> + std::enable_if_t<!is_detected<is_machine_module_pass_t, PassT>::value> + addRunOnModule(PassConceptT *Pass) {} + + template <typename PassT> + std::enable_if_t<is_detected<is_machine_module_pass_t, PassT>::value> + addRunOnModule(PassConceptT *Pass) { + static_assert(is_detected<is_machine_function_pass_t, PassT>::value, + "machine module pass needs to define machine function pass " + "api. 
sorry."); + + using PassModelT = + detail::PassModel<MachineFunction, PassT, PreservedAnalyses, + MachineFunctionAnalysisManager>; + auto *P = static_cast<PassModelT *>(Pass); + MachineModulePasses.emplace( + Passes.size() - 1, + [=](Module &M, MachineFunctionAnalysisManager &MFAM) { + return P->Pass.run(M, MFAM); + }); + } + + using FuncTy = Error(Module &, MachineFunctionAnalysisManager &); + SmallVector<llvm::unique_function<FuncTy>, 4> InitializationFuncs; + SmallVector<llvm::unique_function<FuncTy>, 4> FinalizationFuncs; + + using PassIndex = decltype(Passes)::size_type; + std::map<PassIndex, llvm::unique_function<FuncTy>> MachineModulePasses; + + // Run codegen in the SCC order. + bool RequireCodeGenSCCOrder; + + bool VerifyMachineFunction; +}; + +} // end namespace llvm + +#endif // LLVM_CODEGEN_MACHINEPASSMANAGER_H diff --git a/llvm/include/llvm/CodeGen/MachinePassRegistry.def b/llvm/include/llvm/CodeGen/MachinePassRegistry.def new file mode 100644 index 000000000000..e9eaa5f77000 --- /dev/null +++ b/llvm/include/llvm/CodeGen/MachinePassRegistry.def @@ -0,0 +1,197 @@ +//===- MachinePassRegistry.def - Registry of passes -------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is used as the registry of passes that are for target-independent +// code generator. +// +//===----------------------------------------------------------------------===// + +// NOTE: NO INCLUDE GUARD DESIRED! + +#ifndef MODULE_ANALYSIS +#define MODULE_ANALYSIS(NAME, PASS_NAME, CONSTRUCTOR) +#endif +MODULE_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis, (PIC)) +#undef MODULE_ANALYSIS + +#ifndef MODULE_PASS +#define MODULE_PASS(NAME, PASS_NAME, CONSTRUCTOR) +#endif +MODULE_PASS("pre-isel-intrinsic-lowering", PreISelIntrinsicLoweringPass, ()) +#undef MODULE_PASS + +#ifndef FUNCTION_ANALYSIS +#define FUNCTION_ANALYSIS(NAME, PASS_NAME, CONSTRUCTOR) +#endif +FUNCTION_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis, (PIC)) +FUNCTION_ANALYSIS("targetir", TargetIRAnalysis, (std::move(TM.getTargetIRAnalysis()))) +#undef FUNCTION_ANALYSIS + +#ifndef FUNCTION_PASS +#define FUNCTION_PASS(NAME, PASS_NAME, CONSTRUCTOR) +#endif +FUNCTION_PASS("mergeicmps", MergeICmpsPass, ()) +FUNCTION_PASS("lower-constant-intrinsics", LowerConstantIntrinsicsPass, ()) +FUNCTION_PASS("unreachableblockelim", UnreachableBlockElimPass, ()) +FUNCTION_PASS("consthoist", ConstantHoistingPass, ()) +FUNCTION_PASS("partially-inline-libcalls", PartiallyInlineLibCallsPass, ()) +FUNCTION_PASS("ee-instrument", EntryExitInstrumenterPass, (false)) +FUNCTION_PASS("post-inline-ee-instrument", EntryExitInstrumenterPass, (true)) +FUNCTION_PASS("expand-reductions", ExpandReductionsPass, ()) +FUNCTION_PASS("lowerinvoke", LowerInvokePass, ()) +FUNCTION_PASS("scalarize-masked-mem-intrin", ScalarizeMaskedMemIntrinPass, ()) +FUNCTION_PASS("verify", VerifierPass, ()) +#undef FUNCTION_PASS + +#ifndef LOOP_PASS +#define LOOP_PASS(NAME, PASS_NAME, CONSTRUCTOR) +#endif +LOOP_PASS("loop-reduce", LoopStrengthReducePass, ()) +#undef LOOP_PASS + +#ifndef MACHINE_MODULE_PASS +#define MACHINE_MODULE_PASS(NAME, PASS_NAME, CONSTRUCTOR) +#endif +#undef MACHINE_MODULE_PASS + +#ifndef MACHINE_FUNCTION_ANALYSIS +#define MACHINE_FUNCTION_ANALYSIS(NAME, PASS_NAME, CONSTRUCTOR) 
+#endif +MACHINE_FUNCTION_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis, (PIC)) +// LiveVariables currently requires pure SSA form. +// FIXME: Once TwoAddressInstruction pass no longer uses kill flags, +// LiveVariables can be removed completely, and LiveIntervals can be directly +// computed. (We still either need to regenerate kill flags after regalloc, or +// preferably fix the scavenger to not depend on them). +// MACHINE_FUNCTION_ANALYSIS("live-vars", LiveVariablesAnalysis()) + +// MACHINE_FUNCTION_ANALYSIS("live-stacks", LiveStacksPass()) +// MACHINE_FUNCTION_ANALYSIS("slot-indexes", SlotIndexesAnalysis()) +// MACHINE_FUNCTION_ANALYSIS("edge-bundles", EdgeBundlesAnalysis()) +// MACHINE_FUNCTION_ANALYSIS("lazy-machine-bfi", LazyMachineBlockFrequencyInfoAnalysis()) +// MACHINE_FUNCTION_ANALYSIS("machine-bfi", MachineBlockFrequencyInfoAnalysis()) +// MACHINE_FUNCTION_ANALYSIS("machine-loops", MachineLoopInfoAnalysis()) +// MACHINE_FUNCTION_ANALYSIS("machine-dom-frontier", MachineDominanceFrontierAnalysis()) +// MACHINE_FUNCTION_ANALYSIS("machine-dom-tree", MachineDominatorTreeAnalysis()) +// MACHINE_FUNCTION_ANALYSIS("machine-ore", MachineOptimizationRemarkEmitterPassAnalysis()) +// MACHINE_FUNCTION_ANALYSIS("machine-post-dom-tree", MachinePostDominatorTreeAnalysis()) +// MACHINE_FUNCTION_ANALYSIS("machine-region-info", MachineRegionInfoPassAnalysis()) +// MACHINE_FUNCTION_ANALYSIS("machine-trace-metrics", MachineTraceMetricsAnalysis()) +// MACHINE_FUNCTION_ANALYSIS("reaching-def", ReachingDefAnalysisAnalysis()) +// MACHINE_FUNCTION_ANALYSIS("live-reg-matrix", LiveRegMatrixAnalysis()) +// MACHINE_FUNCTION_ANALYSIS("gc-analysis", GCMachineCodeAnalysisPass()) +#undef MACHINE_FUNCTION_ANALYSIS + +#ifndef MACHINE_FUNCTION_PASS +#define MACHINE_FUNCTION_PASS(NAME, PASS_NAME, CONSTRUCTOR) +#endif +// MACHINE_FUNCTION_PASS("mir-printer", PrintMIRPass, ()) +// MACHINE_FUNCTION_PASS("free-machine-function", FreeMachineFunctionPass, ()) +#undef MACHINE_FUNCTION_PASS + +// After a pass is converted to new pass manager, its entry should be moved from +// dummy table to the normal one. For example, for a machine function pass, +// DUMMY_MACHINE_FUNCTION_PASS to MACHINE_FUNCTION_PASS. 
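For orientation, a hedged sketch of what a minimal machine function pass aimed at this new registry might look like once it graduates from the dummy table; the pass name, its side-effect-free body, and the registration line are all invented.

#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachinePassManager.h"
#include "llvm/IR/PassManager.h"
using namespace llvm;

// A do-nothing pass that only walks the function; since it changes no state,
// it can report that every cached analysis remains valid.
class CountInstrsPass : public PassInfoMixin<CountInstrsPass> {
public:
  PreservedAnalyses run(MachineFunction &MF,
                        MachineFunctionAnalysisManager &MFAM) {
    unsigned NumInstrs = 0;
    for (const MachineBasicBlock &MBB : MF)
      NumInstrs += MBB.size();
    (void)NumInstrs; // purely illustrative
    return PreservedAnalyses::all();
  }
};

// Once real, the entry would then move out of the dummy table, e.g.:
// MACHINE_FUNCTION_PASS("count-instrs", CountInstrsPass, ())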
+ +#ifndef DUMMY_FUNCTION_PASS +#define DUMMY_FUNCTION_PASS(NAME, PASS_NAME, CONSTRUCTOR) +#endif +DUMMY_FUNCTION_PASS("expandmemcmp", ExpandMemCmpPass, ()) +DUMMY_FUNCTION_PASS("gc-lowering", GCLoweringPass, ()) +DUMMY_FUNCTION_PASS("shadow-stack-gc-lowering", ShadowStackGCLoweringPass, ()) +DUMMY_FUNCTION_PASS("sjljehprepare", SjLjEHPreparePass, ()) +DUMMY_FUNCTION_PASS("dwarfehprepare", DwarfEHPass, ()) +DUMMY_FUNCTION_PASS("winehprepare", WinEHPass, ()) +DUMMY_FUNCTION_PASS("wasmehprepare", WasmEHPass, ()) +DUMMY_FUNCTION_PASS("codegenprepare", CodeGenPreparePass, ()) +DUMMY_FUNCTION_PASS("safe-stack", SafeStackPass, ()) +DUMMY_FUNCTION_PASS("stack-protector", StackProtectorPass, ()) +DUMMY_FUNCTION_PASS("atomic-expand", AtomicExpandPass, ()) +DUMMY_FUNCTION_PASS("interleaved-access", InterleavedAccessPass, ()) +DUMMY_FUNCTION_PASS("indirectbr-expand", IndirectBrExpandPass, ()) +DUMMY_FUNCTION_PASS("cfguard-dispatch", CFGuardDispatchPass, ()) +DUMMY_FUNCTION_PASS("cfguard-check", CFGuardCheckPass, ()) +DUMMY_FUNCTION_PASS("gc-info-printer", GCInfoPrinterPass, ()) +#undef DUMMY_FUNCTION_PASS + +#ifndef DUMMY_MODULE_PASS +#define DUMMY_MODULE_PASS(NAME, PASS_NAME, CONSTRUCTOR) +#endif +DUMMY_MODULE_PASS("lower-emutls", LowerEmuTLSPass, ()) +#undef DUMMY_MODULE_PASS + +#ifndef DUMMY_MACHINE_MODULE_PASS +#define DUMMY_MACHINE_MODULE_PASS(NAME, PASS_NAME, CONSTRUCTOR) +#endif +DUMMY_MACHINE_MODULE_PASS("machine-outliner", MachineOutlinerPass, ()) +#undef DUMMY_MACHINE_MODULE_PASS + +#ifndef DUMMY_MACHINE_FUNCTION_PASS +#define DUMMY_MACHINE_FUNCTION_PASS(NAME, PASS_NAME, CONSTRUCTOR) +#endif +DUMMY_MACHINE_FUNCTION_PASS("mir-printer", PrintMIRPass, ()) +DUMMY_MACHINE_FUNCTION_PASS("free-machine-function", FreeMachineFunctionPass, ()) +DUMMY_MACHINE_FUNCTION_PASS("finalize-isel", FinalizeISelPass, ()) +DUMMY_MACHINE_FUNCTION_PASS("localstackalloc", LocalStackSlotPass, ()) +DUMMY_MACHINE_FUNCTION_PASS("shrink-wrap", ShrinkWrapPass, ()) +DUMMY_MACHINE_FUNCTION_PASS("prologepilog", PrologEpilogInserterPass, ()) +DUMMY_MACHINE_FUNCTION_PASS("postrapseudos", ExpandPostRAPseudosPass, ()) +DUMMY_MACHINE_FUNCTION_PASS("implicit-null-checks", ImplicitNullChecksPass, ()) +DUMMY_MACHINE_FUNCTION_PASS("postmisched", PostMachineSchedulerPass, ()) +DUMMY_MACHINE_FUNCTION_PASS("machine-scheduler", MachineSchedulerPass, ()) +DUMMY_MACHINE_FUNCTION_PASS("machine-cp", MachineCopyPropagationPass, ()) +DUMMY_MACHINE_FUNCTION_PASS("post-RA-sched", PostRASchedulerPass, ()) +DUMMY_MACHINE_FUNCTION_PASS("fentry-insert", FEntryInserterPass, ()) +DUMMY_MACHINE_FUNCTION_PASS("xray-instrumentation", XRayInstrumentationPass, ()) +DUMMY_MACHINE_FUNCTION_PASS("patchable-function", PatchableFunctionPass, ()) +DUMMY_MACHINE_FUNCTION_PASS("reg-usage-propagation", RegUsageInfoPropagationPass, ()) +DUMMY_MACHINE_FUNCTION_PASS("reg-usage-collector", RegUsageInfoCollectorPass, ()) +DUMMY_MACHINE_FUNCTION_PASS("funclet-layout", FuncletLayoutPass, ()) +DUMMY_MACHINE_FUNCTION_PASS("stackmap-liveness", StackMapLivenessPass, ()) +DUMMY_MACHINE_FUNCTION_PASS("livedebugvalues", LiveDebugValuesPass, ()) +DUMMY_MACHINE_FUNCTION_PASS("early-tailduplication", EarlyTailDuplicatePass, ()) +DUMMY_MACHINE_FUNCTION_PASS("opt-phis", OptimizePHIsPass, ()) +DUMMY_MACHINE_FUNCTION_PASS("stack-coloring", StackColoringPass, ()) +DUMMY_MACHINE_FUNCTION_PASS("dead-mi-elimination", DeadMachineInstructionElimPass, ()) +DUMMY_MACHINE_FUNCTION_PASS("early-machinelicm", EarlyMachineLICMPass, ()) +DUMMY_MACHINE_FUNCTION_PASS("machinelicm", MachineLICMPass, 
()) +DUMMY_MACHINE_FUNCTION_PASS("machine-cse", MachineCSEPass, ()) +DUMMY_MACHINE_FUNCTION_PASS("machine-sink", MachineSinkingPass, ()) +DUMMY_MACHINE_FUNCTION_PASS("postra-machine-sink", PostRAMachineSinkingPass, ()) +DUMMY_MACHINE_FUNCTION_PASS("peephole-opt", PeepholeOptimizerPass, ()) +DUMMY_MACHINE_FUNCTION_PASS("regalloc", RegAllocPass, ()) +DUMMY_MACHINE_FUNCTION_PASS("virtregrewriter", VirtRegRewriterPass, ()) +DUMMY_MACHINE_FUNCTION_PASS("stack-slot-coloring", StackSlotColoringPass, ()) +DUMMY_MACHINE_FUNCTION_PASS("phi-node-elimination", PHIEliminationPass, ()) +DUMMY_MACHINE_FUNCTION_PASS("twoaddressinstruction", TwoAddressInstructionPass, ()) +DUMMY_MACHINE_FUNCTION_PASS("detect-dead-lanes", DetectDeadLanesPass, ()) +DUMMY_MACHINE_FUNCTION_PASS("processimpdefs", ProcessImplicitDefsPass, ()) +DUMMY_MACHINE_FUNCTION_PASS("liveintervals", LiveIntervalsPass, ()) +DUMMY_MACHINE_FUNCTION_PASS("simple-register-coalescing", RegisterCoalescerPass, ()) +DUMMY_MACHINE_FUNCTION_PASS("rename-independent-subregs", RenameIndependentSubregsPass, ()) +DUMMY_MACHINE_FUNCTION_PASS("branch-folder", BranchFolderPass, ()) +DUMMY_MACHINE_FUNCTION_PASS("tailduplication", TailDuplicatePass, ()) +DUMMY_MACHINE_FUNCTION_PASS("block-placement", MachineBlockPlacementPass, ()) +DUMMY_MACHINE_FUNCTION_PASS("block-placement-stats", MachineBlockPlacementStatsPass, ()) +DUMMY_MACHINE_FUNCTION_PASS("early-ifcvt", EarlyIfConverterPass, ()) +DUMMY_MACHINE_FUNCTION_PASS("machine-combiner", MachineCombinerPass, ()) +DUMMY_MACHINE_FUNCTION_PASS("lrshrink", LiveRangeShrinkPass, ()) +DUMMY_MACHINE_FUNCTION_PASS("break-false-deps", BreakFalseDepsPass, ()) +DUMMY_MACHINE_FUNCTION_PASS("cfi-instr-inserter", CFIInstrInserterPass, ()) +DUMMY_MACHINE_FUNCTION_PASS("cfguard-longjmp", CFGuardLongjmpPass, ()) +DUMMY_MACHINE_FUNCTION_PASS("ra-basic", RABasicPass, ()) +DUMMY_MACHINE_FUNCTION_PASS("ra-fast", RAFastPass, ()) +DUMMY_MACHINE_FUNCTION_PASS("ra-greedy", RAGreedyPass, ()) +DUMMY_MACHINE_FUNCTION_PASS("ra-pbqp", RAPBQPPass, ()) +DUMMY_MACHINE_FUNCTION_PASS("legalizer", LegalizerPass, ()) +DUMMY_MACHINE_FUNCTION_PASS("irtranslator", IRTranslatorPass, ()) +DUMMY_MACHINE_FUNCTION_PASS("regbankselect", RegBankSelectPass, ()) +DUMMY_MACHINE_FUNCTION_PASS("instruction-select", InstructionSelectPass, ()) +DUMMY_MACHINE_FUNCTION_PASS("reset-machine-function", ResetMachineFunctionPass, ()) +DUMMY_MACHINE_FUNCTION_PASS("machineverifier", MachineVerifierPass, ()) +#undef DUMMY_MACHINE_FUNCTION_PASS diff --git a/llvm/include/llvm/CodeGen/MachinePipeliner.h b/llvm/include/llvm/CodeGen/MachinePipeliner.h index 8b2c27e7b888..f89a453749e8 100644 --- a/llvm/include/llvm/CodeGen/MachinePipeliner.h +++ b/llvm/include/llvm/CodeGen/MachinePipeliner.h @@ -40,8 +40,6 @@ #ifndef LLVM_LIB_CODEGEN_MACHINEPIPELINER_H #define LLVM_LIB_CODEGEN_MACHINEPIPELINER_H -#include "llvm/Analysis/AliasAnalysis.h" - #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" #include "llvm/CodeGen/RegisterClassInfo.h" @@ -51,6 +49,7 @@ namespace llvm { +class AAResults; class NodeSet; class SMSchedule; @@ -92,15 +91,7 @@ public: bool runOnMachineFunction(MachineFunction &MF) override; - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired<AAResultsWrapperPass>(); - AU.addPreserved<AAResultsWrapperPass>(); - AU.addRequired<MachineLoopInfo>(); - AU.addRequired<MachineDominatorTree>(); - AU.addRequired<LiveIntervals>(); - AU.addRequired<MachineOptimizationRemarkEmitterPass>(); - 
MachineFunctionPass::getAnalysisUsage(AU); - } + void getAnalysisUsage(AnalysisUsage &AU) const override; private: void preprocessPhiNodes(MachineBasicBlock &B); @@ -285,7 +276,7 @@ public: static bool classof(const ScheduleDAGInstrs *DAG) { return true; } private: - void addLoopCarriedDependences(AliasAnalysis *AA); + void addLoopCarriedDependences(AAResults *AA); void updatePhiDependences(); void changeDependences(); unsigned calculateResMII(); @@ -304,7 +295,7 @@ private: void checkValidNodeOrder(const NodeSetType &Circuits) const; bool schedulePipeline(SMSchedule &Schedule); bool computeDelta(MachineInstr &MI, unsigned &Delta); - MachineInstr *findDefInLoop(unsigned Reg); + MachineInstr *findDefInLoop(Register Reg); bool canUseLastOffsetValue(MachineInstr *MI, unsigned &BasePos, unsigned &OffsetPos, unsigned &NewBase, int64_t &NewOffset); diff --git a/llvm/include/llvm/CodeGen/MachineRegisterInfo.h b/llvm/include/llvm/CodeGen/MachineRegisterInfo.h index 35aab5018fa4..57086b4eebd6 100644 --- a/llvm/include/llvm/CodeGen/MachineRegisterInfo.h +++ b/llvm/include/llvm/CodeGen/MachineRegisterInfo.h @@ -442,10 +442,20 @@ public: /// Return true if there is exactly one operand defining the specified /// register. bool hasOneDef(Register RegNo) const { - def_iterator DI = def_begin(RegNo); - if (DI == def_end()) - return false; - return ++DI == def_end(); + return hasSingleElement(def_operands(RegNo)); + } + + /// Returns the defining operand if there is exactly one operand defining the + /// specified register, otherwise nullptr. + MachineOperand *getOneDef(Register Reg) const { + def_iterator DI = def_begin(Reg); + if (DI == def_end()) // No defs. + return nullptr; + + def_iterator OneDef = DI; + if (++DI == def_end()) + return &*OneDef; + return nullptr; // Multiple defs. } /// use_iterator/use_begin/use_end - Walk all uses of the specified register. @@ -498,10 +508,7 @@ public: /// hasOneUse - Return true if there is exactly one instruction using the /// specified register. bool hasOneUse(Register RegNo) const { - use_iterator UI = use_begin(RegNo); - if (UI == use_end()) - return false; - return ++UI == use_end(); + return hasSingleElement(use_operands(RegNo)); } /// use_nodbg_iterator/use_nodbg_begin/use_nodbg_end - Walk all uses of the @@ -612,14 +619,10 @@ public: /// function. Writing to a constant register has no effect. bool isConstantPhysReg(MCRegister PhysReg) const; - /// Returns true if either isConstantPhysReg or TRI->isCallerPreservedPhysReg - /// returns true. This is a utility member function. - bool isCallerPreservedOrConstPhysReg(MCRegister PhysReg) const; - /// Get an iterator over the pressure sets affected by the given physical or /// virtual register. If RegUnit is physical, it must be a register unit (from /// MCRegUnitIterator). - PSetIterator getPressureSets(unsigned RegUnit) const; + PSetIterator getPressureSets(Register RegUnit) const; //===--------------------------------------------------------------------===// // Virtual Register Info @@ -894,7 +897,7 @@ public: /// /// Reserved registers may belong to an allocatable register class, but the /// target has explicitly requested that they are not used. 
- bool isReserved(Register PhysReg) const { + bool isReserved(MCRegister PhysReg) const { return getReservedRegs().test(PhysReg.id()); } @@ -1174,14 +1177,13 @@ class PSetIterator { public: PSetIterator() = default; - PSetIterator(unsigned RegUnit, const MachineRegisterInfo *MRI) { + PSetIterator(Register RegUnit, const MachineRegisterInfo *MRI) { const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo(); - if (Register::isVirtualRegister(RegUnit)) { + if (RegUnit.isVirtual()) { const TargetRegisterClass *RC = MRI->getRegClass(RegUnit); PSet = TRI->getRegClassPressureSets(RC); Weight = TRI->getRegClassWeight(RC).RegWeight; - } - else { + } else { PSet = TRI->getRegUnitPressureSets(RegUnit); Weight = TRI->getRegUnitWeight(RegUnit); } @@ -1203,8 +1205,8 @@ public: } }; -inline PSetIterator MachineRegisterInfo:: -getPressureSets(unsigned RegUnit) const { +inline PSetIterator +MachineRegisterInfo::getPressureSets(Register RegUnit) const { return PSetIterator(RegUnit, this); } diff --git a/llvm/include/llvm/CodeGen/MachineSSAUpdater.h b/llvm/include/llvm/CodeGen/MachineSSAUpdater.h index df972e12d461..0af356e376ab 100644 --- a/llvm/include/llvm/CodeGen/MachineSSAUpdater.h +++ b/llvm/include/llvm/CodeGen/MachineSSAUpdater.h @@ -40,9 +40,6 @@ private: //typedef DenseMap<MachineBasicBlock*, Register> AvailableValsTy; void *AV = nullptr; - /// VR - Current virtual register whose uses are being updated. - Register VR; - /// VRC - Register class of the current virtual register. const TargetRegisterClass *VRC; @@ -65,6 +62,7 @@ public: /// Initialize - Reset this object to get ready for a new set of SSA /// updates. void Initialize(Register V); + void Initialize(const TargetRegisterClass *RC); /// AddAvailableValue - Indicate that a rewritten value is available at the /// end of the specified block with the specified value. diff --git a/llvm/include/llvm/CodeGen/MachineStableHash.h b/llvm/include/llvm/CodeGen/MachineStableHash.h new file mode 100644 index 000000000000..8423b2da1c78 --- /dev/null +++ b/llvm/include/llvm/CodeGen/MachineStableHash.h @@ -0,0 +1,30 @@ +//===------------ MachineStableHash.h - MIR Stable Hashing Utilities ------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Stable hashing for MachineInstr and MachineOperand. Useful for getting a +// hash across runs, modules, etc. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_MACHINESTABLEHASH_H +#define LLVM_CODEGEN_MACHINESTABLEHASH_H + +#include "llvm/CodeGen/StableHashing.h" + +namespace llvm { +class MachineInstr; +class MachineOperand; + +stable_hash stableHashValue(const MachineOperand &MO); +stable_hash stableHashValue(const MachineInstr &MI, bool HashVRegs = false, + bool HashConstantPoolIndices = false, + bool HashMemOperands = false); + +} // namespace llvm + +#endif diff --git a/llvm/include/llvm/CodeGen/MachineTraceMetrics.h b/llvm/include/llvm/CodeGen/MachineTraceMetrics.h index 025989504177..46b57365e653 100644 --- a/llvm/include/llvm/CodeGen/MachineTraceMetrics.h +++ b/llvm/include/llvm/CodeGen/MachineTraceMetrics.h @@ -140,13 +140,13 @@ public: /// successors. struct LiveInReg { /// The virtual register required, or a register unit.
- unsigned Reg; + Register Reg; /// For virtual registers: Minimum height of the defining instruction. /// For regunits: Height of the highest user in the trace. unsigned Height; - LiveInReg(unsigned Reg, unsigned Height = 0) : Reg(Reg), Height(Height) {} + LiveInReg(Register Reg, unsigned Height = 0) : Reg(Reg), Height(Height) {} }; /// Per-basic block information that relates to a specific trace through the diff --git a/llvm/include/llvm/CodeGen/MultiHazardRecognizer.h b/llvm/include/llvm/CodeGen/MultiHazardRecognizer.h new file mode 100644 index 000000000000..9846045ff014 --- /dev/null +++ b/llvm/include/llvm/CodeGen/MultiHazardRecognizer.h @@ -0,0 +1,47 @@ +//=- llvm/CodeGen/MultiHazardRecognizer.h - Scheduling Support ----*- C++ -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the MultiHazardRecognizer class, which is a wrapper +// for a set of ScheduleHazardRecognizer instances +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_MULTIHAZARDRECOGNIZER_H +#define LLVM_CODEGEN_MULTIHAZARDRECOGNIZER_H + +#include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/ScheduleHazardRecognizer.h" + +namespace llvm { + +class MachineInstr; +class SUnit; + +class MultiHazardRecognizer : public ScheduleHazardRecognizer { + SmallVector<std::unique_ptr<ScheduleHazardRecognizer>, 4> Recognizers; + +public: + MultiHazardRecognizer() = default; + void AddHazardRecognizer(std::unique_ptr<ScheduleHazardRecognizer> &&); + + bool atIssueLimit() const override; + HazardType getHazardType(SUnit *, int Stalls = 0) override; + void Reset() override; + void EmitInstruction(SUnit *) override; + void EmitInstruction(MachineInstr *) override; + unsigned PreEmitNoops(SUnit *) override; + unsigned PreEmitNoops(MachineInstr *) override; + bool ShouldPreferAnother(SUnit *) override; + void AdvanceCycle() override; + void RecedeCycle() override; + void EmitNoop() override; +}; + +} // end namespace llvm + +#endif // LLVM_CODEGEN_MULTIHAZARDRECOGNIZER_H diff --git a/llvm/include/llvm/CodeGen/NonRelocatableStringpool.h b/llvm/include/llvm/CodeGen/NonRelocatableStringpool.h index 56db30ff7d6d..fe07c70d85c5 100644 --- a/llvm/include/llvm/CodeGen/NonRelocatableStringpool.h +++ b/llvm/include/llvm/CodeGen/NonRelocatableStringpool.h @@ -39,7 +39,7 @@ public: /// Get the offset of string \p S in the string table. This can insert a new /// element or return the offset of a pre-existing one. - uint32_t getStringOffset(StringRef S) { return getEntry(S).getOffset(); } + uint64_t getStringOffset(StringRef S) { return getEntry(S).getOffset(); } /// Get permanent storage for \p S (but do not necessarily emit \p S in the /// output section). A latter call to getStringOffset() with the same string @@ -57,7 +57,7 @@ public: private: MapTy Strings; - uint32_t CurrentEndOffset = 0; + uint64_t CurrentEndOffset = 0; unsigned NumEntries = 0; DwarfStringPoolEntryRef EmptyString; std::function<StringRef(StringRef Input)> Translator; diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h index 9e5b4446c195..676ed2c65eb1 100644 --- a/llvm/include/llvm/CodeGen/Passes.h +++ b/llvm/include/llvm/CodeGen/Passes.h @@ -44,11 +44,15 @@ namespace llvm { /// the entry block. 
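The MultiHazardRecognizer declared above simply fans every scheduler query out to a list of child recognizers. A hedged sketch of how a target hook might compose one; the wrapper function, the itinerary arguments, and the commented-out MyTargetHazardRecognizer are illustrative assumptions, and the ScoreboardHazardRecognizer constructor is used as it already exists elsewhere in CodeGen:

#include "llvm/CodeGen/MultiHazardRecognizer.h"
#include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
#include <memory>

using namespace llvm;

// Combine a generic scoreboard recognizer with (optionally) target-specific
// ones; queries such as getHazardType and AdvanceCycle are forwarded to every
// registered recognizer.
static ScheduleHazardRecognizer *
createCombinedHazardRecognizer(const InstrItineraryData *Itin,
                               const ScheduleDAG *DAG) {
  auto *MHR = new MultiHazardRecognizer();
  MHR->AddHazardRecognizer(
      std::make_unique<ScoreboardHazardRecognizer>(Itin, DAG, "post-RA-sched"));
  // MHR->AddHazardRecognizer(std::make_unique<MyTargetHazardRecognizer>(...));
  return MHR;
}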
FunctionPass *createUnreachableBlockEliminationPass(); - /// createBBSectionsPrepare Pass - This pass assigns sections to machine basic - /// blocks and is enabled with -fbasic-block-sections. - /// Buf is a memory buffer that contains the list of functions and basic - /// block ids to selectively enable basic block sections. - MachineFunctionPass *createBBSectionsPreparePass(const MemoryBuffer *Buf); + /// createBasicBlockSections Pass - This pass assigns sections to machine + /// basic blocks and is enabled with -fbasic-block-sections. Buf is a memory + /// buffer that contains the list of functions and basic block ids to + /// selectively enable basic block sections. + MachineFunctionPass *createBasicBlockSectionsPass(const MemoryBuffer *Buf); + + /// createMachineFunctionSplitterPass - This pass splits machine functions + /// using profile information. + MachineFunctionPass *createMachineFunctionSplitterPass(); /// MachineFunctionPrinter pass - This pass prints out the machine function to /// the given stream as a debugging tool. @@ -72,10 +76,6 @@ namespace llvm { /// matching during instruction selection. FunctionPass *createCodeGenPreparePass(); - /// createScalarizeMaskedMemIntrinPass - Replace masked load, store, gather - /// and scatter intrinsics with scalar code when target doesn't support them. - FunctionPass *createScalarizeMaskedMemIntrinPass(); - /// AtomicExpandID -- Lowers atomic operations in terms of either cmpxchg /// load-linked/store-conditional loops. extern char &AtomicExpandID; @@ -387,10 +387,6 @@ namespace llvm { /// createJumpInstrTables - This pass creates jump-instruction tables. ModulePass *createJumpInstrTablesPass(); - /// createForwardControlFlowIntegrityPass - This pass adds control-flow - /// integrity. - ModulePass *createForwardControlFlowIntegrityPass(); - /// InterleavedAccess Pass - This pass identifies and matches interleaved /// memory accesses to target specific intrinsics. /// @@ -471,6 +467,9 @@ namespace llvm { /// Create Hardware Loop pass. \see HardwareLoops.cpp FunctionPass *createHardwareLoopsPass(); + /// This pass inserts pseudo probe annotation for callsite profiling. + FunctionPass *createPseudoProbeInserter(); + /// Create IR Type Promotion pass. \see TypePromotion.cpp FunctionPass *createTypePromotionPass(); @@ -483,9 +482,16 @@ namespace llvm { /// info was generated by another source such as clang. ModulePass *createStripDebugMachineModulePass(bool OnlyDebugified); + /// Creates MIR Check Debug pass. \see MachineCheckDebugify.cpp + ModulePass *createCheckDebugMachineModulePass(); + /// The pass fixups statepoint machine instruction to replace usage of /// caller saved registers with stack slots. extern char &FixupStatepointCallerSavedID; + + /// The pass transform load/store <256 x i32> to AMX load/store intrinsics + /// or split the data to two <128 x i32>. 
+ FunctionPass *createX86LowerAMXTypePass(); } // End llvm namespace #endif diff --git a/llvm/include/llvm/CodeGen/RDFGraph.h b/llvm/include/llvm/CodeGen/RDFGraph.h index 585f43e116f9..00d6ec93d555 100644 --- a/llvm/include/llvm/CodeGen/RDFGraph.h +++ b/llvm/include/llvm/CodeGen/RDFGraph.h @@ -753,10 +753,6 @@ namespace rdf { NodeAddr<RefNode*> getNextRelated(NodeAddr<InstrNode*> IA, NodeAddr<RefNode*> RA) const; - NodeAddr<RefNode*> getNextImp(NodeAddr<InstrNode*> IA, - NodeAddr<RefNode*> RA, bool Create); - NodeAddr<RefNode*> getNextImp(NodeAddr<InstrNode*> IA, - NodeAddr<RefNode*> RA) const; NodeAddr<RefNode*> getNextShadow(NodeAddr<InstrNode*> IA, NodeAddr<RefNode*> RA, bool Create); NodeAddr<RefNode*> getNextShadow(NodeAddr<InstrNode*> IA, diff --git a/llvm/include/llvm/CodeGen/RDFLiveness.h b/llvm/include/llvm/CodeGen/RDFLiveness.h index ea4890271726..d39d3585e7bd 100644 --- a/llvm/include/llvm/CodeGen/RDFLiveness.h +++ b/llvm/include/llvm/CodeGen/RDFLiveness.h @@ -18,6 +18,8 @@ #include "llvm/MC/LaneBitmask.h" #include <map> #include <set> +#include <unordered_map> +#include <unordered_set> #include <utility> namespace llvm { @@ -28,6 +30,30 @@ class MachineDominatorTree; class MachineRegisterInfo; class TargetRegisterInfo; +} // namespace llvm + +namespace llvm { +namespace rdf { +namespace detail { + +using NodeRef = std::pair<NodeId, LaneBitmask>; + +} // namespace detail +} // namespace rdf +} // namespace llvm + +namespace std { + +template <> struct hash<llvm::rdf::detail::NodeRef> { + std::size_t operator()(llvm::rdf::detail::NodeRef R) const { + return std::hash<llvm::rdf::NodeId>{}(R.first) ^ + std::hash<llvm::LaneBitmask::Type>{}(R.second.getAsInteger()); + } +}; + +} // namespace std + +namespace llvm { namespace rdf { struct Liveness { @@ -46,10 +72,9 @@ namespace rdf { std::map<MachineBasicBlock*,RegisterAggr> Map; }; - using NodeRef = std::pair<NodeId, LaneBitmask>; - using NodeRefSet = std::set<NodeRef>; - // RegisterId in RefMap must be normalized. - using RefMap = std::map<RegisterId, NodeRefSet>; + using NodeRef = detail::NodeRef; + using NodeRefSet = std::unordered_set<NodeRef>; + using RefMap = std::unordered_map<RegisterId, NodeRefSet>; Liveness(MachineRegisterInfo &mri, const DataFlowGraph &g) : DFG(g), TRI(g.getTRI()), PRI(g.getPRI()), MDT(g.getDT()), @@ -110,15 +135,14 @@ namespace rdf { // Cache of mapping from node ids (for RefNodes) to the containing // basic blocks. Not computing it each time for each node reduces // the liveness calculation time by a large fraction. - using NodeBlockMap = DenseMap<NodeId, MachineBasicBlock *>; - NodeBlockMap NBMap; + DenseMap<NodeId, MachineBasicBlock *> NBMap; // Phi information: // // RealUseMap // map: NodeId -> (map: RegisterId -> NodeRefSet) // phi id -> (map: register -> set of reached non-phi uses) - std::map<NodeId, RefMap> RealUseMap; + DenseMap<NodeId, RefMap> RealUseMap; // Inverse iterated dominance frontier. 
std::map<MachineBasicBlock*,std::set<MachineBasicBlock*>> IIDF; diff --git a/llvm/include/llvm/CodeGen/RDFRegisters.h b/llvm/include/llvm/CodeGen/RDFRegisters.h index 4afaf80e4659..c49b4883e1c1 100644 --- a/llvm/include/llvm/CodeGen/RDFRegisters.h +++ b/llvm/include/llvm/CodeGen/RDFRegisters.h @@ -91,6 +91,11 @@ namespace rdf { bool operator< (const RegisterRef &RR) const { return Reg < RR.Reg || (Reg == RR.Reg && Mask < RR.Mask); } + + size_t hash() const { + return std::hash<RegisterId>{}(Reg) ^ + std::hash<LaneBitmask::Type>{}(Mask.getAsInteger()); + } }; @@ -110,8 +115,6 @@ namespace rdf { return RegMasks.get(Register::stackSlot2Index(R)); } - RegisterRef normalize(RegisterRef RR) const; - bool alias(RegisterRef RA, RegisterRef RB) const { if (!isRegMaskId(RA.Reg)) return !isRegMaskId(RB.Reg) ? aliasRR(RA, RB) : aliasRM(RA, RB); @@ -128,6 +131,10 @@ namespace rdf { return MaskInfos[Register::stackSlot2Index(MaskId)].Units; } + const BitVector &getUnitAliases(uint32_t U) const { + return AliasInfos[U].Regs; + } + RegisterRef mapTo(RegisterRef RR, unsigned R) const; const TargetRegisterInfo &getTRI() const { return TRI; } @@ -142,12 +149,16 @@ namespace rdf { struct MaskInfo { BitVector Units; }; + struct AliasInfo { + BitVector Regs; + }; const TargetRegisterInfo &TRI; IndexedSet<const uint32_t*> RegMasks; std::vector<RegInfo> RegInfos; std::vector<UnitInfo> UnitInfos; std::vector<MaskInfo> MaskInfos; + std::vector<AliasInfo> AliasInfos; bool aliasRR(RegisterRef RA, RegisterRef RB) const; bool aliasRM(RegisterRef RR, RegisterRef RM) const; @@ -159,10 +170,15 @@ namespace rdf { : Units(pri.getTRI().getNumRegUnits()), PRI(pri) {} RegisterAggr(const RegisterAggr &RG) = default; + unsigned count() const { return Units.count(); } bool empty() const { return Units.none(); } bool hasAliasOf(RegisterRef RR) const; bool hasCoverOf(RegisterRef RR) const; + bool operator==(const RegisterAggr &A) const { + return DenseMapInfo<BitVector>::isEqual(Units, A.Units); + } + static bool isCoverOf(RegisterRef RA, RegisterRef RB, const PhysicalRegisterInfo &PRI) { return RegisterAggr(PRI).insert(RA).hasCoverOf(RB); @@ -179,6 +195,10 @@ namespace rdf { RegisterRef clearIn(RegisterRef RR) const; RegisterRef makeRegRef() const; + size_t hash() const { + return DenseMapInfo<BitVector>::getHashValue(Units); + } + void print(raw_ostream &OS) const; struct rr_iterator { @@ -233,8 +253,27 @@ namespace rdf { }; raw_ostream &operator<< (raw_ostream &OS, const PrintLaneMaskOpt &P); + raw_ostream &operator<< (raw_ostream &OS, const RegisterAggr &A); } // end namespace rdf } // end namespace llvm +namespace std { + template <> struct hash<llvm::rdf::RegisterRef> { + size_t operator()(llvm::rdf::RegisterRef A) const { + return A.hash(); + } + }; + template <> struct hash<llvm::rdf::RegisterAggr> { + size_t operator()(const llvm::rdf::RegisterAggr &A) const { + return A.hash(); + } + }; + template <> struct equal_to<llvm::rdf::RegisterAggr> { + bool operator()(const llvm::rdf::RegisterAggr &A, + const llvm::rdf::RegisterAggr &B) const { + return A == B; + } + }; +} #endif // LLVM_LIB_TARGET_HEXAGON_RDFREGISTERS_H diff --git a/llvm/include/llvm/CodeGen/ReachingDefAnalysis.h b/llvm/include/llvm/CodeGen/ReachingDefAnalysis.h index a8a436337e07..bcb48de2fe5a 100644 --- a/llvm/include/llvm/CodeGen/ReachingDefAnalysis.h +++ b/llvm/include/llvm/CodeGen/ReachingDefAnalysis.h @@ -139,23 +139,25 @@ public: /// Provides the instruction id of the closest reaching def instruction of /// PhysReg that reaches MI, relative to the 
begining of MI's basic block. - int getReachingDef(MachineInstr *MI, int PhysReg) const; + int getReachingDef(MachineInstr *MI, MCRegister PhysReg) const; /// Return whether A and B use the same def of PhysReg. - bool hasSameReachingDef(MachineInstr *A, MachineInstr *B, int PhysReg) const; + bool hasSameReachingDef(MachineInstr *A, MachineInstr *B, + MCRegister PhysReg) const; /// Return whether the reaching def for MI also is live out of its parent /// block. - bool isReachingDefLiveOut(MachineInstr *MI, int PhysReg) const; + bool isReachingDefLiveOut(MachineInstr *MI, MCRegister PhysReg) const; /// Return the local MI that produces the live out value for PhysReg, or /// nullptr for a non-live out or non-local def. MachineInstr *getLocalLiveOutMIDef(MachineBasicBlock *MBB, - int PhysReg) const; + MCRegister PhysReg) const; /// If a single MachineInstr creates the reaching definition, then return it. /// Otherwise return null. - MachineInstr *getUniqueReachingMIDef(MachineInstr *MI, int PhysReg) const; + MachineInstr *getUniqueReachingMIDef(MachineInstr *MI, + MCRegister PhysReg) const; /// If a single MachineInstr creates the reaching definition, for MIs operand /// at Idx, then return it. Otherwise return null. @@ -167,40 +169,45 @@ public: /// Provide whether the register has been defined in the same basic block as, /// and before, MI. - bool hasLocalDefBefore(MachineInstr *MI, int PhysReg) const; + bool hasLocalDefBefore(MachineInstr *MI, MCRegister PhysReg) const; /// Return whether the given register is used after MI, whether it's a local /// use or a live out. - bool isRegUsedAfter(MachineInstr *MI, int PhysReg) const; + bool isRegUsedAfter(MachineInstr *MI, MCRegister PhysReg) const; /// Return whether the given register is defined after MI. - bool isRegDefinedAfter(MachineInstr *MI, int PhysReg) const; + bool isRegDefinedAfter(MachineInstr *MI, MCRegister PhysReg) const; /// Provides the clearance - the number of instructions since the closest /// reaching def instuction of PhysReg that reaches MI. - int getClearance(MachineInstr *MI, MCPhysReg PhysReg) const; + int getClearance(MachineInstr *MI, MCRegister PhysReg) const; /// Provides the uses, in the same block as MI, of register that MI defines. /// This does not consider live-outs. - void getReachingLocalUses(MachineInstr *MI, int PhysReg, + void getReachingLocalUses(MachineInstr *MI, MCRegister PhysReg, InstSet &Uses) const; /// Search MBB for a definition of PhysReg and insert it into Defs. If no /// definition is found, recursively search the predecessor blocks for them. - void getLiveOuts(MachineBasicBlock *MBB, int PhysReg, InstSet &Defs, + void getLiveOuts(MachineBasicBlock *MBB, MCRegister PhysReg, InstSet &Defs, BlockSet &VisitedBBs) const; - void getLiveOuts(MachineBasicBlock *MBB, int PhysReg, InstSet &Defs) const; + void getLiveOuts(MachineBasicBlock *MBB, MCRegister PhysReg, + InstSet &Defs) const; /// For the given block, collect the instructions that use the live-in /// value of the provided register. Return whether the value is still /// live on exit. - bool getLiveInUses(MachineBasicBlock *MBB, int PhysReg, + bool getLiveInUses(MachineBasicBlock *MBB, MCRegister PhysReg, InstSet &Uses) const; /// Collect the users of the value stored in PhysReg, which is defined /// by MI. 
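The ReachingDefAnalysis queries above now take the physical register as MCRegister rather than a plain int. A small client sketch, assuming InstSet is the SmallPtrSetImpl-based set the analysis already uses; allUsesAreLocal is an illustrative name:

#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/ReachingDefAnalysis.h"

using namespace llvm;

// Collect every user (across blocks) of the value MI writes into PhysReg and
// report whether all of them live in MI's own basic block.
static bool allUsesAreLocal(ReachingDefAnalysis &RDA, MachineInstr &MI,
                            MCRegister PhysReg) {
  SmallPtrSet<MachineInstr *, 4> Uses;
  RDA.getGlobalUses(&MI, PhysReg, Uses);
  for (MachineInstr *Use : Uses)
    if (Use->getParent() != MI.getParent())
      return false;
  return true;
}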
- void getGlobalUses(MachineInstr *MI, int PhysReg, - InstSet &Uses) const; + void getGlobalUses(MachineInstr *MI, MCRegister PhysReg, InstSet &Uses) const; + + /// Collect all possible definitions of the value stored in PhysReg, which is + /// used by MI. + void getGlobalReachingDefs(MachineInstr *MI, MCRegister PhysReg, + InstSet &Defs) const; /// Return whether From can be moved forwards to just before To. bool isSafeToMoveForwards(MachineInstr *From, MachineInstr *To) const; @@ -224,12 +231,13 @@ public: /// Return whether a MachineInstr could be inserted at MI and safely define /// the given register without affecting the program. - bool isSafeToDefRegAt(MachineInstr *MI, int PhysReg) const; + bool isSafeToDefRegAt(MachineInstr *MI, MCRegister PhysReg) const; /// Return whether a MachineInstr could be inserted at MI and safely define /// the given register without affecting the program, ignoring any effects /// on the provided instructions. - bool isSafeToDefRegAt(MachineInstr *MI, int PhysReg, InstSet &Ignore) const; + bool isSafeToDefRegAt(MachineInstr *MI, MCRegister PhysReg, + InstSet &Ignore) const; private: /// Set up LiveRegs by merging predecessor live-out values. @@ -264,7 +272,8 @@ private: /// Provides the instruction of the closest reaching def instruction of /// PhysReg that reaches MI, relative to the begining of MI's basic block. - MachineInstr *getReachingLocalMIDef(MachineInstr *MI, int PhysReg) const; + MachineInstr *getReachingLocalMIDef(MachineInstr *MI, + MCRegister PhysReg) const; }; } // namespace llvm diff --git a/llvm/include/llvm/CodeGen/RegAllocPBQP.h b/llvm/include/llvm/CodeGen/RegAllocPBQP.h index f7f92248f4ce..1ed55082e32c 100644 --- a/llvm/include/llvm/CodeGen/RegAllocPBQP.h +++ b/llvm/include/llvm/CodeGen/RegAllocPBQP.h @@ -22,6 +22,8 @@ #include "llvm/CodeGen/PBQP/Math.h" #include "llvm/CodeGen/PBQP/ReductionRules.h" #include "llvm/CodeGen/PBQP/Solution.h" +#include "llvm/CodeGen/Register.h" +#include "llvm/MC/MCRegister.h" #include "llvm/Support/ErrorHandling.h" #include <algorithm> #include <cassert> @@ -96,13 +98,13 @@ public: AllowedRegVector() = default; AllowedRegVector(AllowedRegVector &&) = default; - AllowedRegVector(const std::vector<unsigned> &OptVec) - : NumOpts(OptVec.size()), Opts(new unsigned[NumOpts]) { + AllowedRegVector(const std::vector<MCRegister> &OptVec) + : NumOpts(OptVec.size()), Opts(new MCRegister[NumOpts]) { std::copy(OptVec.begin(), OptVec.end(), Opts.get()); } unsigned size() const { return NumOpts; } - unsigned operator[](size_t I) const { return Opts[I]; } + MCRegister operator[](size_t I) const { return Opts[I]; } bool operator==(const AllowedRegVector &Other) const { if (NumOpts != Other.NumOpts) @@ -116,12 +118,12 @@ public: private: unsigned NumOpts = 0; - std::unique_ptr<unsigned[]> Opts; + std::unique_ptr<MCRegister[]> Opts; }; inline hash_code hash_value(const AllowedRegVector &OptRegs) { - unsigned *OStart = OptRegs.Opts.get(); - unsigned *OEnd = OptRegs.Opts.get() + OptRegs.NumOpts; + MCRegister *OStart = OptRegs.Opts.get(); + MCRegister *OEnd = OptRegs.Opts.get() + OptRegs.NumOpts; return hash_combine(OptRegs.NumOpts, hash_combine_range(OStart, OEnd)); } @@ -143,11 +145,11 @@ public: LiveIntervals &LIS; MachineBlockFrequencyInfo &MBFI; - void setNodeIdForVReg(unsigned VReg, GraphBase::NodeId NId) { - VRegToNodeId[VReg] = NId; + void setNodeIdForVReg(Register VReg, GraphBase::NodeId NId) { + VRegToNodeId[VReg.id()] = NId; } - GraphBase::NodeId getNodeIdForVReg(unsigned VReg) const { + GraphBase::NodeId 
getNodeIdForVReg(Register VReg) const { auto VRegItr = VRegToNodeId.find(VReg); if (VRegItr == VRegToNodeId.end()) return GraphBase::invalidNodeId(); @@ -159,7 +161,7 @@ public: } private: - DenseMap<unsigned, GraphBase::NodeId> VRegToNodeId; + DenseMap<Register, GraphBase::NodeId> VRegToNodeId; AllowedRegVecPool AllowedRegVecs; }; @@ -197,8 +199,8 @@ public: NodeMetadata(NodeMetadata &&) = default; NodeMetadata& operator=(NodeMetadata &&) = default; - void setVReg(unsigned VReg) { this->VReg = VReg; } - unsigned getVReg() const { return VReg; } + void setVReg(Register VReg) { this->VReg = VReg; } + Register getVReg() const { return VReg; } void setAllowedRegs(GraphMetadata::AllowedRegVecRef AllowedRegs) { this->AllowedRegs = std::move(AllowedRegs); @@ -256,7 +258,7 @@ private: unsigned NumOpts = 0; unsigned DeniedOpts = 0; std::unique_ptr<unsigned[]> OptUnsafeEdges; - unsigned VReg = 0; + Register VReg; GraphMetadata::AllowedRegVecRef AllowedRegs; #ifndef NDEBUG diff --git a/llvm/include/llvm/CodeGen/Register.h b/llvm/include/llvm/CodeGen/Register.h index 054040cd29a1..d7057cfb76e0 100644 --- a/llvm/include/llvm/CodeGen/Register.h +++ b/llvm/include/llvm/CodeGen/Register.h @@ -40,24 +40,24 @@ public: /// frame index in a variable that normally holds a register. isStackSlot() /// returns true if Reg is in the range used for stack slots. /// - /// Note that isVirtualRegister() and isPhysicalRegister() cannot handle stack - /// slots, so if a variable may contains a stack slot, always check - /// isStackSlot() first. - /// + /// FIXME: remove in favor of member. static bool isStackSlot(unsigned Reg) { return MCRegister::isStackSlot(Reg); } + /// Return true if this is a stack slot. + bool isStack() const { return MCRegister::isStackSlot(Reg); } + /// Compute the frame index from a register value representing a stack slot. - static int stackSlot2Index(unsigned Reg) { - assert(isStackSlot(Reg) && "Not a stack slot"); + static int stackSlot2Index(Register Reg) { + assert(Reg.isStack() && "Not a stack slot"); return int(Reg - MCRegister::FirstStackSlot); } /// Convert a non-negative frame index to a stack slot register value. - static unsigned index2StackSlot(int FI) { + static Register index2StackSlot(int FI) { assert(FI >= 0 && "Cannot hold a negative frame index."); - return FI + MCRegister::FirstStackSlot; + return Register(FI + MCRegister::FirstStackSlot); } /// Return true if the specified register number is in @@ -69,20 +69,19 @@ public: /// Return true if the specified register number is in /// the virtual register namespace. static bool isVirtualRegister(unsigned Reg) { - assert(!isStackSlot(Reg) && "Not a register! Check isStackSlot() first."); - return Reg & MCRegister::VirtualRegFlag; + return Reg & MCRegister::VirtualRegFlag && !isStackSlot(Reg); } /// Convert a virtual register number to a 0-based index. /// The first virtual register in a function will get the index 0. - static unsigned virtReg2Index(unsigned Reg) { + static unsigned virtReg2Index(Register Reg) { assert(isVirtualRegister(Reg) && "Not a virtual register"); return Reg & ~MCRegister::VirtualRegFlag; } /// Convert a 0-based index to a virtual register number. /// This is the inverse operation of VirtReg2IndexFunctor below. 
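Register gains an isStack() member above, and the stack-slot and virtual-register converters now traffic in Register values. A minimal classification sketch; describeRegister is an illustrative name:

#include "llvm/CodeGen/Register.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

// Print what kind of value a Register encodes: a frame index, a virtual
// register index, or a physical register number.
static void describeRegister(Register R, raw_ostream &OS) {
  if (R.isStack())
    OS << "frame index " << Register::stackSlot2Index(R) << '\n';
  else if (R.isVirtual())
    OS << "virtual register #" << Register::virtReg2Index(R) << '\n';
  else if (R.isValid())
    OS << "physical register " << R.id() << '\n';
  else
    OS << "no register\n";
}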
- static unsigned index2VirtReg(unsigned Index) { + static Register index2VirtReg(unsigned Index) { assert(Index < (1u << 31) && "Index too large for virtual register range."); return Index | MCRegister::VirtualRegFlag; } @@ -115,6 +114,15 @@ public: return MCRegister(Reg); } + /// Utility to check-convert this value to a MCRegister. The caller is + /// expected to have already validated that this Register is, indeed, + /// physical. + MCRegister asMCReg() const { + assert(Reg == MCRegister::NoRegister || + MCRegister::isPhysicalRegister(Reg)); + return MCRegister(Reg); + } + bool isValid() const { return Reg != MCRegister::NoRegister; } /// Comparisons between register objects diff --git a/llvm/include/llvm/CodeGen/RegisterPressure.h b/llvm/include/llvm/CodeGen/RegisterPressure.h index 92333b859f1b..1deeb4d41511 100644 --- a/llvm/include/llvm/CodeGen/RegisterPressure.h +++ b/llvm/include/llvm/CodeGen/RegisterPressure.h @@ -37,10 +37,10 @@ class MachineRegisterInfo; class RegisterClassInfo; struct RegisterMaskPair { - unsigned RegUnit; ///< Virtual register or register unit. + Register RegUnit; ///< Virtual register or register unit. LaneBitmask LaneMask; - RegisterMaskPair(unsigned RegUnit, LaneBitmask LaneMask) + RegisterMaskPair(Register RegUnit, LaneBitmask LaneMask) : RegUnit(RegUnit), LaneMask(LaneMask) {} }; @@ -157,7 +157,7 @@ public: const_iterator begin() const { return &PressureChanges[0]; } const_iterator end() const { return &PressureChanges[MaxPSets]; } - void addPressureChange(unsigned RegUnit, bool IsDec, + void addPressureChange(Register RegUnit, bool IsDec, const MachineRegisterInfo *MRI); void dump(const TargetRegisterInfo &TRI) const; @@ -275,24 +275,24 @@ private: RegSet Regs; unsigned NumRegUnits; - unsigned getSparseIndexFromReg(unsigned Reg) const { - if (Register::isVirtualRegister(Reg)) + unsigned getSparseIndexFromReg(Register Reg) const { + if (Reg.isVirtual()) return Register::virtReg2Index(Reg) + NumRegUnits; assert(Reg < NumRegUnits); return Reg; } - unsigned getRegFromSparseIndex(unsigned SparseIndex) const { + Register getRegFromSparseIndex(unsigned SparseIndex) const { if (SparseIndex >= NumRegUnits) - return Register::index2VirtReg(SparseIndex-NumRegUnits); - return SparseIndex; + return Register::index2VirtReg(SparseIndex - NumRegUnits); + return Register(SparseIndex); } public: void clear(); void init(const MachineRegisterInfo &MRI); - LaneBitmask contains(unsigned Reg) const { + LaneBitmask contains(Register Reg) const { unsigned SparseIndex = getSparseIndexFromReg(Reg); RegSet::const_iterator I = Regs.find(SparseIndex); if (I == Regs.end()) @@ -332,7 +332,7 @@ public: template<typename ContainerT> void appendTo(ContainerT &To) const { for (const IndexMaskPair &P : Regs) { - unsigned Reg = getRegFromSparseIndex(P.Index); + Register Reg = getRegFromSparseIndex(P.Index); if (P.LaneMask.any()) To.push_back(RegisterMaskPair(Reg, P.LaneMask)); } @@ -390,7 +390,7 @@ class RegPressureTracker { LiveRegSet LiveRegs; /// Set of vreg defs that start a live range. - SparseSet<unsigned, VirtReg2IndexFunctor> UntiedDefs; + SparseSet<Register, VirtReg2IndexFunctor> UntiedDefs; /// Live-through pressure. std::vector<unsigned> LiveThruPressure; @@ -532,7 +532,7 @@ public: return getDownwardPressure(MI, PressureResult, MaxPressureResult); } - bool hasUntiedDef(unsigned VirtReg) const { + bool hasUntiedDef(Register VirtReg) const { return UntiedDefs.count(VirtReg); } @@ -548,9 +548,9 @@ protected: /// after the current position. 
SlotIndex getCurrSlot() const; - void increaseRegPressure(unsigned RegUnit, LaneBitmask PreviousMask, + void increaseRegPressure(Register RegUnit, LaneBitmask PreviousMask, LaneBitmask NewMask); - void decreaseRegPressure(unsigned RegUnit, LaneBitmask PreviousMask, + void decreaseRegPressure(Register RegUnit, LaneBitmask PreviousMask, LaneBitmask NewMask); void bumpDeadDefs(ArrayRef<RegisterMaskPair> DeadDefs); @@ -561,9 +561,9 @@ protected: void discoverLiveInOrOut(RegisterMaskPair Pair, SmallVectorImpl<RegisterMaskPair> &LiveInOrOut); - LaneBitmask getLastUsedLanes(unsigned RegUnit, SlotIndex Pos) const; - LaneBitmask getLiveLanesAt(unsigned RegUnit, SlotIndex Pos) const; - LaneBitmask getLiveThroughAt(unsigned RegUnit, SlotIndex Pos) const; + LaneBitmask getLastUsedLanes(Register RegUnit, SlotIndex Pos) const; + LaneBitmask getLiveLanesAt(Register RegUnit, SlotIndex Pos) const; + LaneBitmask getLiveThroughAt(Register RegUnit, SlotIndex Pos) const; }; void dumpRegSetPressure(ArrayRef<unsigned> SetPressure, diff --git a/llvm/include/llvm/CodeGen/RegisterScavenging.h b/llvm/include/llvm/CodeGen/RegisterScavenging.h index 5b5a80a67e7f..4f48ea2dc8e8 100644 --- a/llvm/include/llvm/CodeGen/RegisterScavenging.h +++ b/llvm/include/llvm/CodeGen/RegisterScavenging.h @@ -89,15 +89,6 @@ public: while (MBBI != I) forward(); } - /// Invert the behavior of forward() on the current instruction (undo the - /// changes to the available registers made by forward()). - void unprocess(); - - /// Unprocess instructions until you reach the provided iterator. - void unprocess(MachineBasicBlock::iterator I) { - while (MBBI != I) unprocess(); - } - /// Update internal register state and move MBB iterator backwards. /// Contrary to unprocess() this method gives precise results even in the /// absence of kill flags. @@ -203,10 +194,10 @@ private: void determineKillsAndDefs(); /// Add all Reg Units that Reg contains to BV. - void addRegUnits(BitVector &BV, Register Reg); + void addRegUnits(BitVector &BV, MCRegister Reg); /// Remove all Reg Units that \p Reg contains from \p BV. - void removeRegUnits(BitVector &BV, Register Reg); + void removeRegUnits(BitVector &BV, MCRegister Reg); /// Return the candidate register that is unused for the longest after /// StartMI. UseMI is set to the instruction where the search stopped. diff --git a/llvm/include/llvm/CodeGen/ResourcePriorityQueue.h b/llvm/include/llvm/CodeGen/ResourcePriorityQueue.h index b38cd4924174..bd63dd875621 100644 --- a/llvm/include/llvm/CodeGen/ResourcePriorityQueue.h +++ b/llvm/include/llvm/CodeGen/ResourcePriorityQueue.h @@ -107,7 +107,6 @@ namespace llvm { /// InitNumRegDefsLeft - Determine the # of regs defined by this node. /// void initNumRegDefsLeft(SUnit *SU); - void updateNumRegDefsLeft(SUnit *SU); int regPressureDelta(SUnit *SU, bool RawPressure = false); int rawRegPressureDelta (SUnit *SU, unsigned RCId); diff --git a/llvm/include/llvm/CodeGen/RuntimeLibcalls.h b/llvm/include/llvm/CodeGen/RuntimeLibcalls.h index f71f39e5bf03..86e24cab76f6 100644 --- a/llvm/include/llvm/CodeGen/RuntimeLibcalls.h +++ b/llvm/include/llvm/CodeGen/RuntimeLibcalls.h @@ -15,6 +15,7 @@ #define LLVM_CODEGEN_RUNTIMELIBCALLS_H #include "llvm/CodeGen/ValueTypes.h" +#include "llvm/Support/AtomicOrdering.h" namespace llvm { namespace RTLIB { @@ -60,6 +61,10 @@ namespace RTLIB { /// UNKNOWN_LIBCALL if there is none. 
Libcall getSYNC(unsigned Opc, MVT VT); + /// Return the outline atomics value for the given opcode, atomic ordering + /// and type, or UNKNOWN_LIBCALL if there is none. + Libcall getOUTLINE_ATOMIC(unsigned Opc, AtomicOrdering Order, MVT VT); + /// getMEMCPY_ELEMENT_UNORDERED_ATOMIC - Return /// MEMCPY_ELEMENT_UNORDERED_ATOMIC_* value for the given element size or /// UNKNOW_LIBCALL if there is none. diff --git a/llvm/include/llvm/CodeGen/ScheduleDAGInstrs.h b/llvm/include/llvm/CodeGen/ScheduleDAGInstrs.h index 1eb9b9f322ba..50b186de2b05 100644 --- a/llvm/include/llvm/CodeGen/ScheduleDAGInstrs.h +++ b/llvm/include/llvm/CodeGen/ScheduleDAGInstrs.h @@ -268,6 +268,11 @@ namespace llvm { return SU->SchedClass; } + /// IsReachable - Checks if SU is reachable from TargetSU. + bool IsReachable(SUnit *SU, SUnit *TargetSU) { + return Topo.IsReachable(SU, TargetSU); + } + /// Returns an iterator to the top of the current scheduling region. MachineBasicBlock::iterator begin() const { return RegionBegin; } @@ -362,16 +367,6 @@ namespace llvm { void addVRegDefDeps(SUnit *SU, unsigned OperIdx); void addVRegUseDeps(SUnit *SU, unsigned OperIdx); - /// Initializes register live-range state for updating kills. - /// PostRA helper for rewriting kill flags. - void startBlockForKills(MachineBasicBlock *BB); - - /// Toggles a register operand kill flag. - /// - /// Other adjustments may be made to the instruction if necessary. Return - /// true if the operand has been deleted, false if not. - void toggleKillFlag(MachineInstr &MI, MachineOperand &MO); - /// Returns a mask for which lanes get read/written by the given (register) /// machine operand. LaneBitmask getLaneMaskForMO(const MachineOperand &MO) const; @@ -393,10 +388,7 @@ namespace llvm { /// Returns an existing SUnit for this MI, or nullptr. inline SUnit *ScheduleDAGInstrs::getSUnit(MachineInstr *MI) const { - DenseMap<MachineInstr*, SUnit*>::const_iterator I = MISUnitMap.find(MI); - if (I == MISUnitMap.end()) - return nullptr; - return I->second; + return MISUnitMap.lookup(MI); } } // end namespace llvm diff --git a/llvm/include/llvm/CodeGen/ScheduleHazardRecognizer.h b/llvm/include/llvm/CodeGen/ScheduleHazardRecognizer.h index 37590f496ca2..9f1101b658d0 100644 --- a/llvm/include/llvm/CodeGen/ScheduleHazardRecognizer.h +++ b/llvm/include/llvm/CodeGen/ScheduleHazardRecognizer.h @@ -57,7 +57,7 @@ public: /// other instruction is available, issue it first. /// * NoopHazard: issuing this instruction would break the program. If /// some other instruction can be issued, do so, otherwise issue a noop. - virtual HazardType getHazardType(SUnit *m, int Stalls = 0) { + virtual HazardType getHazardType(SUnit *, int Stalls = 0) { return NoHazard; } @@ -114,6 +114,14 @@ public: // Default implementation: count it as a cycle. AdvanceCycle(); } + + /// EmitNoops - This callback is invoked when noops were added to the + /// instruction stream. + virtual void EmitNoops(unsigned Quantity) { + // Default implementation: count it as a cycle. 
+ for (unsigned i = 0; i < Quantity; ++i) + EmitNoop(); + } }; } // end namespace llvm diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h index f26ab6f287a0..aeb488dd6c83 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAG.h +++ b/llvm/include/llvm/CodeGen/SelectionDAG.h @@ -27,7 +27,6 @@ #include "llvm/ADT/iterator.h" #include "llvm/ADT/iterator_range.h" #include "llvm/CodeGen/DAGCombine.h" -#include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineMemOperand.h" @@ -64,6 +63,7 @@ class ConstantFP; class ConstantInt; class DataLayout; struct fltSemantics; +class FunctionLoweringInfo; class GlobalValue; struct KnownBits; class LegacyDivergenceAnalysis; @@ -331,6 +331,29 @@ public: virtual void anchor(); }; + /// Help to insert SDNodeFlags automatically in transforming. Use + /// RAII to save and resume flags in current scope. + class FlagInserter { + SelectionDAG &DAG; + SDNodeFlags Flags; + FlagInserter *LastInserter; + + public: + FlagInserter(SelectionDAG &SDAG, SDNodeFlags Flags) + : DAG(SDAG), Flags(Flags), + LastInserter(SDAG.getFlagInserter()) { + SDAG.setFlagInserter(this); + } + FlagInserter(SelectionDAG &SDAG, SDNode *N) + : FlagInserter(SDAG, N->getFlags()) {} + + FlagInserter(const FlagInserter &) = delete; + FlagInserter &operator=(const FlagInserter &) = delete; + ~FlagInserter() { DAG.setFlagInserter(LastInserter); } + + const SDNodeFlags getFlags() const { return Flags; } + }; + /// When true, additional steps are taken to /// ensure that getConstant() and similar functions return DAG nodes that /// have legal types. This is important after type legalization since @@ -433,6 +456,9 @@ public: ProfileSummaryInfo *getPSI() const { return PSI; } BlockFrequencyInfo *getBFI() const { return BFI; } + FlagInserter *getFlagInserter() { return Inserter; } + void setFlagInserter(FlagInserter *FI) { Inserter = FI; } + /// Just dump dot graph to a user-provided path and title. /// This doesn't open the dot viewer program and /// helps visualization when outside debugging session. @@ -695,9 +721,7 @@ public: // When generating a branch to a BB, we don't in general know enough // to provide debug info for the BB at that time, so keep this one around. SDValue getBasicBlock(MachineBasicBlock *MBB); - SDValue getBasicBlock(MachineBasicBlock *MBB, SDLoc dl); SDValue getExternalSymbol(const char *Sym, EVT VT); - SDValue getExternalSymbol(const char *Sym, const SDLoc &dl, EVT VT); SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags = 0); SDValue getMCSymbol(MCSymbol *Sym, EVT VT); @@ -870,7 +894,7 @@ public: /// Returns sum of the base pointer and offset. /// Unlike getObjectPtrOffset this does not set NoUnsignedWrap by default. - SDValue getMemBasePlusOffset(SDValue Base, int64_t Offset, const SDLoc &DL, + SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags = SDNodeFlags()); SDValue getMemBasePlusOffset(SDValue Base, SDValue Offset, const SDLoc &DL, const SDNodeFlags Flags = SDNodeFlags()); @@ -878,7 +902,7 @@ public: /// Create an add instruction with appropriate flags when used for /// addressing some offset of an object. i.e. if a load is split into multiple /// components, create an add nuw from the base pointer to the offset. 
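SelectionDAG::FlagInserter, added above, is an RAII helper: while one is in scope, getNode() calls that do not pass explicit flags pick their SDNodeFlags up from it (see the flag-less overloads further down). A hedged sketch of a combine-style use; rebuildWithSameFlags and the specific opcodes are illustrative:

#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/SelectionDAG.h"

using namespace llvm;

// Rebuild (A * B) + C next to N so that every node created in this scope
// carries N's flags (nuw/nsw, fast-math, ...) without passing them explicitly.
static SDValue rebuildWithSameFlags(SelectionDAG &DAG, SDNode *N, SDValue A,
                                    SDValue B, SDValue C) {
  SelectionDAG::FlagInserter Inserter(DAG, N); // restores the previous inserter on destruction
  SDLoc DL(N);
  EVT VT = N->getValueType(0);
  SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, A, B);
  return DAG.getNode(ISD::ADD, DL, VT, Mul, C);
}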
- SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, int64_t Offset) { + SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset) { SDNodeFlags Flags; Flags.setNoUnsignedWrap(true); return getMemBasePlusOffset(Ptr, Offset, SL, Flags); @@ -945,21 +969,31 @@ public: SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef<SDUse> Ops); SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, - ArrayRef<SDValue> Ops, const SDNodeFlags Flags = SDNodeFlags()); + ArrayRef<SDValue> Ops, const SDNodeFlags Flags); SDValue getNode(unsigned Opcode, const SDLoc &DL, ArrayRef<EVT> ResultTys, ArrayRef<SDValue> Ops); SDValue getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList, - ArrayRef<SDValue> Ops, const SDNodeFlags Flags = SDNodeFlags()); + ArrayRef<SDValue> Ops, const SDNodeFlags Flags); + + // Use flags from current flag inserter. + SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, + ArrayRef<SDValue> Ops); + SDValue getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList, + ArrayRef<SDValue> Ops); + SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue Operand); + SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1, + SDValue N2); + SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1, + SDValue N2, SDValue N3); // Specialize based on number of operands. SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT); SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue Operand, - const SDNodeFlags Flags = SDNodeFlags()); + const SDNodeFlags Flags); SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1, - SDValue N2, const SDNodeFlags Flags = SDNodeFlags()); + SDValue N2, const SDNodeFlags Flags); SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1, - SDValue N2, SDValue N3, - const SDNodeFlags Flags = SDNodeFlags()); + SDValue N2, SDValue N3, const SDNodeFlags Flags); SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1, SDValue N2, SDValue N3, SDValue N4); SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1, @@ -1169,6 +1203,12 @@ public: SDValue getLifetimeNode(bool IsStart, const SDLoc &dl, SDValue Chain, int FrameIndex, int64_t Size, int64_t Offset = -1); + /// Creates a PseudoProbeSDNode with function GUID `Guid` and + /// the index of the block `Index` it is probing, as well as the attributes + /// `attr` of the probe. + SDValue getPseudoProbeNode(const SDLoc &Dl, SDValue Chain, uint64_t Guid, + uint64_t Index, uint32_t Attr); + /// Create a MERGE_VALUES node from the given operands. SDValue getMergeValues(ArrayRef<SDValue> Ops, const SDLoc &dl); @@ -1178,14 +1218,15 @@ public: /// This function will set the MOLoad flag on MMOFlags, but you can set it if /// you want. The MOStore flag must not be set. SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, - MachinePointerInfo PtrInfo, MaybeAlign Alignment, + MachinePointerInfo PtrInfo, + MaybeAlign Alignment = MaybeAlign(), MachineMemOperand::Flags MMOFlags = MachineMemOperand::MONone, const AAMDNodes &AAInfo = AAMDNodes(), const MDNode *Ranges = nullptr); /// FIXME: Remove once transition to Align is over. 
inline SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, - MachinePointerInfo PtrInfo, unsigned Alignment = 0, + MachinePointerInfo PtrInfo, unsigned Alignment, MachineMemOperand::Flags MMOFlags = MachineMemOperand::MONone, const AAMDNodes &AAInfo = AAMDNodes(), const MDNode *Ranges = nullptr) { @@ -1197,14 +1238,14 @@ public: SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, - MaybeAlign Alignment, + MaybeAlign Alignment = MaybeAlign(), MachineMemOperand::Flags MMOFlags = MachineMemOperand::MONone, const AAMDNodes &AAInfo = AAMDNodes()); /// FIXME: Remove once transition to Align is over. inline SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, - unsigned Alignment = 0, + unsigned Alignment, MachineMemOperand::Flags MMOFlags = MachineMemOperand::MONone, const AAMDNodes &AAInfo = AAMDNodes()) { return getExtLoad(ExtType, dl, VT, Chain, Ptr, PtrInfo, MemVT, @@ -1221,13 +1262,12 @@ public: MachineMemOperand::Flags MMOFlags = MachineMemOperand::MONone, const AAMDNodes &AAInfo = AAMDNodes(), const MDNode *Ranges = nullptr); - inline SDValue - getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, - const SDLoc &dl, SDValue Chain, SDValue Ptr, SDValue Offset, - MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment, - MachineMemOperand::Flags MMOFlags = MachineMemOperand::MONone, - const AAMDNodes &AAInfo = AAMDNodes(), - const MDNode *Ranges = nullptr) { + inline SDValue getLoad( + ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, const SDLoc &dl, + SDValue Chain, SDValue Ptr, SDValue Offset, MachinePointerInfo PtrInfo, + EVT MemVT, MaybeAlign Alignment = MaybeAlign(), + MachineMemOperand::Flags MMOFlags = MachineMemOperand::MONone, + const AAMDNodes &AAInfo = AAMDNodes(), const MDNode *Ranges = nullptr) { // Ensures that codegen never sees a None Alignment. return getLoad(AM, ExtType, VT, dl, Chain, Ptr, Offset, PtrInfo, MemVT, Alignment.getValueOr(getEVTAlign(MemVT)), MMOFlags, AAInfo, @@ -1237,7 +1277,7 @@ public: inline SDValue getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, SDValue Offset, - MachinePointerInfo PtrInfo, EVT MemVT, unsigned Alignment = 0, + MachinePointerInfo PtrInfo, EVT MemVT, unsigned Alignment, MachineMemOperand::Flags MMOFlags = MachineMemOperand::MONone, const AAMDNodes &AAInfo = AAMDNodes(), const MDNode *Ranges = nullptr) { @@ -1260,7 +1300,7 @@ public: const AAMDNodes &AAInfo = AAMDNodes()); inline SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, - MachinePointerInfo PtrInfo, MaybeAlign Alignment, + MachinePointerInfo PtrInfo, MaybeAlign Alignment = MaybeAlign(), MachineMemOperand::Flags MMOFlags = MachineMemOperand::MONone, const AAMDNodes &AAInfo = AAMDNodes()) { return getStore(Chain, dl, Val, Ptr, PtrInfo, @@ -1270,7 +1310,7 @@ public: /// FIXME: Remove once transition to Align is over. 
inline SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, - MachinePointerInfo PtrInfo, unsigned Alignment = 0, + MachinePointerInfo PtrInfo, unsigned Alignment, MachineMemOperand::Flags MMOFlags = MachineMemOperand::MONone, const AAMDNodes &AAInfo = AAMDNodes()) { return getStore(Chain, dl, Val, Ptr, PtrInfo, MaybeAlign(Alignment), @@ -1285,7 +1325,8 @@ public: const AAMDNodes &AAInfo = AAMDNodes()); inline SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, - MachinePointerInfo PtrInfo, EVT SVT, MaybeAlign Alignment, + MachinePointerInfo PtrInfo, EVT SVT, + MaybeAlign Alignment = MaybeAlign(), MachineMemOperand::Flags MMOFlags = MachineMemOperand::MONone, const AAMDNodes &AAInfo = AAMDNodes()) { return getTruncStore(Chain, dl, Val, Ptr, PtrInfo, SVT, @@ -1295,7 +1336,7 @@ public: /// FIXME: Remove once transition to Align is over. inline SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, - MachinePointerInfo PtrInfo, EVT SVT, unsigned Alignment = 0, + MachinePointerInfo PtrInfo, EVT SVT, unsigned Alignment, MachineMemOperand::Flags MMOFlags = MachineMemOperand::MONone, const AAMDNodes &AAInfo = AAMDNodes()) { return getTruncStore(Chain, dl, Val, Ptr, PtrInfo, SVT, @@ -1321,10 +1362,11 @@ public: ISD::MemIndexedMode AM); SDValue getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef<SDValue> Ops, MachineMemOperand *MMO, - ISD::MemIndexType IndexType); + ISD::MemIndexType IndexType, ISD::LoadExtType ExtTy); SDValue getMaskedScatter(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef<SDValue> Ops, MachineMemOperand *MMO, - ISD::MemIndexType IndexType); + ISD::MemIndexType IndexType, + bool IsTruncating = false); /// Construct a node to track a Value* through the backend. SDValue getSrcValue(const Value *v); @@ -1389,6 +1431,9 @@ public: void setNodeMemRefs(MachineSDNode *N, ArrayRef<MachineMemOperand *> NewMemRefs); + // Calculate divergence of node \p N based on its operands. + bool calculateDivergence(SDNode *N); + // Propagates the change in divergence to users void updateDivergence(SDNode * N); @@ -1409,8 +1454,6 @@ public: EVT VT2, ArrayRef<SDValue> Ops); SDNode *SelectNodeTo(SDNode *N, unsigned MachineOpc, EVT VT1, EVT VT2, EVT VT3, ArrayRef<SDValue> Ops); - SDNode *SelectNodeTo(SDNode *N, unsigned TargetOpc, EVT VT1, - EVT VT2, SDValue Op1); SDNode *SelectNodeTo(SDNode *N, unsigned MachineOpc, EVT VT1, EVT VT2, SDValue Op1, SDValue Op2); SDNode *SelectNodeTo(SDNode *N, unsigned MachineOpc, SDVTList VTs, @@ -1468,8 +1511,13 @@ public: SDValue Operand, SDValue Subreg); /// Get the specified node if it's already available, or else return NULL. - SDNode *getNodeIfExists(unsigned Opcode, SDVTList VTList, ArrayRef<SDValue> Ops, - const SDNodeFlags Flags = SDNodeFlags()); + SDNode *getNodeIfExists(unsigned Opcode, SDVTList VTList, + ArrayRef<SDValue> Ops, const SDNodeFlags Flags); + SDNode *getNodeIfExists(unsigned Opcode, SDVTList VTList, + ArrayRef<SDValue> Ops); + + /// Check if a node exists without modifying its flags. + bool doesNodeExist(unsigned Opcode, SDVTList VTList, ArrayRef<SDValue> Ops); /// Creates a SDDbgValue node. SDDbgValue *getDbgValue(DIVariable *Var, DIExpression *Expr, SDNode *N, @@ -1543,7 +1591,14 @@ public: /// chain to the token factor. This ensures that the new memory node will have /// the same relative memory dependency position as the old load. Returns the /// new merged load chain. 
- SDValue makeEquivalentMemoryOrdering(LoadSDNode *Old, SDValue New); + SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain); + + /// If an existing load has uses of its chain, create a token factor node with + /// that chain and the new memory node's chain and update users of the old + /// chain to the token factor. This ensures that the new memory node will have + /// the same relative memory dependency position as the old load. Returns the + /// new merged load chain. + SDValue makeEquivalentMemoryOrdering(LoadSDNode *OldLoad, SDValue NewMemOp); /// Topological-sort the AllNodes list and a /// assign a unique node id for each node in the DAG based on their @@ -1781,7 +1836,8 @@ public: /// for \p DemandedElts. /// /// NOTE: The function will return true for a demanded splat of UNDEF values. - bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts); + bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, + unsigned Depth = 0); /// Test whether \p V has a splatted value. bool isSplatValue(SDValue V, bool AllowUndefs = false); @@ -1903,14 +1959,14 @@ public: } /// Test whether the given value is a constant int or similar node. - SDNode *isConstantIntBuildVectorOrConstantInt(SDValue N); + SDNode *isConstantIntBuildVectorOrConstantInt(SDValue N) const; /// Test whether the given value is a constant FP or similar node. - SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N); + SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) const ; /// \returns true if \p N is any kind of constant or build_vector of /// constants, int or float. If a vector, it may not necessarily be a splat. - inline bool isConstantValueOfAnyType(SDValue N) { + inline bool isConstantValueOfAnyType(SDValue N) const { return isConstantIntBuildVectorOrConstantInt(N) || isConstantFPBuildVectorOrConstantFP(N); } @@ -1958,6 +2014,10 @@ public: bool shouldOptForSize() const; + /// Get the (commutative) neutral element for the given opcode, if it exists. + SDValue getNeutralElement(unsigned Opcode, const SDLoc &DL, EVT VT, + SDNodeFlags Flags); + private: void InsertNode(SDNode *N); bool RemoveNodeFromCSEMaps(SDNode *N); @@ -1998,6 +2058,8 @@ private: std::map<std::pair<std::string, unsigned>, SDNode *> TargetExternalSymbols; DenseMap<MCSymbol *, SDNode *> MCSymbols; + + FlagInserter *Inserter = nullptr; }; template <> struct GraphTraits<SelectionDAG*> : public GraphTraits<SDNode*> { diff --git a/llvm/include/llvm/CodeGen/SelectionDAGISel.h b/llvm/include/llvm/CodeGen/SelectionDAGISel.h index 3bfbf3765e4f..84bb11edd715 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAGISel.h +++ b/llvm/include/llvm/CodeGen/SelectionDAGISel.h @@ -323,8 +323,6 @@ private: SDNode *MorphNode(SDNode *Node, unsigned TargetOpc, SDVTList VTList, ArrayRef<SDValue> Ops, unsigned EmitNodeInfo); - SDNode *MutateStrictFPToFP(SDNode *Node, unsigned NewOpc); - /// Prepares the landing pad to take incoming values or do other EH /// personality specific tasks. Returns true if the block should be /// instruction selected, false if no code should be emitted for it. 
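makeEquivalentMemoryOrdering is generalized above to work from a chain value, with the original LoadSDNode form kept as a wrapper. A sketch of the usual pattern, building a replacement load and keeping its ordering; replaceLoadKeepChain is an illustrative name and the default alignment and memory-operand details are elided:

#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"

using namespace llvm;

// Build a replacement load for Ld and splice it into the chain graph so users
// of Ld's chain are ordered after the new load as well.
static SDValue replaceLoadKeepChain(SelectionDAG &DAG, LoadSDNode *Ld, EVT NewVT) {
  SDLoc DL(Ld);
  SDValue NewLoad = DAG.getLoad(NewVT, DL, Ld->getChain(), Ld->getBasePtr(),
                                Ld->getPointerInfo());
  DAG.makeEquivalentMemoryOrdering(Ld, NewLoad); // LoadSDNode overload forwards to the chain form
  return NewLoad;
}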
diff --git a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h index 7c2b49087edd..000e383b71eb 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h +++ b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h @@ -85,29 +85,42 @@ namespace ISD { /// Node predicates - /// If N is a BUILD_VECTOR node whose elements are all the same constant or - /// undefined, return true and return the constant value in \p SplatValue. - bool isConstantSplatVector(const SDNode *N, APInt &SplatValue); - - /// Return true if the specified node is a BUILD_VECTOR where all of the - /// elements are ~0 or undef. - bool isBuildVectorAllOnes(const SDNode *N); - - /// Return true if the specified node is a BUILD_VECTOR where all of the - /// elements are 0 or undef. - bool isBuildVectorAllZeros(const SDNode *N); - - /// Return true if the specified node is a BUILD_VECTOR node of all - /// ConstantSDNode or undef. - bool isBuildVectorOfConstantSDNodes(const SDNode *N); - - /// Return true if the specified node is a BUILD_VECTOR node of all - /// ConstantFPSDNode or undef. - bool isBuildVectorOfConstantFPSDNodes(const SDNode *N); - - /// Return true if the node has at least one operand and all operands of the - /// specified node are ISD::UNDEF. - bool allOperandsUndef(const SDNode *N); +/// If N is a BUILD_VECTOR or SPLAT_VECTOR node whose elements are all the +/// same constant or undefined, return true and return the constant value in +/// \p SplatValue. +bool isConstantSplatVector(const SDNode *N, APInt &SplatValue); + +/// Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where +/// all of the elements are ~0 or undef. If \p BuildVectorOnly is set to +/// true, it only checks BUILD_VECTOR. +bool isConstantSplatVectorAllOnes(const SDNode *N, + bool BuildVectorOnly = false); + +/// Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where +/// all of the elements are 0 or undef. If \p BuildVectorOnly is set to true, it +/// only checks BUILD_VECTOR. +bool isConstantSplatVectorAllZeros(const SDNode *N, + bool BuildVectorOnly = false); + +/// Return true if the specified node is a BUILD_VECTOR where all of the +/// elements are ~0 or undef. +bool isBuildVectorAllOnes(const SDNode *N); + +/// Return true if the specified node is a BUILD_VECTOR where all of the +/// elements are 0 or undef. +bool isBuildVectorAllZeros(const SDNode *N); + +/// Return true if the specified node is a BUILD_VECTOR node of all +/// ConstantSDNode or undef. +bool isBuildVectorOfConstantSDNodes(const SDNode *N); + +/// Return true if the specified node is a BUILD_VECTOR node of all +/// ConstantFPSDNode or undef. +bool isBuildVectorOfConstantFPSDNodes(const SDNode *N); + +/// Return true if the node has at least one operand and all operands of the +/// specified node are ISD::UNDEF. +bool allOperandsUndef(const SDNode *N); } // end namespace ISD @@ -180,8 +193,8 @@ public: return getValueType().getSizeInBits(); } - TypeSize getScalarValueSizeInBits() const { - return getValueType().getScalarType().getSizeInBits(); + uint64_t getScalarValueSizeInBits() const { + return getValueType().getScalarType().getFixedSizeInBits(); } // Forwarding methods - These forward to the corresponding methods in SDNode. @@ -357,11 +370,6 @@ template<> struct simplify_type<SDUse> { /// the backend. struct SDNodeFlags { private: - // This bit is used to determine if the flags are in a defined state. 
- // Flag bits can only be masked out during intersection if the masking flags - // are defined. - bool AnyDefined : 1; - bool NoUnsignedWrap : 1; bool NoSignedWrap : 1; bool Exact : 1; @@ -383,9 +391,8 @@ private: public: /// Default constructor turns off all optimization flags. SDNodeFlags() - : AnyDefined(false), NoUnsignedWrap(false), NoSignedWrap(false), - Exact(false), NoNaNs(false), NoInfs(false), - NoSignedZeros(false), AllowReciprocal(false), + : NoUnsignedWrap(false), NoSignedWrap(false), Exact(false), NoNaNs(false), + NoInfs(false), NoSignedZeros(false), AllowReciprocal(false), AllowContract(false), ApproximateFuncs(false), AllowReassociation(false), NoFPExcept(false) {} @@ -400,56 +407,18 @@ public: setAllowReassociation(FPMO.hasAllowReassoc()); } - /// Sets the state of the flags to the defined state. - void setDefined() { AnyDefined = true; } - /// Returns true if the flags are in a defined state. - bool isDefined() const { return AnyDefined; } - // These are mutators for each flag. - void setNoUnsignedWrap(bool b) { - setDefined(); - NoUnsignedWrap = b; - } - void setNoSignedWrap(bool b) { - setDefined(); - NoSignedWrap = b; - } - void setExact(bool b) { - setDefined(); - Exact = b; - } - void setNoNaNs(bool b) { - setDefined(); - NoNaNs = b; - } - void setNoInfs(bool b) { - setDefined(); - NoInfs = b; - } - void setNoSignedZeros(bool b) { - setDefined(); - NoSignedZeros = b; - } - void setAllowReciprocal(bool b) { - setDefined(); - AllowReciprocal = b; - } - void setAllowContract(bool b) { - setDefined(); - AllowContract = b; - } - void setApproximateFuncs(bool b) { - setDefined(); - ApproximateFuncs = b; - } - void setAllowReassociation(bool b) { - setDefined(); - AllowReassociation = b; - } - void setNoFPExcept(bool b) { - setDefined(); - NoFPExcept = b; - } + void setNoUnsignedWrap(bool b) { NoUnsignedWrap = b; } + void setNoSignedWrap(bool b) { NoSignedWrap = b; } + void setExact(bool b) { Exact = b; } + void setNoNaNs(bool b) { NoNaNs = b; } + void setNoInfs(bool b) { NoInfs = b; } + void setNoSignedZeros(bool b) { NoSignedZeros = b; } + void setAllowReciprocal(bool b) { AllowReciprocal = b; } + void setAllowContract(bool b) { AllowContract = b; } + void setApproximateFuncs(bool b) { ApproximateFuncs = b; } + void setAllowReassociation(bool b) { AllowReassociation = b; } + void setNoFPExcept(bool b) { NoFPExcept = b; } // These are accessors for each flag. bool hasNoUnsignedWrap() const { return NoUnsignedWrap; } @@ -464,11 +433,9 @@ public: bool hasAllowReassociation() const { return AllowReassociation; } bool hasNoFPExcept() const { return NoFPExcept; } - /// Clear any flags in this flag set that aren't also set in Flags. - /// If the given Flags are undefined then don't do anything. + /// Clear any flags in this flag set that aren't also set in Flags. All + /// flags will be cleared if Flags are undefined. 
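With the AnyDefined bit removed above, SDNodeFlags no longer tracks an "undefined" state and intersectWith() unconditionally intersects. A small sketch of the typical use when two equivalent nodes are merged; mergeFlags is an illustrative name:

#include "llvm/CodeGen/SelectionDAGNodes.h"

using namespace llvm;

// Keep only the flags both nodes can guarantee on the node that survives.
static void mergeFlags(SDNode *Kept, const SDNode *Erased) {
  SDNodeFlags Flags = Kept->getFlags();
  Flags.intersectWith(Erased->getFlags());
  Kept->setFlags(Flags);
}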
void intersectWith(const SDNodeFlags Flags) { - if (!Flags.isDefined()) - return; NoUnsignedWrap &= Flags.NoUnsignedWrap; NoSignedWrap &= Flags.NoSignedWrap; Exact &= Flags.Exact; @@ -559,6 +526,7 @@ BEGIN_TWO_BYTE_PACK() class LoadSDNodeBitfields { friend class LoadSDNode; friend class MaskedLoadSDNode; + friend class MaskedGatherSDNode; uint16_t : NumLSBaseSDNodeBits; @@ -569,6 +537,7 @@ BEGIN_TWO_BYTE_PACK() class StoreSDNodeBitfields { friend class StoreSDNode; friend class MaskedStoreSDNode; + friend class MaskedScatterSDNode; uint16_t : NumLSBaseSDNodeBits; @@ -720,9 +689,7 @@ public: bool use_empty() const { return UseList == nullptr; } /// Return true if there is exactly one use of this node. - bool hasOneUse() const { - return !use_empty() && std::next(use_begin()) == use_end(); - } + bool hasOneUse() const { return hasSingleElement(uses()); } /// Return the number of uses of this node. This method takes /// time proportional to the number of uses. @@ -1379,8 +1346,18 @@ public: } const SDValue &getChain() const { return getOperand(0); } + const SDValue &getBasePtr() const { - return getOperand(getOpcode() == ISD::STORE ? 2 : 1); + switch (getOpcode()) { + case ISD::STORE: + case ISD::MSTORE: + return getOperand(2); + case ISD::MGATHER: + case ISD::MSCATTER: + return getOperand(3); + default: + return getOperand(1); + } } // Methods to support isa and dyn_cast @@ -1784,6 +1761,32 @@ public: } }; +/// This SDNode is used for PSEUDO_PROBE values, which are the function guid and +/// the index of the basic block being probed. A pseudo probe serves as a place +/// holder and will be removed at the end of compilation. It does not have any +/// operand because we do not want the instruction selection to deal with any. +class PseudoProbeSDNode : public SDNode { + friend class SelectionDAG; + uint64_t Guid; + uint64_t Index; + uint32_t Attributes; + + PseudoProbeSDNode(unsigned Opcode, unsigned Order, const DebugLoc &Dl, + SDVTList VTs, uint64_t Guid, uint64_t Index, uint32_t Attr) + : SDNode(Opcode, Order, Dl, VTs), Guid(Guid), Index(Index), + Attributes(Attr) {} + +public: + uint64_t getGuid() const { return Guid; } + uint64_t getIndex() const { return Index; } + uint32_t getAttributes() const { return Attributes; } + + // Methods to support isa and dyn_cast + static bool classof(const SDNode *N) { + return N->getOpcode() == ISD::PSEUDO_PROBE; + } +}; + class JumpTableSDNode : public SDNode { friend class SelectionDAG; @@ -1944,6 +1947,33 @@ public: /// the vector width and set the bits where elements are undef. SDValue getSplatValue(BitVector *UndefElements = nullptr) const; + /// Find the shortest repeating sequence of values in the build vector. + /// + /// e.g. { u, X, u, X, u, u, X, u } -> { X } + /// { X, Y, u, Y, u, u, X, u } -> { X, Y } + /// + /// Currently this must be a power-of-2 build vector. + /// The DemandedElts mask indicates the elements that must be present, + /// undemanded elements in Sequence may be null (SDValue()). If passed a + /// non-null UndefElements bitvector, it will resize it to match the original + /// vector width and set the bits where elements are undef. If result is + /// false, Sequence will be empty. + bool getRepeatedSequence(const APInt &DemandedElts, + SmallVectorImpl<SDValue> &Sequence, + BitVector *UndefElements = nullptr) const; + + /// Find the shortest repeating sequence of values in the build vector. + /// + /// e.g. 
{ u, X, u, X, u, u, X, u } -> { X } + /// { X, Y, u, Y, u, u, X, u } -> { X, Y } + /// + /// Currently this must be a power-of-2 build vector. + /// If passed a non-null UndefElements bitvector, it will resize it to match + /// the original vector width and set the bits where elements are undef. + /// If result is false, Sequence will be empty. + bool getRepeatedSequence(SmallVectorImpl<SDValue> &Sequence, + BitVector *UndefElements = nullptr) const; + /// Returns the demanded splatted constant or null if this is not a constant /// splat. /// @@ -2292,9 +2322,6 @@ public: // MaskedLoadSDNode (Chain, ptr, offset, mask, passthru) // MaskedStoreSDNode (Chain, data, ptr, offset, mask) // Mask is a vector of i1 elements - const SDValue &getBasePtr() const { - return getOperand(getOpcode() == ISD::MLOAD ? 1 : 2); - } const SDValue &getOffset() const { return getOperand(getOpcode() == ISD::MLOAD ? 2 : 3); } @@ -2402,6 +2429,9 @@ public: ISD::MemIndexType getIndexType() const { return static_cast<ISD::MemIndexType>(LSBaseSDNodeBits.AddressingMode); } + void setIndexType(ISD::MemIndexType IndexType) { + LSBaseSDNodeBits.AddressingMode = IndexType; + } bool isIndexScaled() const { return (getIndexType() == ISD::SIGNED_SCALED) || (getIndexType() == ISD::UNSIGNED_SCALED); @@ -2434,12 +2464,18 @@ public: MaskedGatherSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs, EVT MemVT, MachineMemOperand *MMO, - ISD::MemIndexType IndexType) + ISD::MemIndexType IndexType, ISD::LoadExtType ETy) : MaskedGatherScatterSDNode(ISD::MGATHER, Order, dl, VTs, MemVT, MMO, - IndexType) {} + IndexType) { + LoadSDNodeBits.ExtTy = ETy; + } const SDValue &getPassThru() const { return getOperand(1); } + ISD::LoadExtType getExtensionType() const { + return ISD::LoadExtType(LoadSDNodeBits.ExtTy); + } + static bool classof(const SDNode *N) { return N->getOpcode() == ISD::MGATHER; } @@ -2453,9 +2489,16 @@ public: MaskedScatterSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs, EVT MemVT, MachineMemOperand *MMO, - ISD::MemIndexType IndexType) + ISD::MemIndexType IndexType, bool IsTrunc) : MaskedGatherScatterSDNode(ISD::MSCATTER, Order, dl, VTs, MemVT, MMO, - IndexType) {} + IndexType) { + StoreSDNodeBits.IsTruncating = IsTrunc; + } + + /// Return true if the op does a truncation before store. + /// For integers this is the same as doing a TRUNCATE and storing the result. + /// For floats, it is the same as doing an FP_ROUND and storing the result. + bool isTruncatingStore() const { return StoreSDNodeBits.IsTruncating; } const SDValue &getValue() const { return getOperand(1); } @@ -2605,7 +2648,8 @@ template <> struct GraphTraits<SDNode*> { /// with 4 and 8 byte pointer alignment, respectively. using LargestSDNode = AlignedCharArrayUnion<AtomicSDNode, TargetIndexSDNode, BlockAddressSDNode, - GlobalAddressSDNode>; + GlobalAddressSDNode, + PseudoProbeSDNode>; /// The SDNode class with the greatest alignment requirement. using MostAlignedSDNode = GlobalAddressSDNode; diff --git a/llvm/include/llvm/CodeGen/SelectionDAGTargetInfo.h b/llvm/include/llvm/CodeGen/SelectionDAGTargetInfo.h index 014523f1af6a..78f6fc6656fa 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAGTargetInfo.h +++ b/llvm/include/llvm/CodeGen/SelectionDAGTargetInfo.h @@ -85,7 +85,7 @@ public: return SDValue(); } - /// Emit target-specific code that performs a memcmp, in cases where that is + /// Emit target-specific code that performs a memcmp/bcmp, in cases where that is /// faster than a libcall. 
The first returned SDValue is the result of the /// memcmp and the second is the chain. Both SDValues can be null if a normal /// libcall should be used. diff --git a/llvm/include/llvm/CodeGen/SlotIndexes.h b/llvm/include/llvm/CodeGen/SlotIndexes.h index 19eab7ae5e35..b2133de93ea2 100644 --- a/llvm/include/llvm/CodeGen/SlotIndexes.h +++ b/llvm/include/llvm/CodeGen/SlotIndexes.h @@ -604,38 +604,27 @@ class raw_ostream; } /// Add the given MachineBasicBlock into the maps. - /// If \p InsertionPoint is specified then the block will be placed - /// before the given machine instr, otherwise it will be placed - /// before the next block in MachineFunction insertion order. - void insertMBBInMaps(MachineBasicBlock *mbb, - MachineInstr *InsertionPoint = nullptr) { - MachineFunction::iterator nextMBB = - std::next(MachineFunction::iterator(mbb)); - - IndexListEntry *startEntry = nullptr; - IndexListEntry *endEntry = nullptr; - IndexList::iterator newItr; - if (InsertionPoint) { - startEntry = createEntry(nullptr, 0); - endEntry = getInstructionIndex(*InsertionPoint).listEntry(); - newItr = indexList.insert(endEntry->getIterator(), startEntry); - } else if (nextMBB == mbb->getParent()->end()) { - startEntry = &indexList.back(); - endEntry = createEntry(nullptr, 0); - newItr = indexList.insertAfter(startEntry->getIterator(), endEntry); - } else { - startEntry = createEntry(nullptr, 0); - endEntry = getMBBStartIdx(&*nextMBB).listEntry(); - newItr = indexList.insert(endEntry->getIterator(), startEntry); - } + /// If it contains any instructions then they must already be in the maps. + /// This is used after a block has been split by moving some suffix of its + /// instructions into a newly created block. + void insertMBBInMaps(MachineBasicBlock *mbb) { + assert(mbb != &mbb->getParent()->front() && + "Can't insert a new block at the beginning of a function."); + auto prevMBB = std::prev(MachineFunction::iterator(mbb)); + + // Create a new entry to be used for the start of mbb and the end of + // prevMBB. + IndexListEntry *startEntry = createEntry(nullptr, 0); + IndexListEntry *endEntry = getMBBEndIdx(&*prevMBB).listEntry(); + IndexListEntry *insEntry = + mbb->empty() ? endEntry + : getInstructionIndex(mbb->front()).listEntry(); + IndexList::iterator newItr = + indexList.insert(insEntry->getIterator(), startEntry); SlotIndex startIdx(startEntry, SlotIndex::Slot_Block); SlotIndex endIdx(endEntry, SlotIndex::Slot_Block); - MachineFunction::iterator prevMBB(mbb); - assert(prevMBB != mbb->getParent()->end() && - "Can't insert a new block at the beginning of a function."); - --prevMBB; MBBRanges[prevMBB->getNumber()].second = startIdx; assert(unsigned(mbb->getNumber()) == MBBRanges.size() && diff --git a/llvm/include/llvm/CodeGen/StableHashing.h b/llvm/include/llvm/CodeGen/StableHashing.h new file mode 100644 index 000000000000..caf27e152e78 --- /dev/null +++ b/llvm/include/llvm/CodeGen/StableHashing.h @@ -0,0 +1,112 @@ +//===- llvm/CodeGen/StableHashing.h - Utilities for stable hashing * C++ *-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file provides types and functions for computing and combining stable +// hashes. Stable hashes can be useful for hashing across different modules, +// processes, or compiler runs. 
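A brief usage sketch for the helpers declared further down in this new header, assuming it is included as llvm/CodeGen/StableHashing.h per this commit; hashRecord and its parameters are purely illustrative:

#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/StableHashing.h"

// Combine two integers and a name into one 64-bit stable hash. Because the
// scheme is FNV-1 based and never hashes pointer values, the result is
// reproducible across modules, processes, and compiler runs.
llvm::stable_hash hashRecord(unsigned Opcode, unsigned NumOperands,
                             llvm::StringRef Name) {
  llvm::stable_hash Parts[] = {llvm::stable_hash(Opcode),
                               llvm::stable_hash(NumOperands),
                               llvm::stable_hash_combine_string(Name)};
  return llvm::stable_hash_combine_array(Parts, 3);
}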
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_STABLEHASHING_H +#define LLVM_CODEGEN_STABLEHASHING_H + +#include "llvm/ADT/StringRef.h" + +namespace llvm { + +/// An opaque object representing a stable hash code. It can be serialized, +/// deserialized, and is stable across processes and executions. +using stable_hash = uint64_t; + +// Implementation details +namespace hashing { +namespace detail { + +// Stable hashes are based on the 64-bit FNV-1 hash: +// https://en.wikipedia.org/wiki/Fowler-Noll-Vo_hash_function + +const uint64_t FNV_PRIME_64 = 1099511628211u; +const uint64_t FNV_OFFSET_64 = 14695981039346656037u; + +inline void stable_hash_append(stable_hash &Hash, const char Value) { + Hash = Hash ^ (Value & 0xFF); + Hash = Hash * FNV_PRIME_64; +} + +inline void stable_hash_append(stable_hash &Hash, stable_hash Value) { + for (unsigned I = 0; I < 8; ++I) { + stable_hash_append(Hash, static_cast<char>(Value)); + Value >>= 8; + } +} + +} // namespace detail +} // namespace hashing + +inline stable_hash stable_hash_combine(stable_hash A, stable_hash B) { + stable_hash Hash = hashing::detail::FNV_OFFSET_64; + hashing::detail::stable_hash_append(Hash, A); + hashing::detail::stable_hash_append(Hash, B); + return Hash; +} + +inline stable_hash stable_hash_combine(stable_hash A, stable_hash B, + stable_hash C) { + stable_hash Hash = hashing::detail::FNV_OFFSET_64; + hashing::detail::stable_hash_append(Hash, A); + hashing::detail::stable_hash_append(Hash, B); + hashing::detail::stable_hash_append(Hash, C); + return Hash; +} + +inline stable_hash stable_hash_combine(stable_hash A, stable_hash B, + stable_hash C, stable_hash D) { + stable_hash Hash = hashing::detail::FNV_OFFSET_64; + hashing::detail::stable_hash_append(Hash, A); + hashing::detail::stable_hash_append(Hash, B); + hashing::detail::stable_hash_append(Hash, C); + hashing::detail::stable_hash_append(Hash, D); + return Hash; +} + +/// Compute a stable_hash for a sequence of values. +/// +/// This hashes a sequence of values. It produces the same stable_hash as +/// 'stable_hash_combine(a, b, c, ...)', but can run over arbitrary sized +/// sequences and is significantly faster given pointers and types which +/// can be hashed as a sequence of bytes. 
+template <typename InputIteratorT> +stable_hash stable_hash_combine_range(InputIteratorT First, + InputIteratorT Last) { + stable_hash Hash = hashing::detail::FNV_OFFSET_64; + for (auto I = First; I != Last; ++I) + hashing::detail::stable_hash_append(Hash, *I); + return Hash; +} + +inline stable_hash stable_hash_combine_array(const stable_hash *P, size_t C) { + stable_hash Hash = hashing::detail::FNV_OFFSET_64; + for (size_t I = 0; I < C; ++I) + hashing::detail::stable_hash_append(Hash, P[I]); + return Hash; +} + +inline stable_hash stable_hash_combine_string(const StringRef &S) { + return stable_hash_combine_range(S.begin(), S.end()); +} + +inline stable_hash stable_hash_combine_string(const char *C) { + stable_hash Hash = hashing::detail::FNV_OFFSET_64; + while (*C) + hashing::detail::stable_hash_append(Hash, *(C++)); + return Hash; +} + +} // namespace llvm + +#endif diff --git a/llvm/include/llvm/CodeGen/StackMaps.h b/llvm/include/llvm/CodeGen/StackMaps.h index e33ee226e41a..928d7cc6cc04 100644 --- a/llvm/include/llvm/CodeGen/StackMaps.h +++ b/llvm/include/llvm/CodeGen/StackMaps.h @@ -148,9 +148,13 @@ public: /// <StackMaps::ConstantOp>, <calling convention>, /// <StackMaps::ConstantOp>, <statepoint flags>, /// <StackMaps::ConstantOp>, <num deopt args>, [deopt args...], -/// <gc base/derived pairs...> <gc allocas...> -/// Note that the last two sets of arguments are not currently length -/// prefixed. +/// <StackMaps::ConstantOp>, <num gc pointer args>, [gc pointer args...], +/// <StackMaps::ConstantOp>, <num gc allocas>, [gc allocas args...], +/// <StackMaps::ConstantOp>, <num entries in gc map>, [base/derived pairs] +/// base/derived pairs in gc map are logical indices into <gc pointer args> +/// section. +/// All gc pointers assigned to VRegs produce new value (in form of MI Def +/// operand) and are tied to it. class StatepointOpers { // TODO:: we should change the STATEPOINT representation so that CC and // Flags should be part of meta operands, with args and deopt operands, and @@ -166,21 +170,23 @@ class StatepointOpers { enum { CCOffset = 1, FlagsOffset = 3, NumDeoptOperandsOffset = 5 }; public: - explicit StatepointOpers(const MachineInstr *MI) : MI(MI) {} + explicit StatepointOpers(const MachineInstr *MI) : MI(MI) { + NumDefs = MI->getNumDefs(); + } /// Get index of statepoint ID operand. - unsigned getIDPos() const { return IDPos; } + unsigned getIDPos() const { return NumDefs + IDPos; } /// Get index of Num Patch Bytes operand. - unsigned getNBytesPos() const { return NBytesPos; } + unsigned getNBytesPos() const { return NumDefs + NBytesPos; } /// Get index of Num Call Arguments operand. - unsigned getNCallArgsPos() const { return NCallArgsPos; } + unsigned getNCallArgsPos() const { return NumDefs + NCallArgsPos; } /// Get starting index of non call related arguments /// (calling convention, statepoint flags, vm state and gc state). unsigned getVarIdx() const { - return MI->getOperand(NCallArgsPos).getImm() + MetaEnd; + return MI->getOperand(NumDefs + NCallArgsPos).getImm() + MetaEnd + NumDefs; } /// Get index of Calling Convention operand. @@ -195,16 +201,16 @@ public: } /// Return the ID for the given statepoint. - uint64_t getID() const { return MI->getOperand(IDPos).getImm(); } + uint64_t getID() const { return MI->getOperand(NumDefs + IDPos).getImm(); } /// Return the number of patchable bytes the given statepoint should emit. 
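The recurring pattern in the StatepointOpers accessors above is that every fixed operand position is now shifted by the number of defs the STATEPOINT instruction produces (the relocated GC pointers it defines). A toy model of that indexing, with illustrative names rather than the real MachineInstr API:

#include <cassert>
#include <cstdint>
#include <vector>

// Toy model: the operand list begins with NumDefs def operands, so a fixed
// "meta" position such as IDPos or NBytesPos is only valid after adding
// NumDefs, exactly as the accessors above now do.
struct StatepointModel {
  unsigned NumDefs;
  std::vector<int64_t> Operands; // defs first, then ID, num-patch-bytes, ...

  int64_t getMeta(unsigned StaticPos) const {
    return Operands[NumDefs + StaticPos];
  }
};

int main() {
  // One def (a relocated GC pointer), then the statepoint ID 42 at static
  // position 0.
  StatepointModel SP{1, {/*def*/ 0, /*ID*/ 42}};
  assert(SP.getMeta(0) == 42);
  return 0;
}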
uint32_t getNumPatchBytes() const { - return MI->getOperand(NBytesPos).getImm(); + return MI->getOperand(NumDefs + NBytesPos).getImm(); } /// Return the target of the underlying call. const MachineOperand &getCallTarget() const { - return MI->getOperand(CallTargetPos); + return MI->getOperand(NumDefs + CallTargetPos); } /// Return the calling convention. @@ -215,8 +221,31 @@ public: /// Return the statepoint flags. uint64_t getFlags() const { return MI->getOperand(getFlagsIdx()).getImm(); } + uint64_t getNumDeoptArgs() const { + return MI->getOperand(getNumDeoptArgsIdx()).getImm(); + } + + /// Get index of number of gc map entries. + unsigned getNumGcMapEntriesIdx(); + + /// Get index of number of gc allocas. + unsigned getNumAllocaIdx(); + + /// Get index of number of GC pointers. + unsigned getNumGCPtrIdx(); + + /// Get index of first GC pointer operand of -1 if there are none. + int getFirstGCPtrIdx(); + + /// Get vector of base/derived pairs from statepoint. + /// Elements are indices into GC Pointer operand list (logical). + /// Returns number of elements in GCMap. + unsigned + getGCPointerMap(SmallVectorImpl<std::pair<unsigned, unsigned>> &GCMap); + private: const MachineInstr *MI; + unsigned NumDefs; }; class StackMaps { @@ -258,6 +287,10 @@ public: StackMaps(AsmPrinter &AP); + /// Get index of next meta operand. + /// Similar to parseOperand, but does not actually parses operand meaning. + static unsigned getNextMetaArgIdx(const MachineInstr *MI, unsigned CurIdx); + void reset() { CSInfos.clear(); ConstPool.clear(); @@ -330,6 +363,13 @@ private: MachineInstr::const_mop_iterator MOE, LocationVec &Locs, LiveOutVec &LiveOuts) const; + /// Specialized parser of statepoint operands. + /// They do not directly correspond to StackMap record entries. + void parseStatepointOpers(const MachineInstr &MI, + MachineInstr::const_mop_iterator MOI, + MachineInstr::const_mop_iterator MOE, + LocationVec &Locations, LiveOutVec &LiveOuts); + /// Create a live-out register record for the given register @p Reg. 
LiveOutReg createLiveOutReg(unsigned Reg, const TargetRegisterInfo *TRI) const; diff --git a/llvm/include/llvm/CodeGen/SwitchLoweringUtils.h b/llvm/include/llvm/CodeGen/SwitchLoweringUtils.h index 4d6afa617d3a..51f1d7d6fd21 100644 --- a/llvm/include/llvm/CodeGen/SwitchLoweringUtils.h +++ b/llvm/include/llvm/CodeGen/SwitchLoweringUtils.h @@ -10,16 +10,21 @@ #define LLVM_CODEGEN_SWITCHLOWERINGUTILS_H #include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/SelectionDAGNodes.h" -#include "llvm/CodeGen/TargetLowering.h" -#include "llvm/IR/Constants.h" +#include "llvm/IR/InstrTypes.h" #include "llvm/Support/BranchProbability.h" +#include <vector> namespace llvm { +class BlockFrequencyInfo; +class ConstantInt; class FunctionLoweringInfo; class MachineBasicBlock; -class BlockFrequencyInfo; +class ProfileSummaryInfo; +class TargetLowering; +class TargetMachine; namespace SwitchCG { diff --git a/llvm/include/llvm/CodeGen/TargetCallingConv.h b/llvm/include/llvm/CodeGen/TargetCallingConv.h index 347d7ff40404..df974b499851 100644 --- a/llvm/include/llvm/CodeGen/TargetCallingConv.h +++ b/llvm/include/llvm/CodeGen/TargetCallingConv.h @@ -31,6 +31,7 @@ namespace ISD { unsigned IsInReg : 1; ///< Passed in register unsigned IsSRet : 1; ///< Hidden struct-ret ptr unsigned IsByVal : 1; ///< Struct passed by value + unsigned IsByRef : 1; ///< Passed in memory unsigned IsNest : 1; ///< Nested fn static chain unsigned IsReturned : 1; ///< Always returned unsigned IsSplit : 1; @@ -43,25 +44,31 @@ namespace ISD { unsigned IsHva : 1; ///< HVA field for unsigned IsHvaStart : 1; ///< HVA structure start unsigned IsSecArgPass : 1; ///< Second argument - unsigned ByValAlign : 4; ///< Log 2 of byval alignment + unsigned ByValOrByRefAlign : 4; ///< Log 2 of byval/byref alignment unsigned OrigAlign : 5; ///< Log 2 of original alignment unsigned IsInConsecutiveRegsLast : 1; unsigned IsInConsecutiveRegs : 1; unsigned IsCopyElisionCandidate : 1; ///< Argument copy elision candidate unsigned IsPointer : 1; - unsigned ByValSize; ///< Byval struct size + unsigned ByValOrByRefSize; ///< Byval or byref struct size unsigned PointerAddrSpace; ///< Address space of pointer argument + /// Set the alignment used by byref or byval parameters. 
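For context on the shared 4-bit field used by the setAlignImpl below: the alignment is stored in log2 form (via LLVM's encode/decodeMaybeAlign helpers), which is why the setter can assert that the round trip preserves the value. A standalone sketch of that packing, not the LLVM implementation itself:

#include <cassert>
#include <cstdint>

// Illustrative 4-bit packing: 0 means "no alignment recorded", otherwise the
// field holds log2(alignment) + 1, so alignments up to 2^14 fit in 4 bits.
struct AlignField {
  uint32_t ByValOrByRefAlign : 4;

  void set(uint64_t Alignment) {
    unsigned Log2 = 0;
    for (uint64_t A = Alignment; A > 1; A >>= 1)
      ++Log2;
    ByValOrByRefAlign = Log2 + 1;
    assert(get() == Alignment && "bitfield overflow");
  }
  uint64_t get() const {
    return ByValOrByRefAlign ? uint64_t(1) << (ByValOrByRefAlign - 1) : 0;
  }
};

int main() {
  AlignField F{};
  F.set(16); // log2(16) + 1 = 5 fits comfortably in 4 bits
  assert(F.get() == 16);
  return 0;
}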
+ void setAlignImpl(Align A) { + ByValOrByRefAlign = encode(A); + assert(getNonZeroByValAlign() == A && "bitfield overflow"); + } + public: ArgFlagsTy() - : IsZExt(0), IsSExt(0), IsInReg(0), IsSRet(0), IsByVal(0), IsNest(0), - IsReturned(0), IsSplit(0), IsInAlloca(0), IsPreallocated(0), + : IsZExt(0), IsSExt(0), IsInReg(0), IsSRet(0), IsByVal(0), IsByRef(0), + IsNest(0), IsReturned(0), IsSplit(0), IsInAlloca(0), IsPreallocated(0), IsSplitEnd(0), IsSwiftSelf(0), IsSwiftError(0), IsCFGuardTarget(0), - IsHva(0), IsHvaStart(0), IsSecArgPass(0), ByValAlign(0), OrigAlign(0), - IsInConsecutiveRegsLast(0), IsInConsecutiveRegs(0), - IsCopyElisionCandidate(0), IsPointer(0), ByValSize(0), + IsHva(0), IsHvaStart(0), IsSecArgPass(0), ByValOrByRefAlign(0), + OrigAlign(0), IsInConsecutiveRegsLast(0), IsInConsecutiveRegs(0), + IsCopyElisionCandidate(0), IsPointer(0), ByValOrByRefSize(0), PointerAddrSpace(0) { static_assert(sizeof(*this) == 3 * sizeof(unsigned), "flags are too big"); } @@ -81,6 +88,9 @@ namespace ISD { bool isByVal() const { return IsByVal; } void setByVal() { IsByVal = 1; } + bool isByRef() const { return IsByRef; } + void setByRef() { IsByRef = 1; } + bool isInAlloca() const { return IsInAlloca; } void setInAlloca() { IsInAlloca = 1; } @@ -112,10 +122,12 @@ namespace ISD { void setReturned() { IsReturned = 1; } bool isInConsecutiveRegs() const { return IsInConsecutiveRegs; } - void setInConsecutiveRegs() { IsInConsecutiveRegs = 1; } + void setInConsecutiveRegs(bool Flag = true) { IsInConsecutiveRegs = Flag; } bool isInConsecutiveRegsLast() const { return IsInConsecutiveRegsLast; } - void setInConsecutiveRegsLast() { IsInConsecutiveRegsLast = 1; } + void setInConsecutiveRegsLast(bool Flag = true) { + IsInConsecutiveRegsLast = Flag; + } bool isSplit() const { return IsSplit; } void setSplit() { IsSplit = 1; } @@ -131,17 +143,22 @@ namespace ISD { LLVM_ATTRIBUTE_DEPRECATED(unsigned getByValAlign() const, "Use getNonZeroByValAlign() instead") { - MaybeAlign A = decodeMaybeAlign(ByValAlign); + MaybeAlign A = decodeMaybeAlign(ByValOrByRefAlign); return A ? 
A->value() : 0; } Align getNonZeroByValAlign() const { - MaybeAlign A = decodeMaybeAlign(ByValAlign); + MaybeAlign A = decodeMaybeAlign(ByValOrByRefAlign); assert(A && "ByValAlign must be defined"); return *A; } void setByValAlign(Align A) { - ByValAlign = encode(A); - assert(getNonZeroByValAlign() == A && "bitfield overflow"); + assert(isByVal() && !isByRef()); + setAlignImpl(A); + } + + void setByRefAlign(Align A) { + assert(!isByVal() && isByRef()); + setAlignImpl(A); } LLVM_ATTRIBUTE_DEPRECATED(unsigned getOrigAlign() const, @@ -157,8 +174,23 @@ namespace ISD { assert(getNonZeroOrigAlign() == A && "bitfield overflow"); } - unsigned getByValSize() const { return ByValSize; } - void setByValSize(unsigned S) { ByValSize = S; } + unsigned getByValSize() const { + assert(isByVal() && !isByRef()); + return ByValOrByRefSize; + } + void setByValSize(unsigned S) { + assert(isByVal() && !isByRef()); + ByValOrByRefSize = S; + } + + unsigned getByRefSize() const { + assert(!isByVal() && isByRef()); + return ByValOrByRefSize; + } + void setByRefSize(unsigned S) { + assert(!isByVal() && isByRef()); + ByValOrByRefSize = S; + } unsigned getPointerAddrSpace() const { return PointerAddrSpace; } void setPointerAddrSpace(unsigned AS) { PointerAddrSpace = AS; } diff --git a/llvm/include/llvm/CodeGen/TargetFrameLowering.h b/llvm/include/llvm/CodeGen/TargetFrameLowering.h index c3a11b199675..792452f6e81d 100644 --- a/llvm/include/llvm/CodeGen/TargetFrameLowering.h +++ b/llvm/include/llvm/CodeGen/TargetFrameLowering.h @@ -14,6 +14,7 @@ #define LLVM_CODEGEN_TARGETFRAMELOWERING_H #include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/Support/TypeSize.h" #include <vector> namespace llvm { @@ -26,7 +27,7 @@ namespace TargetStackID { enum Value { Default = 0, SGPRSpill = 1, - SVEVector = 2, + ScalableVector = 2, NoAlloc = 255 }; } @@ -134,6 +135,12 @@ public: /// was called). virtual unsigned getStackAlignmentSkew(const MachineFunction &MF) const; + /// This method returns whether or not it is safe for an object with the + /// given stack id to be bundled into the local area. + virtual bool isStackIdSafeForLocalArea(unsigned StackId) const { + return true; + } + /// getOffsetOfLocalArea - This method returns the offset of the local area /// from the stack pointer on entrance to a function. /// @@ -291,8 +298,8 @@ public: /// getFrameIndexReference - This method should return the base register /// and offset used to reference a frame index location. The offset is /// returned directly, and the base register is returned via FrameReg. - virtual int getFrameIndexReference(const MachineFunction &MF, int FI, - Register &FrameReg) const; + virtual StackOffset getFrameIndexReference(const MachineFunction &MF, int FI, + Register &FrameReg) const; /// Same as \c getFrameIndexReference, except that the stack pointer (as /// opposed to the frame pointer) will be the preferred value for \p @@ -300,9 +307,10 @@ public: /// use offsets from RSP. If \p IgnoreSPUpdates is true, the returned /// offset is only guaranteed to be valid with respect to the value of SP at /// the end of the prologue. - virtual int getFrameIndexReferencePreferSP(const MachineFunction &MF, int FI, - Register &FrameReg, - bool IgnoreSPUpdates) const { + virtual StackOffset + getFrameIndexReferencePreferSP(const MachineFunction &MF, int FI, + Register &FrameReg, + bool IgnoreSPUpdates) const { // Always safe to dispatch to getFrameIndexReference. 
return getFrameIndexReference(MF, FI, FrameReg); } @@ -310,8 +318,8 @@ public: /// getNonLocalFrameIndexReference - This method returns the offset used to /// reference a frame index location. The offset can be from either FP/BP/SP /// based on which base register is returned by llvm.localaddress. - virtual int getNonLocalFrameIndexReference(const MachineFunction &MF, - int FI) const { + virtual StackOffset getNonLocalFrameIndexReference(const MachineFunction &MF, + int FI) const { // By default, dispatch to getFrameIndexReference. Interested targets can // override this. Register FrameReg; diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h index b3b2fa218627..36afdefd27b2 100644 --- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h +++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h @@ -25,6 +25,7 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineOutliner.h" +#include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/CodeGen/VirtRegMap.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/Support/BranchProbability.h" @@ -80,6 +81,15 @@ struct RegImmPair { RegImmPair(Register Reg, int64_t Imm) : Reg(Reg), Imm(Imm) {} }; +/// Used to describe addressing mode similar to ExtAddrMode in CodeGenPrepare. +/// It holds the register values, the scale value and the displacement. +struct ExtAddrMode { + Register BaseReg; + Register ScaledReg; + int64_t Scale; + int64_t Displacement; +}; + //--------------------------------------------------------------------------- /// /// TargetInstrInfo - Interface to description of machine instruction set @@ -339,6 +349,12 @@ public: unsigned &Size, unsigned &Offset, const MachineFunction &MF) const; + /// Return true if the given instruction is terminator that is unspillable, + /// according to isUnspillableTerminatorImpl. + bool isUnspillableTerminator(const MachineInstr *MI) const { + return MI->isTerminator() && isUnspillableTerminatorImpl(MI); + } + /// Returns the size in bytes of the specified MachineInstr, or ~0U /// when this function is not implemented by a target. virtual unsigned getInstSizeInBytes(const MachineInstr &MI) const { @@ -724,7 +740,7 @@ public: return nullptr; } - /// Analyze the loop code, return true if it cannot be understoo. Upon + /// Analyze the loop code, return true if it cannot be understood. Upon /// success, this function returns false and returns information about the /// induction variable and compare instruction used at the end. virtual bool analyzeLoop(MachineLoop &L, MachineInstr *&IndVarInst, @@ -771,7 +787,7 @@ public: /// Second variant of isProfitableToIfCvt. This one /// checks for the case where two basic blocks from true and false path - /// of a if-then-else (diamond) are predicated on mutally exclusive + /// of a if-then-else (diamond) are predicated on mutually exclusive /// predicates, where the probability of the true path being taken is given /// by Probability, and Confidence is a measure of our confidence that it /// will be properly predicted. @@ -945,6 +961,17 @@ protected: return None; } + /// Return true if the given terminator MI is not expected to spill. This + /// sets the live interval as not spillable and adjusts phi node lowering to + /// not introduce copies after the terminator. 
Use with care, these are + /// currently used for hardware loop intrinsics in very controlled situations, + /// created prior to registry allocation in loops that only have single phi + /// users for the terminators value. They may run out of registers if not used + /// carefully. + virtual bool isUnspillableTerminatorImpl(const MachineInstr *MI) const { + return false; + } + public: /// If the specific machine instruction is a instruction that moves/copies /// value from one register to another register return destination and source @@ -968,6 +995,15 @@ public: return None; } + /// Returns true if MI is an instruction that defines Reg to have a constant + /// value and the value is recorded in ImmVal. The ImmVal is a result that + /// should be interpreted as modulo size of Reg. + virtual bool getConstValDefinedInReg(const MachineInstr &MI, + const Register Reg, + int64_t &ImmVal) const { + return false; + } + /// Store the specified register of the given register class to the specified /// stack frame index. The store instruction is to be added to the given /// machine basic block before the specified machine instruction. If isKill @@ -1041,9 +1077,23 @@ public: /// faster sequence. /// \param Root - Instruction that could be combined with one of its operands /// \param Patterns - Vector of possible combination patterns - virtual bool getMachineCombinerPatterns( - MachineInstr &Root, - SmallVectorImpl<MachineCombinerPattern> &Patterns) const; + virtual bool + getMachineCombinerPatterns(MachineInstr &Root, + SmallVectorImpl<MachineCombinerPattern> &Patterns, + bool DoRegPressureReduce) const; + + /// Return true if target supports reassociation of instructions in machine + /// combiner pass to reduce register pressure for a given BB. + virtual bool + shouldReduceRegisterPressure(MachineBasicBlock *MBB, + RegisterClassInfo *RegClassInfo) const { + return false; + } + + /// Fix up the placeholder we may add in genAlternativeCodeSequence(). + virtual void + finalizeInsInstrs(MachineInstr &Root, MachineCombinerPattern &P, + SmallVectorImpl<MachineInstr *> &InsInstrs) const {} /// Return true when a code sequence can improve throughput. It /// should be called only for instructions in loops. @@ -1248,10 +1298,11 @@ public: bool &OffsetIsScalable, const TargetRegisterInfo *TRI) const; - /// Get the base operands and byte offset of an instruction that reads/writes - /// memory. + /// Get zero or more base operands and the byte offset of an instruction that + /// reads/writes memory. Note that there may be zero base operands if the + /// instruction accesses a constant address. /// It returns false if MI does not read/write memory. - /// It returns false if no base operands and offset was found. + /// It returns false if base operands and offset could not be determined. /// It is not guaranteed to always recognize base operands and offsets in all /// cases. virtual bool getMemOperandsWithOffsetWidth( @@ -1270,6 +1321,27 @@ public: return false; } + /// Target dependent implementation to get the values constituting the address + /// MachineInstr that is accessing memory. These values are returned as a + /// struct ExtAddrMode which contains all relevant information to make up the + /// address. + virtual Optional<ExtAddrMode> + getAddrModeFromMemoryOp(const MachineInstr &MemI, + const TargetRegisterInfo *TRI) const { + return None; + } + + /// Returns true if MI's Def is NullValueReg, and the MI + /// does not change the Zero value. i.e. 
cases such as rax = shr rax, X where + /// NullValueReg = rax. Note that if the NullValueReg is non-zero, this + /// function can return true even if becomes zero. Specifically cases such as + /// NullValueReg = shl NullValueReg, 63. + virtual bool preservesZeroValueInReg(const MachineInstr *MI, + const Register NullValueReg, + const TargetRegisterInfo *TRI) const { + return false; + } + /// If the instruction is an increment of a constant value, return the amount. virtual bool getIncrementValue(const MachineInstr &MI, int &Value) const { return false; @@ -1304,6 +1376,11 @@ public: virtual void insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const; + /// Insert noops into the instruction stream at the specified point. + virtual void insertNoops(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + unsigned Quantity) const; + /// Return the noop instruction to use for a noop. virtual void getNoop(MCInst &NopInst) const; @@ -1355,8 +1432,13 @@ public: /// If the specified instruction defines any predicate /// or condition code register(s) used for predication, returns true as well /// as the definition predicate(s) by reference. - virtual bool DefinesPredicate(MachineInstr &MI, - std::vector<MachineOperand> &Pred) const { + /// SkipDead should be set to false at any point that dead + /// predicate instructions should be considered as being defined. + /// A dead predicate instruction is one that is guaranteed to be removed + /// after a call to PredicateInstruction. + virtual bool ClobbersPredicate(MachineInstr &MI, + std::vector<MachineOperand> &Pred, + bool SkipDead) const { return false; } @@ -1442,7 +1524,7 @@ public: /// the machine instruction generated due to folding. virtual MachineInstr *optimizeLoadInstr(MachineInstr &MI, const MachineRegisterInfo *MRI, - unsigned &FoldAsLoadDefReg, + Register &FoldAsLoadDefReg, MachineInstr *&DefMI) const { return nullptr; } @@ -1627,7 +1709,7 @@ public: /// This hook works similarly to getPartialRegUpdateClearance, except that it /// does not take an operand index. Instead sets \p OpNum to the index of the /// unused register. - virtual unsigned getUndefRegClearance(const MachineInstr &MI, unsigned &OpNum, + virtual unsigned getUndefRegClearance(const MachineInstr &MI, unsigned OpNum, const TargetRegisterInfo *TRI) const { // The default implementation returns 0 for no undef register dependency. return 0; @@ -1688,6 +1770,21 @@ public: return 5; } + /// Return the maximal number of alias checks on memory operands. For + /// instructions with more than one memory operands, the alias check on a + /// single MachineInstr pair has quadratic overhead and results in + /// unacceptable performance in the worst case. The limit here is to clamp + /// that maximal checks performed. Usually, that's the product of memory + /// operand numbers from that pair of MachineInstr to be checked. For + /// instance, with two MachineInstrs with 4 and 5 memory operands + /// correspondingly, a total of 20 checks are required. With this limit set to + /// 16, their alias check is skipped. We choose to limit the product instead + /// of the individual instruction as targets may have special MachineInstrs + /// with a considerably high number of memory operands, such as `ldm` in ARM. + /// Setting this limit per MachineInstr would result in either too high + /// overhead or too rigid restriction. 
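The way a limit like the one declared just below is typically consumed is a simple product test before doing pairwise alias queries: with 4 and 5 memory operands the product is 20 > 16, so the per-operand checks are skipped and the pair is treated conservatively. A hedged sketch; the helper name is illustrative, not an existing LLVM function:

// Illustrative guard: skip the quadratic per-operand alias checks when the
// product of memory-operand counts exceeds the target's limit, and
// conservatively assume the two instructions may alias instead.
static bool tooManyMemOperandsToCheck(unsigned NumMemOpsA,
                                      unsigned NumMemOpsB,
                                      unsigned Limit /*e.g. 16*/) {
  return NumMemOpsA * NumMemOpsB > Limit;
}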
+ virtual unsigned getMemOperandAACheckLimit() const { return 16; } + /// Return an array that contains the ids of the target indices (used for the /// TargetIndex machine operand) and their names. /// diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h index 06f2b3ca38ea..c3221aac8eea 100644 --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -278,6 +278,7 @@ public: bool IsSRet : 1; bool IsNest : 1; bool IsByVal : 1; + bool IsByRef : 1; bool IsInAlloca : 1; bool IsPreallocated : 1; bool IsReturned : 1; @@ -290,7 +291,7 @@ public: ArgListEntry() : IsSExt(false), IsZExt(false), IsInReg(false), IsSRet(false), - IsNest(false), IsByVal(false), IsInAlloca(false), + IsNest(false), IsByVal(false), IsByRef(false), IsInAlloca(false), IsPreallocated(false), IsReturned(false), IsSwiftSelf(false), IsSwiftError(false), IsCFGuardTarget(false) {} @@ -374,6 +375,13 @@ public: EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL, bool LegalTypes = true) const; + /// Return the preferred type to use for a shift opcode, given the shifted + /// amount type is \p ShiftValueTy. + LLVM_READONLY + virtual LLT getPreferredShiftAmountTy(LLT ShiftValueTy) const { + return ShiftValueTy; + } + /// Returns the type to be used for the index operand of: /// ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT, /// ISD::INSERT_SUBVECTOR, and ISD::EXTRACT_SUBVECTOR @@ -419,7 +427,7 @@ public: virtual TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT) const { // The default action for one element vectors is to scalarize - if (VT.getVectorElementCount() == 1) + if (VT.getVectorElementCount().isScalar()) return TypeScalarizeVector; // The default action for an odd-width vector is to widen. if (!VT.isPow2VectorType()) @@ -597,6 +605,12 @@ public: return false; } + /// Return the maximum number of "x & (x - 1)" operations that can be done + /// instead of deferring to a custom CTPOP. + virtual unsigned getCustomCtpopCost(EVT VT, ISD::CondCode Cond) const { + return 1; + } + /// Return true if instruction generated for equality comparison is folded /// with instruction generated for signed comparison. virtual bool isEqualityCmpFoldedWithSignedCmp() const { return true; } @@ -1085,8 +1099,13 @@ public: /// Return true if the specified operation is legal on this target or can be /// made legal with custom lowering. This is used to help guide high-level - /// lowering decisions. - bool isOperationLegalOrCustom(unsigned Op, EVT VT) const { + /// lowering decisions. LegalOnly is an optional convenience for code paths + /// traversed pre and post legalisation. + bool isOperationLegalOrCustom(unsigned Op, EVT VT, + bool LegalOnly = false) const { + if (LegalOnly) + return isOperationLegal(Op, VT); + return (VT == MVT::Other || isTypeLegal(VT)) && (getOperationAction(Op, VT) == Legal || getOperationAction(Op, VT) == Custom); @@ -1094,8 +1113,13 @@ public: /// Return true if the specified operation is legal on this target or can be /// made legal using promotion. This is used to help guide high-level lowering - /// decisions. - bool isOperationLegalOrPromote(unsigned Op, EVT VT) const { + /// decisions. LegalOnly is an optional convenience for code paths traversed + /// pre and post legalisation. 
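A sketch of how the new LegalOnly parameter is meant to be used: a combine that runs both before and after legalization forwards its LegalOperations state, so that after legalization only genuinely Legal operations are accepted and Custom no longer qualifies. The helper below is hypothetical, not an existing LLVM function:

#include "llvm/CodeGen/TargetLowering.h"

// Hypothetical query used by a DAG combine: before legalization, Custom
// lowering is acceptable; afterwards, only Legal counts.
static bool canUseFunnelShift(const llvm::TargetLowering &TLI, llvm::EVT VT,
                              bool LegalOperations) {
  return TLI.isOperationLegalOrCustom(llvm::ISD::FSHL, VT,
                                      /*LegalOnly=*/LegalOperations);
}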
+ bool isOperationLegalOrPromote(unsigned Op, EVT VT, + bool LegalOnly = false) const { + if (LegalOnly) + return isOperationLegal(Op, VT); + return (VT == MVT::Other || isTypeLegal(VT)) && (getOperationAction(Op, VT) == Legal || getOperationAction(Op, VT) == Promote); @@ -1103,8 +1127,13 @@ public: /// Return true if the specified operation is legal on this target or can be /// made legal with custom lowering or using promotion. This is used to help - /// guide high-level lowering decisions. - bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT) const { + /// guide high-level lowering decisions. LegalOnly is an optional convenience + /// for code paths traversed pre and post legalisation. + bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, + bool LegalOnly = false) const { + if (LegalOnly) + return isOperationLegal(Op, VT); + return (VT == MVT::Other || isTypeLegal(VT)) && (getOperationAction(Op, VT) == Legal || getOperationAction(Op, VT) == Custom || @@ -1289,6 +1318,10 @@ public: getIndexedMaskedStoreAction(IdxMode, VT.getSimpleVT()) == Custom); } + // Returns true if VT is a legal index type for masked gathers/scatters + // on this target + virtual bool shouldRemoveExtendFromGSIndex(EVT VT) const { return false; } + /// Return how the condition code should be treated: either it is legal, needs /// to be expanded to some other code sequence, or the target has a custom /// expander for it. @@ -1625,6 +1658,11 @@ public: const MachineMemOperand &MMO, bool *Fast = nullptr) const; + /// LLT handling variant. + bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, LLT Ty, + const MachineMemOperand &MMO, + bool *Fast = nullptr) const; + /// Returns the target specific optimal type for load and store operations as /// a result of memset, memcpy, and memmove lowering. /// It returns EVT::Other if the type should be determined using generic @@ -1663,13 +1701,9 @@ public: virtual bool isJumpTableRelative() const; - /// Return true if a mulh[s|u] node for a specific type is cheaper than - /// a multiply followed by a shift. This is false by default. - virtual bool isMulhCheaperThanMulShift(EVT Type) const { return false; } - /// If a physical register, this specifies the register that /// llvm.savestack/llvm.restorestack should save and restore. - unsigned getStackPointerRegisterToSaveRestore() const { + Register getStackPointerRegisterToSaveRestore() const { return StackPointerRegisterToSaveRestore; } @@ -1758,17 +1792,10 @@ public: return ""; } - /// Returns true if a cast between SrcAS and DestAS is a noop. - virtual bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const { - return false; - } - /// Returns true if a cast from SrcAS to DestAS is "cheap", such that e.g. we /// are happy to sink it into basic blocks. A cast may be free, but not /// necessarily a no-op. e.g. a free truncate from a 64-bit to 32-bit pointer. - virtual bool isFreeAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const { - return isNoopAddrSpaceCast(SrcAS, DestAS); - } + virtual bool isFreeAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const; /// Return true if the pointer arguments to CI should be aligned by aligning /// the object whose address is being passed. If so then MinSize is set to the @@ -3090,16 +3117,6 @@ protected: MachineBasicBlock *emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const; - /// Replace/modify the XRay custom event operands with target-dependent - /// details. 
- MachineBasicBlock *emitXRayCustomEvent(MachineInstr &MI, - MachineBasicBlock *MBB) const; - - /// Replace/modify the XRay typed event operands with target-dependent - /// details. - MachineBasicBlock *emitXRayTypedEvent(MachineInstr &MI, - MachineBasicBlock *MBB) const; - bool IsStrictFPEnabled; }; @@ -4188,7 +4205,7 @@ public: // Lower custom output constraints. If invalid, return SDValue(). virtual SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag, - SDLoc DL, + const SDLoc &DL, const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const; @@ -4255,6 +4272,20 @@ public: return SDValue(); } + /// Return a target-dependent comparison result if the input operand is + /// suitable for use with a square root estimate calculation. For example, the + /// comparison may check if the operand is NAN, INF, zero, normal, etc. The + /// result should be used as the condition operand for a select or branch. + virtual SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG, + const DenormalMode &Mode) const; + + /// Return a target-dependent result if the input operand is not suitable for + /// use with a square root estimate calculation. + virtual SDValue getSqrtResultForDenormInput(SDValue Operand, + SelectionDAG &DAG) const { + return DAG.getConstantFP(0.0, SDLoc(Operand), Operand.getValueType()); + } + //===--------------------------------------------------------------------===// // Legalization utility functions // @@ -4269,7 +4300,7 @@ public: /// \param RL Low bits of the RHS of the MUL. See LL for meaning /// \param RH High bits of the RHS of the MUL. See LL for meaning. /// \returns true if the node has been expanded, false if it has not - bool expandMUL_LOHI(unsigned Opcode, EVT VT, SDLoc dl, SDValue LHS, + bool expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl, SDValue LHS, SDValue RHS, SmallVectorImpl<SDValue> &Result, EVT HiLoVT, SelectionDAG &DAG, MulExpansionKind Kind, SDValue LL = SDValue(), SDValue LH = SDValue(), @@ -4297,9 +4328,12 @@ public: /// Expand rotations. /// \param N Node to expand + /// \param AllowVectorOps expand vector rotate, this should only be performed + /// if the legalization is happening outside of LegalizeVectorOps /// \param Result output after conversion /// \returns True, if the expansion was successful, false otherwise - bool expandROT(SDNode *N, SDValue &Result, SelectionDAG &DAG) const; + bool expandROT(SDNode *N, bool AllowVectorOps, SDValue &Result, + SelectionDAG &DAG) const; /// Expand float(f32) to SINT(i64) conversion /// \param N Node to expand @@ -4326,6 +4360,11 @@ public: /// Expand fminnum/fmaxnum into fminnum_ieee/fmaxnum_ieee with quieted inputs. SDValue expandFMINNUM_FMAXNUM(SDNode *N, SelectionDAG &DAG) const; + /// Expand FP_TO_[US]INT_SAT into FP_TO_[US]INT and selects or min/max. + /// \param N Node to expand + /// \returns The expansion result + SDValue expandFP_TO_INT_SAT(SDNode *N, SelectionDAG &DAG) const; + /// Expand CTPOP nodes. Expands vector/scalar CTPOP nodes, /// vector nodes can only succeed if all operations are legal/custom. 
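For reference, one classic branch-free popcount sequence of the kind this CTPOP expansion produces, written out as plain C++ for 32 bits; the DAG expansion builds essentially the same shifts, masks, adds and (when a multiply is available) the final byte-summing multiply as SDNodes:

#include <cassert>
#include <cstdint>

static uint32_t popcount32(uint32_t V) {
  V = V - ((V >> 1) & 0x55555555u);                 // 2-bit counts
  V = (V & 0x33333333u) + ((V >> 2) & 0x33333333u); // 4-bit counts
  V = (V + (V >> 4)) & 0x0F0F0F0Fu;                 // 8-bit counts
  return (V * 0x01010101u) >> 24;                   // sum the bytes
}

int main() {
  assert(popcount32(0x00000000u) == 0);
  assert(popcount32(0xFFFFFFFFu) == 32);
  assert(popcount32(0x80000001u) == 2);
  return 0;
}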
/// \param N Node to expand @@ -4352,8 +4391,10 @@ public: /// (ABS x) -> (XOR (ADD x, (SRA x, type_size)), (SRA x, type_size)) /// \param N Node to expand /// \param Result output after conversion + /// \param IsNegative indicate negated abs /// \returns True, if the expansion was successful, false otherwise - bool expandABS(SDNode *N, SDValue &Result, SelectionDAG &DAG) const; + bool expandABS(SDNode *N, SDValue &Result, SelectionDAG &DAG, + bool IsNegative = false) const; /// Turn load of vector type into a load of the individual elements. /// \param LD load to expand @@ -4393,10 +4434,18 @@ public: SDValue getVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, SDValue Index) const; + /// Method for building the DAG expansion of ISD::[US][MIN|MAX]. This + /// method accepts integers as its arguments. + SDValue expandIntMINMAX(SDNode *Node, SelectionDAG &DAG) const; + /// Method for building the DAG expansion of ISD::[US][ADD|SUB]SAT. This /// method accepts integers as its arguments. SDValue expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const; + /// Method for building the DAG expansion of ISD::[US]SHLSAT. This + /// method accepts integers as its arguments. + SDValue expandShlSat(SDNode *Node, SelectionDAG &DAG) const; + /// Method for building the DAG expansion of ISD::[U|S]MULFIX[SAT]. This /// method accepts integers as its arguments. SDValue expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const; @@ -4428,6 +4477,9 @@ public: /// only the first Count elements of the vector are used. SDValue expandVecReduce(SDNode *Node, SelectionDAG &DAG) const; + /// Expand a VECREDUCE_SEQ_* into an explicit ordered calculation. + SDValue expandVecReduceSeq(SDNode *Node, SelectionDAG &DAG) const; + /// Expand an SREM or UREM using SDIV/UDIV or SDIVREM/UDIVREM, if legal. /// Returns true if the expansion was successful. bool expandREM(SDNode *Node, SDValue &Result, SelectionDAG &DAG) const; @@ -4482,6 +4534,10 @@ public: // combiner can fold the new nodes. SDValue lowerCmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) const; + /// Give targets the chance to reduce the number of distinct addresing modes. 
+ ISD::MemIndexType getCanonicalIndexType(ISD::MemIndexType IndexType, + EVT MemVT, SDValue Offsets) const; + private: SDValue foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, const SDLoc &DL, DAGCombinerInfo &DCI) const; diff --git a/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h b/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h index 6e2c0973e354..31e08b7d1e63 100644 --- a/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h +++ b/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h @@ -21,6 +21,7 @@ namespace llvm { class GlobalValue; class MachineModuleInfo; +class MachineFunction; class MCContext; class MCExpr; class MCSection; @@ -35,10 +36,9 @@ class TargetLoweringObjectFileELF : public TargetLoweringObjectFile { protected: MCSymbolRefExpr::VariantKind PLTRelativeVariantKind = MCSymbolRefExpr::VK_None; - const TargetMachine *TM = nullptr; public: - TargetLoweringObjectFileELF() = default; + TargetLoweringObjectFileELF(); ~TargetLoweringObjectFileELF() override = default; void Initialize(MCContext &Ctx, const TargetMachine &TM) override; @@ -63,6 +63,8 @@ public: MCSection *getSectionForJumpTable(const Function &F, const TargetMachine &TM) const override; + MCSection *getSectionForLSDA(const Function &F, + const TargetMachine &TM) const override; MCSection * getSectionForMachineBasicBlock(const Function &F, @@ -95,6 +97,9 @@ public: const GlobalValue *RHS, const TargetMachine &TM) const override; + const MCExpr *lowerDSOLocalEquivalent(const DSOLocalEquivalent *Equiv, + const TargetMachine &TM) const override; + MCSection *getSectionForCommandLines() const override; }; @@ -143,6 +148,7 @@ public: class TargetLoweringObjectFileCOFF : public TargetLoweringObjectFile { mutable unsigned NextUniqueID = 0; + const TargetMachine *TM = nullptr; public: ~TargetLoweringObjectFileCOFF() override = default; @@ -168,12 +174,6 @@ public: MCSection *getStaticDtorSection(unsigned Priority, const MCSymbol *KeySym) const override; - void emitLinkerFlagsForGlobal(raw_ostream &OS, - const GlobalValue *GV) const override; - - void emitLinkerFlagsForUsed(raw_ostream &OS, - const GlobalValue *GV) const override; - const MCExpr *lowerRelativeReference(const GlobalValue *LHS, const GlobalValue *RHS, const TargetMachine &TM) const override; @@ -183,6 +183,9 @@ public: MCSection *getSectionForConstant(const DataLayout &DL, SectionKind Kind, const Constant *C, Align &Alignment) const override; + +private: + void emitLinkerDirectives(MCStreamer &Streamer, Module &M) const; }; class TargetLoweringObjectFileWasm : public TargetLoweringObjectFile { @@ -217,6 +220,10 @@ public: TargetLoweringObjectFileXCOFF() = default; ~TargetLoweringObjectFileXCOFF() override = default; + static bool ShouldEmitEHBlock(const MachineFunction *MF); + + static MCSymbol *getEHInfoTableSymbol(const MachineFunction *MF); + void Initialize(MCContext &Ctx, const TargetMachine &TM) override; bool shouldPutJumpTableInFunctionSection(bool UsesLabelDifference, @@ -246,12 +253,13 @@ public: const Constant *C, Align &Alignment) const override; - static XCOFF::StorageClass getStorageClassForGlobal(const GlobalObject *GO); + static XCOFF::StorageClass getStorageClassForGlobal(const GlobalValue *GV); MCSection * getSectionForFunctionDescriptor(const Function *F, const TargetMachine &TM) const override; - MCSection *getSectionForTOCEntry(const MCSymbol *Sym) const override; + MCSection *getSectionForTOCEntry(const MCSymbol *Sym, + const TargetMachine &TM) const override; /// For external functions, this 
will always return a function descriptor /// csect. @@ -263,7 +271,7 @@ public: MCSymbol *getTargetSymbol(const GlobalValue *GV, const TargetMachine &TM) const override; - MCSymbol *getFunctionEntryPointSymbol(const Function *F, + MCSymbol *getFunctionEntryPointSymbol(const GlobalValue *Func, const TargetMachine &TM) const override; }; diff --git a/llvm/include/llvm/CodeGen/TargetPassConfig.h b/llvm/include/llvm/CodeGen/TargetPassConfig.h index a18c8b16bf1c..b4787710379f 100644 --- a/llvm/include/llvm/CodeGen/TargetPassConfig.h +++ b/llvm/include/llvm/CodeGen/TargetPassConfig.h @@ -25,6 +25,7 @@ struct MachineSchedContext; class PassConfigImpl; class ScheduleDAGInstrs; class CSEConfigBase; +class PassInstrumentationCallbacks; // The old pass manager infrastructure is hidden in a legacy namespace now. namespace legacy { @@ -187,7 +188,7 @@ public: /// Insert InsertedPassID pass after TargetPassID pass. void insertPass(AnalysisID TargetPassID, IdentifyingPassPtr InsertedPassID, - bool VerifyAfter = true, bool PrintAfter = true); + bool VerifyAfter = true); /// Allow the target to enable a specific standard pass by default. void enablePass(AnalysisID PassID) { substitutePass(PassID, PassID); } @@ -313,14 +314,17 @@ public: /// Add a pass to remove debug info from the MIR. void addStripDebugPass(); + /// Add a pass to check synthesized debug info for MIR. + void addCheckDebugPass(); + /// Add standard passes before a pass that's about to be added. For example, /// the DebugifyMachineModulePass if it is enabled. void addMachinePrePasses(bool AllowDebugify = true); /// Add standard passes after a pass that has just been added. For example, /// the MachineVerifier if it is enabled. - void addMachinePostPasses(const std::string &Banner, bool AllowPrint = true, - bool AllowVerify = true, bool AllowStrip = true); + void addMachinePostPasses(const std::string &Banner, bool AllowVerify = true, + bool AllowStrip = true); /// Check whether or not GlobalISel should abort on error. /// When this is disabled, GlobalISel will fall back on SDISel instead of @@ -441,32 +445,30 @@ protected: /// Add a CodeGen pass at this point in the pipeline after checking overrides. /// Return the pass that was added, or zero if no pass was added. - /// @p printAfter if true and adding a machine function pass add an extra - /// machine printer pass afterwards /// @p verifyAfter if true and adding a machine function pass add an extra /// machine verification pass afterwards. - AnalysisID addPass(AnalysisID PassID, bool verifyAfter = true, - bool printAfter = true); + AnalysisID addPass(AnalysisID PassID, bool verifyAfter = true); /// Add a pass to the PassManager if that pass is supposed to be run, as /// determined by the StartAfter and StopAfter options. Takes ownership of the /// pass. - /// @p printAfter if true and adding a machine function pass add an extra - /// machine printer pass afterwards /// @p verifyAfter if true and adding a machine function pass add an extra /// machine verification pass afterwards. - void addPass(Pass *P, bool verifyAfter = true, bool printAfter = true); + void addPass(Pass *P, bool verifyAfter = true); /// addMachinePasses helper to create the target-selected or overriden /// regalloc pass. virtual FunctionPass *createRegAllocPass(bool Optimized); - /// Add core register alloator passes which do the actual register assignment + /// Add core register allocator passes which do the actual register assignment /// and rewriting. \returns true if any passes were added. 
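A sketch of why the split between assignment and rewriting is exposed through the hooks renamed below: a backend can override the optimized variant to run its own pass after register assignment but before the virtual-register rewriter. MyTargetPassConfig and the commented-out createMyTileConfigPass are hypothetical; createRegAllocPass and VirtRegRewriterID are the existing pieces being composed:

#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"

namespace {
class MyTargetPassConfig : public llvm::TargetPassConfig {
public:
  using llvm::TargetPassConfig::TargetPassConfig;

  bool addRegAssignAndRewriteOptimized() override {
    addPass(createRegAllocPass(/*Optimized=*/true)); // register assignment
    // addPass(createMyTileConfigPass());  // hypothetical target fixup pass
    addPass(&llvm::VirtRegRewriterID);               // virtual reg rewriting
    return true;
  }
};
} // namespace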
- virtual bool addRegAssignmentFast(); - virtual bool addRegAssignmentOptimized(); + virtual bool addRegAssignAndRewriteFast(); + virtual bool addRegAssignAndRewriteOptimized(); }; +void registerCodeGenCallback(PassInstrumentationCallbacks &PIC, + LLVMTargetMachine &); + } // end namespace llvm #endif // LLVM_CODEGEN_TARGETPASSCONFIG_H diff --git a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h index d921c4c9028b..8790e2f09eb6 100644 --- a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h +++ b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h @@ -34,6 +34,7 @@ namespace llvm { class BitVector; +class DIExpression; class LiveRegMatrix; class MachineFunction; class MachineInstr; @@ -87,22 +88,21 @@ public: /// Return true if the specified register is included in this register class. /// This does not include virtual registers. - bool contains(unsigned Reg) const { + bool contains(Register Reg) const { /// FIXME: Historically this function has returned false when given vregs /// but it should probably only receive physical registers - if (!Register::isPhysicalRegister(Reg)) + if (!Reg.isPhysical()) return false; - return MC->contains(Reg); + return MC->contains(Reg.asMCReg()); } /// Return true if both registers are in this class. - bool contains(unsigned Reg1, unsigned Reg2) const { + bool contains(Register Reg1, Register Reg2) const { /// FIXME: Historically this function has returned false when given a vregs /// but it should probably only receive physical registers - if (!Register::isPhysicalRegister(Reg1) || - !Register::isPhysicalRegister(Reg2)) + if (!Reg1.isPhysical() || !Reg2.isPhysical()) return false; - return MC->contains(Reg1, Reg2); + return MC->contains(Reg1.asMCReg(), Reg2.asMCReg()); } /// Return the cost of copying a value between two registers in this class. @@ -386,12 +386,12 @@ public: /// The registers may be virtual registers. bool regsOverlap(Register regA, Register regB) const { if (regA == regB) return true; - if (regA.isVirtual() || regB.isVirtual()) + if (!regA.isPhysical() || !regB.isPhysical()) return false; // Regunits are numerically ordered. Find a common unit. - MCRegUnitIterator RUA(regA, this); - MCRegUnitIterator RUB(regB, this); + MCRegUnitIterator RUA(regA.asMCReg(), this); + MCRegUnitIterator RUB(regB.asMCReg(), this); do { if (*RUA == *RUB) return true; if (*RUA < *RUB) ++RUA; @@ -401,9 +401,9 @@ public: } /// Returns true if Reg contains RegUnit. - bool hasRegUnit(MCRegister Reg, unsigned RegUnit) const { + bool hasRegUnit(MCRegister Reg, Register RegUnit) const { for (MCRegUnitIterator Units(Reg, this); Units.isValid(); ++Units) - if (*Units == RegUnit) + if (Register(*Units) == RegUnit) return true; return false; } @@ -415,6 +415,16 @@ public: virtual Register lookThruCopyLike(Register SrcReg, const MachineRegisterInfo *MRI) const; + /// Find the original SrcReg unless it is the target of a copy-like operation, + /// in which case we chain backwards through all such operations to the + /// ultimate source register. If a physical register is encountered, we stop + /// the search. + /// Return the original SrcReg if all the definitions in the chain only have + /// one user and not a physical register. + virtual Register + lookThruSingleUseCopyChain(Register SrcReg, + const MachineRegisterInfo *MRI) const; + /// Return a null-terminated list of all of the callee-saved registers on /// this target. The register should be in the order of desired callee-save /// stack frame offset. 
The first register is closest to the incoming stack @@ -449,6 +459,13 @@ public: return nullptr; } + /// Return a register mask for the registers preserved by the unwinder, + /// or nullptr if no custom mask is needed. + virtual const uint32_t * + getCustomEHPadPreservedMask(const MachineFunction &MF) const { + return nullptr; + } + /// Return a register mask that clobbers everything. virtual const uint32_t *getNoPreservedMask() const { llvm_unreachable("target does not provide no preserved mask"); @@ -894,11 +911,11 @@ public: return false; } - /// Insert defining instruction(s) for BaseReg to be a pointer to FrameIdx - /// before insertion point I. - virtual void materializeFrameBaseRegister(MachineBasicBlock *MBB, - Register BaseReg, int FrameIdx, - int64_t Offset) const { + /// Insert defining instruction(s) for a pointer to FrameIdx before + /// insertion point I. Return materialized frame pointer. + virtual Register materializeFrameBaseRegister(MachineBasicBlock *MBB, + int FrameIdx, + int64_t Offset) const { llvm_unreachable("materializeFrameBaseRegister does not exist on this " "target"); } @@ -917,6 +934,15 @@ public: llvm_unreachable("isFrameOffsetLegal does not exist on this target"); } + /// Gets the DWARF expression opcodes for \p Offset. + virtual void getOffsetOpcodes(const StackOffset &Offset, + SmallVectorImpl<uint64_t> &Ops) const; + + /// Prepends a DWARF expression for \p Offset to DIExpression \p Expr. + DIExpression * + prependOffsetExpression(const DIExpression *Expr, unsigned PrependFlags, + const StackOffset &Offset) const; + /// Spill the register so it can be used by the register scavenger. /// Return true if the register was spilled, false otherwise. /// If this function does not spill the register, the scavenger @@ -970,6 +996,36 @@ public: virtual bool shouldRegionSplitForVirtReg(const MachineFunction &MF, const LiveInterval &VirtReg) const; + /// Last chance recoloring has a high compile time cost especially for + /// targets with a lot of registers. + /// This method is used to decide whether or not \p VirtReg should + /// go through this expensive heuristic. + /// When this target hook is hit, by returning false, there is a high + /// chance that the register allocation will fail altogether (usually with + /// "ran out of registers"). + /// That said, this error usually points to another problem in the + /// optimization pipeline. + virtual bool + shouldUseLastChanceRecoloringForVirtReg(const MachineFunction &MF, + const LiveInterval &VirtReg) const { + return true; + } + + /// Deferred spilling delays the spill insertion of a virtual register + /// after every other allocation. By deferring the spilling, it is + /// sometimes possible to eliminate that spilling altogether because + /// something else could have been eliminated, thus leaving some space + /// for the virtual register. + /// However, this comes with a compile time impact because it adds one + /// more stage to the greedy register allocator. + /// This method is used to decide whether \p VirtReg should use the deferred + /// spilling stage instead of being spilled right away. + virtual bool + shouldUseDeferredSpillingForVirtReg(const MachineFunction &MF, + const LiveInterval &VirtReg) const { + return false; + } + //===--------------------------------------------------------------------===// /// Debug information queries. @@ -994,7 +1050,7 @@ public: /// Returns the physical register number of sub-register "Index" /// for physical register RegNo. 
Return zero if the sub-register does not /// exist. - inline Register getSubReg(MCRegister Reg, unsigned Idx) const { + inline MCRegister getSubReg(MCRegister Reg, unsigned Idx) const { return static_cast<const MCRegisterInfo *>(this)->getSubReg(Reg, Idx); } }; @@ -1146,8 +1202,8 @@ public: // This is useful when building IndexedMaps keyed on virtual registers struct VirtReg2IndexFunctor { - using argument_type = unsigned; - unsigned operator()(unsigned Reg) const { + using argument_type = Register; + unsigned operator()(Register Reg) const { return Register::virtReg2Index(Reg); } }; diff --git a/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h b/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h index e0dfd9c8bbc5..3fac2f688dd8 100644 --- a/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h +++ b/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h @@ -58,8 +58,8 @@ class Triple; /// class TargetSubtargetInfo : public MCSubtargetInfo { protected: // Can only create subclasses... - TargetSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS, - ArrayRef<SubtargetFeatureKV> PF, + TargetSubtargetInfo(const Triple &TT, StringRef CPU, StringRef TuneCPU, + StringRef FS, ArrayRef<SubtargetFeatureKV> PF, ArrayRef<SubtargetSubTypeKV> PD, const MCWriteProcResEntry *WPR, const MCWriteLatencyEntry *WL, diff --git a/llvm/include/llvm/CodeGen/TileShapeInfo.h b/llvm/include/llvm/CodeGen/TileShapeInfo.h new file mode 100644 index 000000000000..031d23555b7e --- /dev/null +++ b/llvm/include/llvm/CodeGen/TileShapeInfo.h @@ -0,0 +1,97 @@ +//===- llvm/CodeGen/TileShapeInfo.h - ---------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file Shape utility for AMX. +/// AMX hardware requires the shape of the tile data register to be configured +/// before use. The 2D shape consists of a row and a column. In the AMX intrinsics +/// interface the shape is passed as the 1st and 2nd parameters, and it is lowered +/// to the 1st and 2nd machine operands of the AMX pseudo instructions. The ShapeT +/// class facilitates tile configuration and register allocation. The row and column +/// are machine operands of the AMX pseudo instructions.
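// A minimal usage sketch (illustrative, not part of this header): wrap the shape
// operands of an AMX pseudo instruction in a ShapeT so that a later pass (e.g.
// tile configuration) can query the row and column. The helper name and the
// operand indices are assumptions; they depend on how the target defines its
// AMX pseudo instructions.
#include "llvm/CodeGen/TileShapeInfo.h"
using namespace llvm;

static ShapeT getTileShape(MachineInstr &MI, const MachineRegisterInfo &MRI) {
  MachineOperand &Row = MI.getOperand(1); // assumed: row operand follows the tile def
  MachineOperand &Col = MI.getOperand(2); // assumed: column is the next operand
  return ShapeT(&Row, &Col, &MRI);        // deduces RowImm/ColImm from move-immediate defs
}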
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_TILESHAPEINFO_H +#define LLVM_CODEGEN_TILESHAPEINFO_H + +#include "llvm/ADT/DenseMapInfo.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Register.h" +#include <utility> + +namespace llvm { + +class ShapeT { +public: + ShapeT(MachineOperand *Row, MachineOperand *Col, + const MachineRegisterInfo *MRI = nullptr) + : Row(Row), Col(Col) { + if (MRI) + deduceImm(MRI); + } + ShapeT() + : Row(nullptr), Col(nullptr), RowImm(InvalidImmShape), + ColImm(InvalidImmShape) {} + bool operator==(const ShapeT &Shape) { + MachineOperand *R = Shape.Row; + MachineOperand *C = Shape.Col; + if (!R || !C) + return false; + if (!Row || !Col) + return false; + if (Row->getReg() == R->getReg() && Col->getReg() == C->getReg()) + return true; + if ((RowImm != InvalidImmShape) && (ColImm != InvalidImmShape)) + return RowImm == Shape.getRowImm() && ColImm == Shape.getColImm(); + return false; + } + + bool operator!=(const ShapeT &Shape) { return !(*this == Shape); } + + MachineOperand *getRow() const { return Row; } + + MachineOperand *getCol() const { return Col; } + + int64_t getRowImm() const { return RowImm; } + + int64_t getColImm() const { return ColImm; } + + bool isValid() { return (Row != nullptr) && (Col != nullptr); } + + void deduceImm(const MachineRegisterInfo *MRI) { + // All def must be the same value, otherwise it is invalid MIs. + // Find the immediate. + // TODO copy propagation. + auto GetImm = [&](Register Reg) { + int64_t Imm = InvalidImmShape; + for (const MachineOperand &DefMO : MRI->def_operands(Reg)) { + const auto *MI = DefMO.getParent(); + if (MI->isMoveImmediate()) { + Imm = MI->getOperand(1).getImm(); + break; + } + } + return Imm; + }; + RowImm = GetImm(Row->getReg()); + ColImm = GetImm(Col->getReg()); + } + +private: + static constexpr int64_t InvalidImmShape = -1; + MachineOperand *Row; + MachineOperand *Col; + int64_t RowImm; + int64_t ColImm; +}; + +} // namespace llvm + +#endif diff --git a/llvm/include/llvm/CodeGen/ValueTypes.h b/llvm/include/llvm/CodeGen/ValueTypes.h index db8161caf7d2..888b83d6f736 100644 --- a/llvm/include/llvm/CodeGen/ValueTypes.h +++ b/llvm/include/llvm/CodeGen/ValueTypes.h @@ -92,26 +92,17 @@ namespace llvm { /// with the element type converted to an integer type with the same /// bitwidth. EVT changeVectorElementTypeToInteger() const { - if (!isSimple()) - return changeExtendedVectorElementTypeToInteger(); - MVT EltTy = getSimpleVT().getVectorElementType(); - unsigned BitWidth = EltTy.getSizeInBits(); - MVT IntTy = MVT::getIntegerVT(BitWidth); - MVT VecTy = MVT::getVectorVT(IntTy, getVectorElementCount()); - assert(VecTy.SimpleTy != MVT::INVALID_SIMPLE_VALUE_TYPE && - "Simple vector VT not representable by simple integer vector VT!"); - return VecTy; + if (isSimple()) + return getSimpleVT().changeVectorElementTypeToInteger(); + return changeExtendedVectorElementTypeToInteger(); } /// Return a VT for a vector type whose attributes match ourselves /// with the exception of the element type that is chosen by the caller. 
EVT changeVectorElementType(EVT EltVT) const { - if (!isSimple()) - return changeExtendedVectorElementType(EltVT); - MVT VecTy = MVT::getVectorVT(EltVT.V, getVectorElementCount()); - assert(VecTy.SimpleTy != MVT::INVALID_SIMPLE_VALUE_TYPE && - "Simple vector VT not representable by simple integer vector VT!"); - return VecTy; + if (isSimple() && EltVT.isSimple()) + return getSimpleVT().changeVectorElementType(EltVT.getSimpleVT()); + return changeExtendedVectorElementType(EltVT); } /// Return the type converted to an equivalently sized integer or vector @@ -122,8 +113,7 @@ namespace llvm { return changeVectorElementTypeToInteger(); if (isSimple()) - return MVT::getIntegerVT(getSizeInBits()); - + return getSimpleVT().changeTypeToInteger(); return changeExtendedTypeToInteger(); } @@ -214,9 +204,7 @@ namespace llvm { } /// Return true if the bit size is a multiple of 8. - bool isByteSized() const { - return getSizeInBits().isByteSized(); - } + bool isByteSized() const { return getSizeInBits().isKnownMultipleOf(8); } /// Return true if the size is a power-of-two number of bytes. bool isRound() const { @@ -232,28 +220,58 @@ namespace llvm { return getSizeInBits() == VT.getSizeInBits(); } + /// Return true if we know at compile time this has more bits than VT. + bool knownBitsGT(EVT VT) const { + return TypeSize::isKnownGT(getSizeInBits(), VT.getSizeInBits()); + } + + /// Return true if we know at compile time this has more than or the same + /// bits as VT. + bool knownBitsGE(EVT VT) const { + return TypeSize::isKnownGE(getSizeInBits(), VT.getSizeInBits()); + } + + /// Return true if we know at compile time this has fewer bits than VT. + bool knownBitsLT(EVT VT) const { + return TypeSize::isKnownLT(getSizeInBits(), VT.getSizeInBits()); + } + + /// Return true if we know at compile time this has fewer than or the same + /// bits as VT. + bool knownBitsLE(EVT VT) const { + return TypeSize::isKnownLE(getSizeInBits(), VT.getSizeInBits()); + } + /// Return true if this has more bits than VT. bool bitsGT(EVT VT) const { if (EVT::operator==(VT)) return false; - return getSizeInBits() > VT.getSizeInBits(); + assert(isScalableVector() == VT.isScalableVector() && + "Comparison between scalable and fixed types"); + return knownBitsGT(VT); } /// Return true if this has no less bits than VT. bool bitsGE(EVT VT) const { if (EVT::operator==(VT)) return true; - return getSizeInBits() >= VT.getSizeInBits(); + assert(isScalableVector() == VT.isScalableVector() && + "Comparison between scalable and fixed types"); + return knownBitsGE(VT); } /// Return true if this has less bits than VT. bool bitsLT(EVT VT) const { if (EVT::operator==(VT)) return false; - return getSizeInBits() < VT.getSizeInBits(); + assert(isScalableVector() == VT.isScalableVector() && + "Comparison between scalable and fixed types"); + return knownBitsLT(VT); } /// Return true if this has no more bits than VT. bool bitsLE(EVT VT) const { if (EVT::operator==(VT)) return true; - return getSizeInBits() <= VT.getSizeInBits(); + assert(isScalableVector() == VT.isScalableVector() && + "Comparison between scalable and fixed types"); + return knownBitsLE(VT); } /// Return the SimpleValueType held in the specified simple EVT. @@ -285,7 +303,7 @@ namespace llvm { if (isScalableVector()) WithColor::warning() << "Possible incorrect use of EVT::getVectorNumElements() for " - "scalable vector. Scalable flag may be dropped, use" + "scalable vector. 
Scalable flag may be dropped, use " "EVT::getVectorElementCount() instead\n"; #endif if (isSimple()) @@ -304,7 +322,7 @@ namespace llvm { /// Given a vector type, return the minimum number of elements it contains. unsigned getVectorMinNumElements() const { - return getVectorElementCount().Min; + return getVectorElementCount().getKnownMinValue(); } /// Return the size of the specified value type in bits. @@ -318,8 +336,14 @@ namespace llvm { return getExtendedSizeInBits(); } - TypeSize getScalarSizeInBits() const { - return getScalarType().getSizeInBits(); + /// Return the size of the specified fixed width value type in bits. The + /// function will assert if the type is scalable. + uint64_t getFixedSizeInBits() const { + return getSizeInBits().getFixedSize(); + } + + uint64_t getScalarSizeInBits() const { + return getScalarType().getSizeInBits().getFixedSize(); } /// Return the number of bytes overwritten by a store of the specified value @@ -383,8 +407,17 @@ namespace llvm { EVT getHalfNumVectorElementsVT(LLVMContext &Context) const { EVT EltVT = getVectorElementType(); auto EltCnt = getVectorElementCount(); - assert(!(EltCnt.Min & 1) && "Splitting vector, but not in half!"); - return EVT::getVectorVT(Context, EltVT, EltCnt / 2); + assert(EltCnt.isKnownEven() && "Splitting vector, but not in half!"); + return EVT::getVectorVT(Context, EltVT, EltCnt.divideCoefficientBy(2)); + } + + // Return a VT for a vector type with the same element type but + // double the number of elements. The type returned may be an + // extended type. + EVT getDoubleNumVectorElementsVT(LLVMContext &Context) const { + EVT EltVT = getVectorElementType(); + auto EltCnt = getVectorElementCount(); + return EVT::getVectorVT(Context, EltVT, EltCnt * 2); } /// Returns true if the given vector is a power of 2. 
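// A minimal sketch (illustrative, not from the patch) of why the knownBits*
// helpers were added: bitsGT/bitsGE/bitsLT/bitsLE now assert that both types
// have the same scalability, so code that may see a mix of fixed and scalable
// vectors should use the TypeSize-based queries instead. The helper name below
// is hypothetical.
#include "llvm/CodeGen/ValueTypes.h"
using namespace llvm;

static bool isKnownWider(EVT A, EVT B) {
  if (A.isScalableVector() != B.isScalableVector())
    return A.knownBitsGT(B); // never asserts; conservative when not provable
  return A.bitsGT(B);        // exact comparison when scalability matches
}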
@@ -398,7 +431,8 @@ namespace llvm { EVT getPow2VectorType(LLVMContext &Context) const { if (!isPow2VectorType()) { ElementCount NElts = getVectorElementCount(); - NElts.Min = 1 << Log2_32_Ceil(NElts.Min); + unsigned NewMinCount = 1 << Log2_32_Ceil(NElts.getKnownMinValue()); + NElts = ElementCount::get(NewMinCount, NElts.isScalable()); return EVT::getVectorVT(Context, getVectorElementType(), NElts); } else { diff --git a/llvm/include/llvm/CodeGen/ValueTypes.td b/llvm/include/llvm/CodeGen/ValueTypes.td index c5eb87cf1d34..d13d0a7772e9 100644 --- a/llvm/include/llvm/CodeGen/ValueTypes.td +++ b/llvm/include/llvm/CodeGen/ValueTypes.td @@ -87,107 +87,116 @@ def v4i64 : ValueType<256, 60>; // 4 x i64 vector value def v8i64 : ValueType<512, 61>; // 8 x i64 vector value def v16i64 : ValueType<1024,62>; // 16 x i64 vector value def v32i64 : ValueType<2048,63>; // 32 x i64 vector value +def v64i64 : ValueType<4096,64>; // 64 x i64 vector value +def v128i64: ValueType<8192,65>; // 128 x i64 vector value +def v256i64: ValueType<16384,66>; // 256 x i64 vector value + +def v1i128 : ValueType<128, 67>; // 1 x i128 vector value + +def v2f16 : ValueType<32 , 68>; // 2 x f16 vector value +def v3f16 : ValueType<48 , 69>; // 3 x f16 vector value +def v4f16 : ValueType<64 , 70>; // 4 x f16 vector value +def v8f16 : ValueType<128, 71>; // 8 x f16 vector value +def v16f16 : ValueType<256, 72>; // 16 x f16 vector value +def v32f16 : ValueType<512, 73>; // 32 x f16 vector value +def v64f16 : ValueType<1024, 74>; // 64 x f16 vector value +def v128f16 : ValueType<2048, 75>; // 128 x f16 vector value +def v2bf16 : ValueType<32 , 76>; // 2 x bf16 vector value +def v3bf16 : ValueType<48 , 77>; // 3 x bf16 vector value +def v4bf16 : ValueType<64 , 78>; // 4 x bf16 vector value +def v8bf16 : ValueType<128, 79>; // 8 x bf16 vector value +def v16bf16 : ValueType<256, 80>; // 16 x bf16 vector value +def v32bf16 : ValueType<512, 81>; // 32 x bf16 vector value +def v64bf16 : ValueType<1024, 82>; // 64 x bf16 vector value +def v128bf16 : ValueType<2048, 83>; // 128 x bf16 vector value +def v1f32 : ValueType<32 , 84>; // 1 x f32 vector value +def v2f32 : ValueType<64 , 85>; // 2 x f32 vector value +def v3f32 : ValueType<96 , 86>; // 3 x f32 vector value +def v4f32 : ValueType<128, 87>; // 4 x f32 vector value +def v5f32 : ValueType<160, 88>; // 5 x f32 vector value +def v8f32 : ValueType<256, 89>; // 8 x f32 vector value +def v16f32 : ValueType<512, 90>; // 16 x f32 vector value +def v32f32 : ValueType<1024, 91>; // 32 x f32 vector value +def v64f32 : ValueType<2048, 92>; // 64 x f32 vector value +def v128f32 : ValueType<4096, 93>; // 128 x f32 vector value +def v256f32 : ValueType<8182, 94>; // 256 x f32 vector value +def v512f32 : ValueType<16384, 95>; // 512 x f32 vector value +def v1024f32 : ValueType<32768, 96>; // 1024 x f32 vector value +def v2048f32 : ValueType<65536, 97>; // 2048 x f32 vector value +def v1f64 : ValueType<64, 98>; // 1 x f64 vector value +def v2f64 : ValueType<128, 99>; // 2 x f64 vector value +def v4f64 : ValueType<256, 100>; // 4 x f64 vector value +def v8f64 : ValueType<512, 101>; // 8 x f64 vector value +def v16f64 : ValueType<1024, 102>; // 16 x f64 vector value +def v32f64 : ValueType<2048, 103>; // 32 x f64 vector value +def v64f64 : ValueType<4096, 104>; // 64 x f64 vector value +def v128f64 : ValueType<8192, 105>; // 128 x f64 vector value +def v256f64 : ValueType<16384, 106>; // 256 x f64 vector value + +def nxv1i1 : ValueType<1, 107>; // n x 1 x i1 vector value +def nxv2i1 : ValueType<2, 
108>; // n x 2 x i1 vector value +def nxv4i1 : ValueType<4, 109>; // n x 4 x i1 vector value +def nxv8i1 : ValueType<8, 110>; // n x 8 x i1 vector value +def nxv16i1 : ValueType<16, 111>; // n x 16 x i1 vector value +def nxv32i1 : ValueType<32, 112>; // n x 32 x i1 vector value +def nxv64i1 : ValueType<64,113>; // n x 64 x i1 vector value + +def nxv1i8 : ValueType<8, 114>; // n x 1 x i8 vector value +def nxv2i8 : ValueType<16, 115>; // n x 2 x i8 vector value +def nxv4i8 : ValueType<32, 116>; // n x 4 x i8 vector value +def nxv8i8 : ValueType<64, 117>; // n x 8 x i8 vector value +def nxv16i8 : ValueType<128, 118>; // n x 16 x i8 vector value +def nxv32i8 : ValueType<256, 119>; // n x 32 x i8 vector value +def nxv64i8 : ValueType<512, 120>; // n x 64 x i8 vector value + +def nxv1i16 : ValueType<16, 121>; // n x 1 x i16 vector value +def nxv2i16 : ValueType<32, 122>; // n x 2 x i16 vector value +def nxv4i16 : ValueType<64, 123>; // n x 4 x i16 vector value +def nxv8i16 : ValueType<128, 124>; // n x 8 x i16 vector value +def nxv16i16: ValueType<256, 125>; // n x 16 x i16 vector value +def nxv32i16: ValueType<512, 126>; // n x 32 x i16 vector value + +def nxv1i32 : ValueType<32, 127>; // n x 1 x i32 vector value +def nxv2i32 : ValueType<64, 128>; // n x 2 x i32 vector value +def nxv4i32 : ValueType<128, 129>; // n x 4 x i32 vector value +def nxv8i32 : ValueType<256, 130>; // n x 8 x i32 vector value +def nxv16i32: ValueType<512, 131>; // n x 16 x i32 vector value +def nxv32i32: ValueType<1024,132>; // n x 32 x i32 vector value + +def nxv1i64 : ValueType<64, 133>; // n x 1 x i64 vector value +def nxv2i64 : ValueType<128, 134>; // n x 2 x i64 vector value +def nxv4i64 : ValueType<256, 135>; // n x 4 x i64 vector value +def nxv8i64 : ValueType<512, 136>; // n x 8 x i64 vector value +def nxv16i64: ValueType<1024,137>; // n x 16 x i64 vector value +def nxv32i64: ValueType<2048,138>; // n x 32 x i64 vector value + +def nxv1f16 : ValueType<32, 139>; // n x 1 x f16 vector value +def nxv2f16 : ValueType<32 , 140>; // n x 2 x f16 vector value +def nxv4f16 : ValueType<64 , 141>; // n x 4 x f16 vector value +def nxv8f16 : ValueType<128, 142>; // n x 8 x f16 vector value +def nxv16f16 : ValueType<256,143>; // n x 16 x f16 vector value +def nxv32f16 : ValueType<512,144>; // n x 32 x f16 vector value +def nxv2bf16 : ValueType<32 , 145>; // n x 2 x bf16 vector value +def nxv4bf16 : ValueType<64 , 146>; // n x 4 x bf16 vector value +def nxv8bf16 : ValueType<128, 147>; // n x 8 x bf16 vector value +def nxv1f32 : ValueType<32 , 148>; // n x 1 x f32 vector value +def nxv2f32 : ValueType<64 , 149>; // n x 2 x f32 vector value +def nxv4f32 : ValueType<128, 150>; // n x 4 x f32 vector value +def nxv8f32 : ValueType<256, 151>; // n x 8 x f32 vector value +def nxv16f32 : ValueType<512, 152>; // n x 16 x f32 vector value +def nxv1f64 : ValueType<64, 153>; // n x 1 x f64 vector value +def nxv2f64 : ValueType<128, 154>; // n x 2 x f64 vector value +def nxv4f64 : ValueType<256, 155>; // n x 4 x f64 vector value +def nxv8f64 : ValueType<512, 156>; // n x 8 x f64 vector value + +def x86mmx : ValueType<64 , 157>; // X86 MMX value +def FlagVT : ValueType<0 , 158>; // Pre-RA sched glue +def isVoid : ValueType<0 , 159>; // Produces no value +def untyped: ValueType<8 , 160>; // Produces an untyped value +def funcref : ValueType<0 , 161>; // WebAssembly's funcref type +def externref : ValueType<0 , 162>; // WebAssembly's externref type +def x86amx : ValueType<8192, 163>; // X86 AMX value -def v1i128 : ValueType<128, 64>; // 1 x 
i128 vector value - -def v2f16 : ValueType<32 , 65>; // 2 x f16 vector value -def v3f16 : ValueType<48 , 66>; // 3 x f16 vector value -def v4f16 : ValueType<64 , 67>; // 4 x f16 vector value -def v8f16 : ValueType<128, 68>; // 8 x f16 vector value -def v16f16 : ValueType<256, 69>; // 16 x f16 vector value -def v32f16 : ValueType<512, 70>; // 32 x f16 vector value -def v64f16 : ValueType<1024, 71>; // 64 x f16 vector value -def v128f16 : ValueType<2048, 72>; // 128 x f16 vector value -def v2bf16 : ValueType<32 , 73>; // 2 x bf16 vector value -def v3bf16 : ValueType<48 , 74>; // 3 x bf16 vector value -def v4bf16 : ValueType<64 , 75>; // 4 x bf16 vector value -def v8bf16 : ValueType<128, 76>; // 8 x bf16 vector value -def v16bf16 : ValueType<256, 77>; // 16 x bf16 vector value -def v32bf16 : ValueType<512, 78>; // 32 x bf16 vector value -def v64bf16 : ValueType<1024, 79>; // 64 x bf16 vector value -def v128bf16 : ValueType<2048, 80>; // 128 x bf16 vector value -def v1f32 : ValueType<32 , 81>; // 1 x f32 vector value -def v2f32 : ValueType<64 , 82>; // 2 x f32 vector value -def v3f32 : ValueType<96 , 83>; // 3 x f32 vector value -def v4f32 : ValueType<128, 84>; // 4 x f32 vector value -def v5f32 : ValueType<160, 85>; // 5 x f32 vector value -def v8f32 : ValueType<256, 86>; // 8 x f32 vector value -def v16f32 : ValueType<512, 87>; // 16 x f32 vector value -def v32f32 : ValueType<1024, 88>; // 32 x f32 vector value -def v64f32 : ValueType<2048, 89>; // 64 x f32 vector value -def v128f32 : ValueType<4096, 90>; // 128 x f32 vector value -def v256f32 : ValueType<8182, 91>; // 256 x f32 vector value -def v512f32 : ValueType<16384, 92>; // 512 x f32 vector value -def v1024f32 : ValueType<32768, 93>; // 1024 x f32 vector value -def v2048f32 : ValueType<65536, 94>; // 2048 x f32 vector value -def v1f64 : ValueType<64, 95>; // 1 x f64 vector value -def v2f64 : ValueType<128, 96>; // 2 x f64 vector value -def v4f64 : ValueType<256, 97>; // 4 x f64 vector value -def v8f64 : ValueType<512, 98>; // 8 x f64 vector value -def v16f64 : ValueType<1024, 99>; // 16 x f64 vector value -def v32f64 : ValueType<2048, 100>; // 32 x f64 vector value - -def nxv1i1 : ValueType<1, 101>; // n x 1 x i1 vector value -def nxv2i1 : ValueType<2, 102>; // n x 2 x i1 vector value -def nxv4i1 : ValueType<4, 103>; // n x 4 x i1 vector value -def nxv8i1 : ValueType<8, 104>; // n x 8 x i1 vector value -def nxv16i1 : ValueType<16, 105>; // n x 16 x i1 vector value -def nxv32i1 : ValueType<32, 106>; // n x 32 x i1 vector value -def nxv64i1 : ValueType<64,107>; // n x 64 x i1 vector value - -def nxv1i8 : ValueType<8, 108>; // n x 1 x i8 vector value -def nxv2i8 : ValueType<16, 109>; // n x 2 x i8 vector value -def nxv4i8 : ValueType<32, 110>; // n x 4 x i8 vector value -def nxv8i8 : ValueType<64, 111>; // n x 8 x i8 vector value -def nxv16i8 : ValueType<128, 112>; // n x 16 x i8 vector value -def nxv32i8 : ValueType<256, 113>; // n x 32 x i8 vector value -def nxv64i8 : ValueType<512, 114>; // n x 64 x i8 vector value - -def nxv1i16 : ValueType<16, 115>; // n x 1 x i16 vector value -def nxv2i16 : ValueType<32, 116>; // n x 2 x i16 vector value -def nxv4i16 : ValueType<64, 117>; // n x 4 x i16 vector value -def nxv8i16 : ValueType<128, 118>; // n x 8 x i16 vector value -def nxv16i16: ValueType<256, 119>; // n x 16 x i16 vector value -def nxv32i16: ValueType<512, 120>; // n x 32 x i16 vector value - -def nxv1i32 : ValueType<32, 121>; // n x 1 x i32 vector value -def nxv2i32 : ValueType<64, 122>; // n x 2 x i32 vector value -def nxv4i32 : 
ValueType<128, 123>; // n x 4 x i32 vector value -def nxv8i32 : ValueType<256, 124>; // n x 8 x i32 vector value -def nxv16i32: ValueType<512, 125>; // n x 16 x i32 vector value -def nxv32i32: ValueType<1024,126>; // n x 32 x i32 vector value - -def nxv1i64 : ValueType<64, 127>; // n x 1 x i64 vector value -def nxv2i64 : ValueType<128, 128>; // n x 2 x i64 vector value -def nxv4i64 : ValueType<256, 129>; // n x 4 x i64 vector value -def nxv8i64 : ValueType<512, 130>; // n x 8 x i64 vector value -def nxv16i64: ValueType<1024,131>; // n x 16 x i64 vector value -def nxv32i64: ValueType<2048,132>; // n x 32 x i64 vector value - -def nxv1f16 : ValueType<32, 133>; // n x 1 x f16 vector value -def nxv2f16 : ValueType<32 , 134>; // n x 2 x f16 vector value -def nxv4f16 : ValueType<64 , 135>; // n x 4 x f16 vector value -def nxv8f16 : ValueType<128, 136>; // n x 8 x f16 vector value -def nxv16f16 : ValueType<256,137>; // n x 16 x f16 vector value -def nxv32f16 : ValueType<512,138>; // n x 32 x f16 vector value -def nxv2bf16 : ValueType<32 , 139>; // n x 2 x bf16 vector value -def nxv4bf16 : ValueType<64 , 140>; // n x 4 x bf16 vector value -def nxv8bf16 : ValueType<128, 141>; // n x 8 x bf16 vector value -def nxv1f32 : ValueType<32 , 142>; // n x 1 x f32 vector value -def nxv2f32 : ValueType<64 , 143>; // n x 2 x f32 vector value -def nxv4f32 : ValueType<128, 144>; // n x 4 x f32 vector value -def nxv8f32 : ValueType<256, 145>; // n x 8 x f32 vector value -def nxv16f32 : ValueType<512, 146>; // n x 16 x f32 vector value -def nxv1f64 : ValueType<64, 147>; // n x 1 x f64 vector value -def nxv2f64 : ValueType<128, 148>; // n x 2 x f64 vector value -def nxv4f64 : ValueType<256, 149>; // n x 4 x f64 vector value -def nxv8f64 : ValueType<512, 150>; // n x 8 x f64 vector value - -def x86mmx : ValueType<64 , 151>; // X86 MMX value -def FlagVT : ValueType<0 , 152>; // Pre-RA sched glue -def isVoid : ValueType<0 , 153>; // Produces no value -def untyped: ValueType<8 , 154>; // Produces an untyped value -def exnref : ValueType<0 , 155>; // WebAssembly's exnref type def token : ValueType<0 , 248>; // TokenTy def MetadataVT: ValueType<0, 249>; // Metadata diff --git a/llvm/include/llvm/CodeGen/VirtRegMap.h b/llvm/include/llvm/CodeGen/VirtRegMap.h index 823154318eb7..deef4b90279a 100644 --- a/llvm/include/llvm/CodeGen/VirtRegMap.h +++ b/llvm/include/llvm/CodeGen/VirtRegMap.h @@ -19,6 +19,7 @@ #include "llvm/ADT/IndexedMap.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TileShapeInfo.h" #include "llvm/Pass.h" #include <cassert> @@ -60,6 +61,10 @@ class TargetInstrInfo; /// mapping. IndexedMap<unsigned, VirtReg2IndexFunctor> Virt2SplitMap; + /// Virt2ShapeMap - For X86 AMX register whose register is bound shape + /// information. + DenseMap<unsigned, ShapeT> Virt2ShapeMap; + /// createSpillSlot - Allocate a spill slot for RC from MFI. 
unsigned createSpillSlot(const TargetRegisterClass *RC); @@ -98,15 +103,30 @@ class TargetInstrInfo; /// returns the physical register mapped to the specified /// virtual register - Register getPhys(Register virtReg) const { + MCRegister getPhys(Register virtReg) const { assert(virtReg.isVirtual()); - return Virt2PhysMap[virtReg.id()]; + return MCRegister::from(Virt2PhysMap[virtReg.id()]); } /// creates a mapping for the specified virtual register to /// the specified physical register void assignVirt2Phys(Register virtReg, MCPhysReg physReg); + bool isShapeMapEmpty() const { return Virt2ShapeMap.empty(); } + + bool hasShape(Register virtReg) const { + return getShape(virtReg).isValid(); + } + + ShapeT getShape(Register virtReg) const { + assert(virtReg.isVirtual()); + return Virt2ShapeMap.lookup(virtReg); + } + + void assignVirt2Shape(Register virtReg, ShapeT shape) { + Virt2ShapeMap[virtReg.id()] = shape; + } + /// clears the specified virtual register's, physical /// register mapping void clearVirt(Register virtReg) { @@ -131,12 +151,15 @@ class TargetInstrInfo; bool hasKnownPreference(Register VirtReg); /// records virtReg is a split live interval from SReg. - void setIsSplitFromReg(Register virtReg, unsigned SReg) { + void setIsSplitFromReg(Register virtReg, Register SReg) { Virt2SplitMap[virtReg.id()] = SReg; + if (hasShape(SReg)) { + Virt2ShapeMap[virtReg.id()] = getShape(SReg); + } } /// returns the live interval virtReg is split from. - unsigned getPreSplitReg(Register virtReg) const { + Register getPreSplitReg(Register virtReg) const { return Virt2SplitMap[virtReg.id()]; } @@ -144,8 +167,8 @@ class TargetInstrInfo; /// from through splitting. /// A register that was not created by splitting is its own original. /// This operation is idempotent. - unsigned getOriginal(unsigned VirtReg) const { - unsigned Orig = getPreSplitReg(VirtReg); + Register getOriginal(Register VirtReg) const { + Register Orig = getPreSplitReg(VirtReg); return Orig ? Orig : VirtReg; } diff --git a/llvm/include/llvm/CodeGen/WasmEHFuncInfo.h b/llvm/include/llvm/CodeGen/WasmEHFuncInfo.h index 41f8856f31f2..54e8c40a9e72 100644 --- a/llvm/include/llvm/CodeGen/WasmEHFuncInfo.h +++ b/llvm/include/llvm/CodeGen/WasmEHFuncInfo.h @@ -22,7 +22,9 @@ class BasicBlock; class Function; class MachineBasicBlock; +namespace WebAssembly { enum EventTag { CPP_EXCEPTION = 0, C_LONGJMP = 1 }; +} using BBOrMBB = PointerUnion<const BasicBlock *, MachineBasicBlock *>; diff --git a/llvm/include/llvm/DWARFLinker/DWARFLinker.h b/llvm/include/llvm/DWARFLinker/DWARFLinker.h index be3c5ebcadae..7281966fc608 100644 --- a/llvm/include/llvm/DWARFLinker/DWARFLinker.h +++ b/llvm/include/llvm/DWARFLinker/DWARFLinker.h @@ -64,14 +64,17 @@ public: /// section. Reset current relocation pointer if neccessary. virtual bool hasValidRelocs(bool ResetRelocsPtr = true) = 0; - /// Checks that there is a relocation against .debug_info - /// table between \p StartOffset and \p NextOffset. - /// - /// This function must be called with offsets in strictly ascending - /// order because it never looks back at relocations it already 'went past'. - /// \returns true and sets Info.InDebugMap if it is the case. - virtual bool hasValidRelocationAt(uint64_t StartOffset, uint64_t EndOffset, - CompileUnit::DIEInfo &Info) = 0; + /// Checks that the specified DIE has a DW_AT_Location attribute + /// that references into a live code section. This function + /// must be called with DIE offsets in strictly ascending order. 
+ virtual bool hasLiveMemoryLocation(const DWARFDie &DIE, + CompileUnit::DIEInfo &Info) = 0; + + /// Checks that the specified DIE has a DW_AT_Low_pc attribute + /// that references into a live code section. This function + /// must be called with DIE offsets in strictly ascending order. + virtual bool hasLiveAddressRange(const DWARFDie &DIE, + CompileUnit::DIEInfo &Info) = 0; /// Apply the valid relocations to the buffer \p Data, taking into /// account that Data is at \p BaseOffset in the debug_info section. @@ -82,6 +85,9 @@ public: virtual bool applyValidRelocs(MutableArrayRef<char> Data, uint64_t BaseOffset, bool IsLittleEndian) = 0; + /// Relocate the given address offset if a valid relocation exists. + virtual llvm::Expected<uint64_t> relocateIndexedAddr(uint64_t Offset) = 0; + /// Returns all valid functions address ranges(i.e., those ranges /// which points to sections with code). virtual RangesTy &getValidAddressRanges() = 0; @@ -180,7 +186,8 @@ public: /// /// As a side effect, this also switches the current Dwarf version /// of the MC layer to the one of U.getOrigUnit(). - virtual void emitCompileUnitHeader(CompileUnit &Unit) = 0; + virtual void emitCompileUnitHeader(CompileUnit &Unit, + unsigned DwarfVersion) = 0; /// Recursively emit the DIE tree rooted at \p Die. virtual void emitDIE(DIE &Die) = 0; @@ -202,9 +209,9 @@ using UnitListTy = std::vector<std::unique_ptr<CompileUnit>>; /// this class represents DWARF information for source file /// and it`s address map. -class DwarfFile { +class DWARFFile { public: - DwarfFile(StringRef Name, DWARFContext *Dwarf, AddressesMap *Addresses, + DWARFFile(StringRef Name, DWARFContext *Dwarf, AddressesMap *Addresses, const std::vector<std::string> &Warnings) : FileName(Name), Dwarf(Dwarf), Addresses(Addresses), Warnings(Warnings) { } @@ -222,7 +229,7 @@ public: typedef std::function<void(const Twine &Warning, StringRef Context, const DWARFDie *DIE)> messageHandler; -typedef std::function<ErrorOr<DwarfFile &>(StringRef ContainerName, +typedef std::function<ErrorOr<DWARFFile &>(StringRef ContainerName, StringRef Path)> objFileLoader; typedef std::map<std::string, std::string> swiftInterfacesMap; @@ -249,7 +256,7 @@ public: : TheDwarfEmitter(Emitter), DwarfLinkerClientID(ClientID) {} /// Add object file to be linked. - void addObjectFile(DwarfFile &File); + void addObjectFile(DWARFFile &File); /// Link debug info for added objFiles. Object /// files are linked all together. @@ -353,36 +360,38 @@ private: /// of work needs to be performed when processing the current item. The flags /// and info fields are optional based on the type. 
struct WorklistItem { - WorklistItemType Type; DWARFDie Die; + WorklistItemType Type; CompileUnit &CU; unsigned Flags; - unsigned AncestorIdx = 0; - CompileUnit::DIEInfo *OtherInfo = nullptr; + union { + const unsigned AncestorIdx; + CompileUnit::DIEInfo *OtherInfo; + }; WorklistItem(DWARFDie Die, CompileUnit &CU, unsigned Flags, WorklistItemType T = WorklistItemType::LookForDIEsToKeep) - : Type(T), Die(Die), CU(CU), Flags(Flags) {} + : Die(Die), Type(T), CU(CU), Flags(Flags), AncestorIdx(0) {} WorklistItem(DWARFDie Die, CompileUnit &CU, WorklistItemType T, CompileUnit::DIEInfo *OtherInfo = nullptr) - : Type(T), Die(Die), CU(CU), OtherInfo(OtherInfo) {} + : Die(Die), Type(T), CU(CU), Flags(0), OtherInfo(OtherInfo) {} WorklistItem(unsigned AncestorIdx, CompileUnit &CU, unsigned Flags) - : Type(WorklistItemType::LookForParentDIEsToKeep), CU(CU), Flags(Flags), - AncestorIdx(AncestorIdx) {} + : Die(), Type(WorklistItemType::LookForParentDIEsToKeep), CU(CU), + Flags(Flags), AncestorIdx(AncestorIdx) {} }; /// returns true if we need to translate strings. bool needToTranslateStrings() { return StringsTranslator != nullptr; } - void reportWarning(const Twine &Warning, const DwarfFile &File, + void reportWarning(const Twine &Warning, const DWARFFile &File, const DWARFDie *DIE = nullptr) const { if (Options.WarningHandler != nullptr) Options.WarningHandler(Warning, File.FileName, DIE); } - void reportError(const Twine &Warning, const DwarfFile &File, + void reportError(const Twine &Warning, const DWARFFile &File, const DWARFDie *DIE = nullptr) const { if (Options.ErrorHandler != nullptr) Options.ErrorHandler(Warning, File.FileName, DIE); @@ -398,18 +407,18 @@ private: void updateAccelKind(DWARFContext &Dwarf); /// Emit warnings as Dwarf compile units to leave a trail after linking. - bool emitPaperTrailWarnings(const DwarfFile &File, + bool emitPaperTrailWarnings(const DWARFFile &File, OffsetsStringPool &StringPool); void copyInvariantDebugSection(DWARFContext &Dwarf); /// Keeps track of data associated with one object during linking. struct LinkContext { - DwarfFile &File; + DWARFFile &File; UnitListTy CompileUnits; bool Skip = false; - LinkContext(DwarfFile &File) : File(File) {} + LinkContext(DWARFFile &File) : File(File) {} /// Clear part of the context that's no longer needed when we're done with /// the debug object. @@ -438,7 +447,7 @@ private: /// kept. All DIEs referenced though attributes should be kept. void lookForRefDIEsToKeep(const DWARFDie &Die, CompileUnit &CU, unsigned Flags, const UnitListTy &Units, - const DwarfFile &File, + const DWARFFile &File, SmallVectorImpl<WorklistItem> &Worklist); /// \defgroup FindRootDIEs Find DIEs corresponding to Address map entries. @@ -450,7 +459,7 @@ private: /// The return value indicates whether the DIE is incomplete. void lookForDIEsToKeep(AddressesMap &RelocMgr, RangesTy &Ranges, const UnitListTy &Units, const DWARFDie &DIE, - const DwarfFile &File, CompileUnit &CU, + const DWARFFile &File, CompileUnit &CU, unsigned Flags); /// If this compile unit is really a skeleton CU that points to a @@ -460,9 +469,8 @@ private: /// pointing to the module, and a DW_AT_gnu_dwo_id with the module /// hash. 
bool registerModuleReference(DWARFDie CUDie, const DWARFUnit &Unit, - const DwarfFile &File, + const DWARFFile &File, OffsetsStringPool &OffsetsStringPool, - UniquingStringPool &UniquingStringPoolStringPool, DeclContextTree &ODRContexts, uint64_t ModulesEndOffset, unsigned &UnitID, bool IsLittleEndian, unsigned Indent = 0, @@ -473,9 +481,8 @@ private: /// to Units. Error loadClangModule(DWARFDie CUDie, StringRef FilePath, StringRef ModuleName, uint64_t DwoId, - const DwarfFile &File, + const DWARFFile &File, OffsetsStringPool &OffsetsStringPool, - UniquingStringPool &UniquingStringPool, DeclContextTree &ODRContexts, uint64_t ModulesEndOffset, unsigned &UnitID, bool IsLittleEndian, unsigned Indent = 0, bool Quiet = false); @@ -484,22 +491,21 @@ private: void keepDIEAndDependencies(AddressesMap &RelocMgr, RangesTy &Ranges, const UnitListTy &Units, const DWARFDie &DIE, CompileUnit::DIEInfo &MyInfo, - const DwarfFile &File, CompileUnit &CU, + const DWARFFile &File, CompileUnit &CU, bool UseODR); unsigned shouldKeepDIE(AddressesMap &RelocMgr, RangesTy &Ranges, - const DWARFDie &DIE, const DwarfFile &File, + const DWARFDie &DIE, const DWARFFile &File, CompileUnit &Unit, CompileUnit::DIEInfo &MyInfo, unsigned Flags); /// Check if a variable describing DIE should be kept. /// \returns updated TraversalFlags. unsigned shouldKeepVariableDIE(AddressesMap &RelocMgr, const DWARFDie &DIE, - CompileUnit &Unit, CompileUnit::DIEInfo &MyInfo, unsigned Flags); unsigned shouldKeepSubprogramDIE(AddressesMap &RelocMgr, RangesTy &Ranges, - const DWARFDie &DIE, const DwarfFile &File, + const DWARFDie &DIE, const DWARFFile &File, CompileUnit &Unit, CompileUnit::DIEInfo &MyInfo, unsigned Flags); @@ -508,7 +514,7 @@ private: /// RefValue. The resulting DIE might be in another CompileUnit which is /// stored into \p ReferencedCU. \returns null if resolving fails for any /// reason. - DWARFDie resolveDIEReference(const DwarfFile &File, const UnitListTy &Units, + DWARFDie resolveDIEReference(const DWARFFile &File, const UnitListTy &Units, const DWARFFormValue &RefValue, const DWARFDie &DIE, CompileUnit *&RefCU); @@ -523,7 +529,7 @@ private: class DIECloner { DWARFLinker &Linker; DwarfEmitter *Emitter; - DwarfFile &ObjFile; + DWARFFile &ObjFile; /// Allocator used for all the DIEValue objects. BumpPtrAllocator &DIEAlloc; @@ -533,7 +539,7 @@ private: bool Update; public: - DIECloner(DWARFLinker &Linker, DwarfEmitter *Emitter, DwarfFile &ObjFile, + DIECloner(DWARFLinker &Linker, DwarfEmitter *Emitter, DWARFFile &ObjFile, BumpPtrAllocator &DIEAlloc, std::vector<std::unique_ptr<CompileUnit>> &CompileUnits, bool Update) @@ -551,7 +557,7 @@ private: /// applied to the entry point of the function to get the linked address. /// \param Die the output DIE to use, pass NULL to create one. /// \returns the root of the cloned tree or null if nothing was selected. - DIE *cloneDIE(const DWARFDie &InputDIE, const DwarfFile &File, + DIE *cloneDIE(const DWARFDie &InputDIE, const DWARFFile &File, CompileUnit &U, OffsetsStringPool &StringPool, int64_t PCOffset, uint32_t OutOffset, unsigned Flags, bool IsLittleEndian, DIE *Die = nullptr); @@ -560,7 +566,7 @@ private: /// chose to keep above. If there are no valid relocs, then there's /// nothing to clone/emit. uint64_t cloneAllCompileUnits(DWARFContext &DwarfContext, - const DwarfFile &File, + const DWARFFile &File, OffsetsStringPool &StringPool, bool IsLittleEndian); @@ -606,7 +612,7 @@ private: /// Helper for cloneDIE. 
unsigned cloneAttribute(DIE &Die, const DWARFDie &InputDIE, - const DwarfFile &File, CompileUnit &U, + const DWARFFile &File, CompileUnit &U, OffsetsStringPool &StringPool, const DWARFFormValue &Val, const AttributeSpec AttrSpec, unsigned AttrSize, @@ -627,18 +633,18 @@ private: AttributeSpec AttrSpec, unsigned AttrSize, const DWARFFormValue &Val, - const DwarfFile &File, + const DWARFFile &File, CompileUnit &Unit); /// Clone a DWARF expression that may be referencing another DIE. void cloneExpression(DataExtractor &Data, DWARFExpression Expression, - const DwarfFile &File, CompileUnit &Unit, + const DWARFFile &File, CompileUnit &Unit, SmallVectorImpl<uint8_t> &OutputBuffer); /// Clone an attribute referencing another DIE and add /// it to \p Die. /// \returns the size of the new attribute. - unsigned cloneBlockAttribute(DIE &Die, const DwarfFile &File, + unsigned cloneBlockAttribute(DIE &Die, const DWARFFile &File, CompileUnit &Unit, AttributeSpec AttrSpec, const DWARFFormValue &Val, unsigned AttrSize, bool IsLittleEndian); @@ -654,7 +660,7 @@ private: /// Clone a scalar attribute and add it to \p Die. /// \returns the size of the new attribute. unsigned cloneScalarAttribute(DIE &Die, const DWARFDie &InputDIE, - const DwarfFile &File, CompileUnit &U, + const DWARFFile &File, CompileUnit &U, AttributeSpec AttrSpec, const DWARFFormValue &Val, unsigned AttrSize, AttributesInfo &Info); @@ -670,7 +676,7 @@ private: void copyAbbrev(const DWARFAbbreviationDeclaration &Abbrev, bool hasODR); uint32_t hashFullyQualifiedName(DWARFDie DIE, CompileUnit &U, - const DwarfFile &File, + const DWARFFile &File, int RecurseDepth = 0); /// Helper for cloneDIE. @@ -685,7 +691,7 @@ private: /// Compute and emit debug_ranges section for \p Unit, and /// patch the attributes referencing it. void patchRangesForUnit(const CompileUnit &Unit, DWARFContext &Dwarf, - const DwarfFile &File) const; + const DWARFFile &File) const; /// Generate and emit the DW_AT_ranges attribute for a compile_unit if it had /// one. @@ -695,7 +701,7 @@ private: /// parts according to the linked function ranges and emit the result in the /// debug_line section. void patchLineTableForUnit(CompileUnit &Unit, DWARFContext &OrigDwarf, - const DwarfFile &File); + const DWARFFile &File); /// Emit the accelerator entries for \p Unit. void emitAcceleratorEntriesForUnit(CompileUnit &Unit); @@ -703,7 +709,7 @@ private: void emitAppleAcceleratorEntriesForUnit(CompileUnit &Unit); /// Patch the frame info for an object file and emit it. - void patchFrameInfoForObject(const DwarfFile &, RangesTy &Ranges, + void patchFrameInfoForObject(const DWARFFile &, RangesTy &Ranges, DWARFContext &, unsigned AddressSize); /// FoldingSet that uniques the abbreviations. 
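// A hypothetical consumer-side sketch after the DwarfFile -> DWARFFile rename:
// the caller supplies a DwarfEmitter implementation, the object's DWARFContext
// and an AddressesMap, then registers the file with the linker. The linker keeps
// a reference to the added file, so it must stay alive until link() has run.
// Names, the "input.o" literal, and the exact call sequence are assumptions,
// not taken from this patch.
#include "llvm/DWARFLinker/DWARFLinker.h"
#include <string>
#include <vector>
using namespace llvm;

static void linkOneObject(DwarfEmitter &Emitter, DWARFContext &Ctx,
                          AddressesMap &AddrMap) {
  std::vector<std::string> Warnings;   // referenced, not copied, by DWARFFile
  DWARFFile File("input.o", &Ctx, &AddrMap, Warnings);

  DWARFLinker Linker(&Emitter);        // assumes the default client ID
  Linker.addObjectFile(File);          // all added objects are linked together
  Linker.link();                       // emits the merged debug info via Emitter
}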
diff --git a/llvm/include/llvm/DWARFLinker/DWARFLinkerCompileUnit.h b/llvm/include/llvm/DWARFLinker/DWARFLinkerCompileUnit.h index 944e7e3501c9..a6310bcb5df1 100644 --- a/llvm/include/llvm/DWARFLinker/DWARFLinkerCompileUnit.h +++ b/llvm/include/llvm/DWARFLinker/DWARFLinkerCompileUnit.h @@ -101,10 +101,7 @@ public: unsigned getUniqueID() const { return ID; } - void createOutputDIE() { - NewUnit.emplace(OrigUnit.getVersion(), OrigUnit.getAddressByteSize(), - OrigUnit.getUnitDIE().getTag()); - } + void createOutputDIE() { NewUnit.emplace(OrigUnit.getUnitDIE().getTag()); } DIE *getOutputUnitDIE() const { if (NewUnit) @@ -123,6 +120,11 @@ public: DIEInfo &getInfo(unsigned Idx) { return Info[Idx]; } const DIEInfo &getInfo(unsigned Idx) const { return Info[Idx]; } + DIEInfo &getInfo(const DWARFDie &Die) { + unsigned Idx = getOrigUnit().getDIEIndex(Die); + return Info[Idx]; + } + uint64_t getStartOffset() const { return StartOffset; } uint64_t getNextUnitOffset() const { return NextUnitOffset; } void setStartOffset(uint64_t DebugInfoSize) { StartOffset = DebugInfoSize; } @@ -157,7 +159,7 @@ public: /// Compute the end offset for this unit. Must be called after the CU's DIEs /// have been cloned. \returns the next unit offset (which is also the /// current debug_info section size). - uint64_t computeNextUnitOffset(); + uint64_t computeNextUnitOffset(uint16_t DwarfVersion); /// Keep track of a forward reference to DIE \p Die in \p RefUnit by \p /// Attr. The attribute should be fixed up later to point to the absolute @@ -235,21 +237,6 @@ public: const std::vector<AccelInfo> &getNamespaces() const { return Namespaces; } const std::vector<AccelInfo> &getObjC() const { return ObjC; } - /// Get the full path for file \a FileNum in the line table - StringRef getResolvedPath(unsigned FileNum) { - if (FileNum >= ResolvedPaths.size()) - return StringRef(); - return ResolvedPaths[FileNum]; - } - - /// Set the fully resolved path for the line-table's file \a FileNum - /// to \a Path. - void setResolvedPath(unsigned FileNum, StringRef Path) { - if (ResolvedPaths.size() <= FileNum) - ResolvedPaths.resize(FileNum + 1); - ResolvedPaths[FileNum] = Path; - } - MCSymbol *getLabelBegin() { return LabelBegin; } void setLabelBegin(MCSymbol *S) { LabelBegin = S; } @@ -308,12 +295,6 @@ private: std::vector<AccelInfo> ObjC; /// @} - /// Cached resolved paths from the line table. - /// Note, the StringRefs here point in to the intern (uniquing) string pool. - /// This means that a StringRef returned here doesn't need to then be uniqued - /// for the purposes of getting a unique address for each string. - std::vector<StringRef> ResolvedPaths; - /// Is this unit subject to the ODR rule? bool HasODR; diff --git a/llvm/include/llvm/DWARFLinker/DWARFLinkerDeclContext.h b/llvm/include/llvm/DWARFLinker/DWARFLinkerDeclContext.h index e59e15f00a7e..d2274488e85f 100644 --- a/llvm/include/llvm/DWARFLinker/DWARFLinkerDeclContext.h +++ b/llvm/include/llvm/DWARFLinker/DWARFLinkerDeclContext.h @@ -15,6 +15,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/NonRelocatableStringpool.h" #include "llvm/DWARFLinker/DWARFLinkerCompileUnit.h" +#include "llvm/DebugInfo/DWARF/DWARFDebugLine.h" #include "llvm/DebugInfo/DWARF/DWARFDie.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/Path.h" @@ -31,16 +32,18 @@ class CachedPathResolver { public: /// Resolve a path by calling realpath and cache its result. The returned /// StringRef is interned in the given \p StringPool. 
- StringRef resolve(std::string Path, NonRelocatableStringpool &StringPool) { + StringRef resolve(const std::string &Path, + NonRelocatableStringpool &StringPool) { StringRef FileName = sys::path::filename(Path); - SmallString<256> ParentPath = sys::path::parent_path(Path); + StringRef ParentPath = sys::path::parent_path(Path); // If the ParentPath has not yet been resolved, resolve and cache it for // future look-ups. if (!ResolvedPaths.count(ParentPath)) { SmallString<256> RealPath; sys::fs::real_path(ParentPath, RealPath); - ResolvedPaths.insert({ParentPath, StringRef(RealPath).str()}); + ResolvedPaths.insert( + {ParentPath, std::string(RealPath.c_str(), RealPath.size())}); } // Join the file name again with the resolved path. @@ -95,7 +98,6 @@ public: void setDefinedInClangModule(bool Val) { DefinedInClangModule = Val; } uint16_t getTag() const { return Tag; } - StringRef getName() const { return Name; } private: friend DeclMapInfo; @@ -129,10 +131,10 @@ public: /// /// FIXME: The invalid bit along the return value is to emulate some /// dsymutil-classic functionality. - PointerIntPair<DeclContext *, 1> - getChildDeclContext(DeclContext &Context, const DWARFDie &DIE, - CompileUnit &Unit, UniquingStringPool &StringPool, - bool InClangModule); + PointerIntPair<DeclContext *, 1> getChildDeclContext(DeclContext &Context, + const DWARFDie &DIE, + CompileUnit &Unit, + bool InClangModule); DeclContext &getRoot() { return Root; } @@ -141,8 +143,19 @@ private: DeclContext Root; DeclContext::Map Contexts; - /// Cache resolved paths from the line table. + /// Cached resolved paths from the line table. + /// The key is <UniqueUnitID, FileIdx>. + using ResolvedPathsMap = DenseMap<std::pair<unsigned, unsigned>, StringRef>; + ResolvedPathsMap ResolvedPaths; + + /// Helper that resolves and caches fragments of file paths. CachedPathResolver PathResolver; + + /// String pool keeping real path bodies. + NonRelocatableStringpool StringPool; + + StringRef getResolvedPath(CompileUnit &CU, unsigned FileNum, + const DWARFDebugLine::LineTable &LineTable); }; /// Info type for the DenseMap storing the DeclContext pointers. diff --git a/llvm/include/llvm/DWARFLinker/DWARFStreamer.h b/llvm/include/llvm/DWARFLinker/DWARFStreamer.h index de58f5dedf24..7b0851159252 100644 --- a/llvm/include/llvm/DWARFLinker/DWARFStreamer.h +++ b/llvm/include/llvm/DWARFLinker/DWARFStreamer.h @@ -64,7 +64,7 @@ public: /// /// As a side effect, this also switches the current Dwarf version /// of the MC layer to the one of U.getOrigUnit(). - void emitCompileUnitHeader(CompileUnit &Unit) override; + void emitCompileUnitHeader(CompileUnit &Unit, unsigned DwarfVersion) override; /// Recursively emit the DIE tree rooted at \p Die. 
void emitDIE(DIE &Die) override; diff --git a/llvm/include/llvm/DebugInfo/CodeView/CVRecord.h b/llvm/include/llvm/DebugInfo/CodeView/CVRecord.h index 784c47e3bf5d..bb29ef5f2ce8 100644 --- a/llvm/include/llvm/DebugInfo/CodeView/CVRecord.h +++ b/llvm/include/llvm/DebugInfo/CodeView/CVRecord.h @@ -11,9 +11,9 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/Optional.h" +#include "llvm/DebugInfo/CodeView/CodeView.h" #include "llvm/DebugInfo/CodeView/CodeViewError.h" #include "llvm/DebugInfo/CodeView/RecordSerialization.h" -#include "llvm/DebugInfo/CodeView/TypeIndex.h" #include "llvm/Support/BinaryStreamReader.h" #include "llvm/Support/BinaryStreamRef.h" #include "llvm/Support/Endian.h" @@ -61,12 +61,9 @@ public: ArrayRef<uint8_t> RecordData; }; -template <typename Kind> struct RemappedRecord { - explicit RemappedRecord(const CVRecord<Kind> &R) : OriginalRecord(R) {} - - CVRecord<Kind> OriginalRecord; - SmallVector<std::pair<uint32_t, TypeIndex>, 8> Mappings; -}; +// There are two kinds of codeview records: type and symbol records. +using CVType = CVRecord<TypeLeafKind>; +using CVSymbol = CVRecord<SymbolKind>; template <typename Record, typename Func> Error forEachCodeViewRecord(ArrayRef<uint8_t> StreamBuffer, Func F) { @@ -126,6 +123,12 @@ struct VarStreamArrayExtractor<codeview::CVRecord<Kind>> { } }; +namespace codeview { +using CVSymbolArray = VarStreamArray<CVSymbol>; +using CVTypeArray = VarStreamArray<CVType>; +using CVTypeRange = iterator_range<CVTypeArray::Iterator>; +} // namespace codeview + } // end namespace llvm #endif // LLVM_DEBUGINFO_CODEVIEW_RECORDITERATOR_H diff --git a/llvm/include/llvm/DebugInfo/CodeView/CVSymbolVisitor.h b/llvm/include/llvm/DebugInfo/CodeView/CVSymbolVisitor.h index 1615ff41df12..82ef8c173bee 100644 --- a/llvm/include/llvm/DebugInfo/CodeView/CVSymbolVisitor.h +++ b/llvm/include/llvm/DebugInfo/CodeView/CVSymbolVisitor.h @@ -10,9 +10,6 @@ #define LLVM_DEBUGINFO_CODEVIEW_CVSYMBOLVISITOR_H #include "llvm/DebugInfo/CodeView/CVRecord.h" -#include "llvm/DebugInfo/CodeView/CodeView.h" -#include "llvm/DebugInfo/CodeView/SymbolRecord.h" -#include "llvm/DebugInfo/CodeView/SymbolVisitorDelegate.h" #include "llvm/Support/ErrorOr.h" namespace llvm { diff --git a/llvm/include/llvm/DebugInfo/CodeView/CodeViewRecordIO.h b/llvm/include/llvm/DebugInfo/CodeView/CodeViewRecordIO.h index f26e80ebe2a9..d851dea0a27f 100644 --- a/llvm/include/llvm/DebugInfo/CodeView/CodeViewRecordIO.h +++ b/llvm/include/llvm/DebugInfo/CodeView/CodeViewRecordIO.h @@ -15,7 +15,8 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/DebugInfo/CodeView/CodeViewError.h" -#include "llvm/DebugInfo/CodeView/TypeRecord.h" +#include "llvm/DebugInfo/CodeView/GUID.h" +#include "llvm/DebugInfo/CodeView/TypeIndex.h" #include "llvm/Support/BinaryStreamReader.h" #include "llvm/Support/BinaryStreamWriter.h" #include "llvm/Support/Error.h" diff --git a/llvm/include/llvm/DebugInfo/CodeView/CodeViewRegisters.def b/llvm/include/llvm/DebugInfo/CodeView/CodeViewRegisters.def index ed5c143818e6..48ea7e52c172 100644 --- a/llvm/include/llvm/DebugInfo/CodeView/CodeViewRegisters.def +++ b/llvm/include/llvm/DebugInfo/CodeView/CodeViewRegisters.def @@ -15,6 +15,7 @@ #endif #if !defined(CV_REGISTERS_ALL) && !defined(CV_REGISTERS_X86) && \ + !defined(CV_REGISTERS_ARM) && \ !defined(CV_REGISTERS_ARM64) #error Need include at least one register set. 
#endif @@ -393,13 +394,46 @@ CV_REGISTER(ARM_PC, 25) // Status register -CV_REGISTER(ARM_CPSR, 25) +CV_REGISTER(ARM_CPSR, 26) // ARM VFPv1 registers CV_REGISTER(ARM_FPSCR, 40) CV_REGISTER(ARM_FPEXC, 41) +CV_REGISTER(ARM_FS0, 50) +CV_REGISTER(ARM_FS1, 51) +CV_REGISTER(ARM_FS2, 52) +CV_REGISTER(ARM_FS3, 53) +CV_REGISTER(ARM_FS4, 54) +CV_REGISTER(ARM_FS5, 55) +CV_REGISTER(ARM_FS6, 56) +CV_REGISTER(ARM_FS7, 57) +CV_REGISTER(ARM_FS8, 58) +CV_REGISTER(ARM_FS9, 59) +CV_REGISTER(ARM_FS10, 60) +CV_REGISTER(ARM_FS11, 61) +CV_REGISTER(ARM_FS12, 62) +CV_REGISTER(ARM_FS13, 63) +CV_REGISTER(ARM_FS14, 64) +CV_REGISTER(ARM_FS15, 65) +CV_REGISTER(ARM_FS16, 66) +CV_REGISTER(ARM_FS17, 67) +CV_REGISTER(ARM_FS18, 68) +CV_REGISTER(ARM_FS19, 69) +CV_REGISTER(ARM_FS20, 70) +CV_REGISTER(ARM_FS21, 71) +CV_REGISTER(ARM_FS22, 72) +CV_REGISTER(ARM_FS23, 73) +CV_REGISTER(ARM_FS24, 74) +CV_REGISTER(ARM_FS25, 75) +CV_REGISTER(ARM_FS26, 76) +CV_REGISTER(ARM_FS27, 77) +CV_REGISTER(ARM_FS28, 78) +CV_REGISTER(ARM_FS29, 79) +CV_REGISTER(ARM_FS30, 80) +CV_REGISTER(ARM_FS31, 81) + // ARM VFPv3/NEON registers CV_REGISTER(ARM_FS32, 200) diff --git a/llvm/include/llvm/DebugInfo/CodeView/DebugSubsectionVisitor.h b/llvm/include/llvm/DebugInfo/CodeView/DebugSubsectionVisitor.h index 720b1b49581f..624a623e75b8 100644 --- a/llvm/include/llvm/DebugInfo/CodeView/DebugSubsectionVisitor.h +++ b/llvm/include/llvm/DebugInfo/CodeView/DebugSubsectionVisitor.h @@ -10,10 +10,8 @@ #define LLVM_DEBUGINFO_CODEVIEW_MODULEDEBUGFRAGMENTVISITOR_H #include "llvm/DebugInfo/CodeView/CodeView.h" -#include "llvm/DebugInfo/CodeView/DebugSubsectionRecord.h" #include "llvm/DebugInfo/CodeView/StringsAndChecksums.h" #include "llvm/Support/Error.h" -#include <cstdint> namespace llvm { @@ -30,7 +28,6 @@ class DebugStringTableSubsectionRef; class DebugSymbolRVASubsectionRef; class DebugSymbolsSubsectionRef; class DebugUnknownSubsectionRef; -class StringsAndChecksumsRef; class DebugSubsectionVisitor { public: diff --git a/llvm/include/llvm/DebugInfo/CodeView/DebugSymbolsSubsection.h b/llvm/include/llvm/DebugInfo/CodeView/DebugSymbolsSubsection.h index 784fc59484b9..51b8523ed969 100644 --- a/llvm/include/llvm/DebugInfo/CodeView/DebugSymbolsSubsection.h +++ b/llvm/include/llvm/DebugInfo/CodeView/DebugSymbolsSubsection.h @@ -9,8 +9,8 @@ #ifndef LLVM_DEBUGINFO_CODEVIEW_DEBUGSYMBOLSSUBSECTION_H #define LLVM_DEBUGINFO_CODEVIEW_DEBUGSYMBOLSSUBSECTION_H +#include "llvm/DebugInfo/CodeView/CVRecord.h" #include "llvm/DebugInfo/CodeView/DebugSubsection.h" -#include "llvm/DebugInfo/CodeView/SymbolRecord.h" #include "llvm/Support/Error.h" namespace llvm { diff --git a/llvm/include/llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h b/llvm/include/llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h index 35eeef5a327e..ddbb4e3c5e6c 100644 --- a/llvm/include/llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h +++ b/llvm/include/llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h @@ -14,7 +14,6 @@ #include "llvm/ADT/StringRef.h" #include "llvm/DebugInfo/CodeView/TypeCollection.h" #include "llvm/DebugInfo/CodeView/TypeIndex.h" -#include "llvm/DebugInfo/CodeView/TypeRecord.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/BinaryStreamArray.h" #include "llvm/Support/Error.h" diff --git a/llvm/include/llvm/DebugInfo/CodeView/RecordName.h b/llvm/include/llvm/DebugInfo/CodeView/RecordName.h index cc09db8933bd..8e06be9e41e8 100644 --- a/llvm/include/llvm/DebugInfo/CodeView/RecordName.h +++ b/llvm/include/llvm/DebugInfo/CodeView/RecordName.h @@ -9,7 +9,6 @@ #ifndef 
LLVM_DEBUGINFO_CODEVIEW_RECORDNAME_H #define LLVM_DEBUGINFO_CODEVIEW_RECORDNAME_H -#include "llvm/DebugInfo/CodeView/SymbolRecord.h" #include "llvm/DebugInfo/CodeView/TypeCollection.h" #include "llvm/DebugInfo/CodeView/TypeIndex.h" diff --git a/llvm/include/llvm/DebugInfo/CodeView/SymbolDumper.h b/llvm/include/llvm/DebugInfo/CodeView/SymbolDumper.h index d832a48b1265..aaeffb2446ad 100644 --- a/llvm/include/llvm/DebugInfo/CodeView/SymbolDumper.h +++ b/llvm/include/llvm/DebugInfo/CodeView/SymbolDumper.h @@ -11,8 +11,8 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringSet.h" +#include "llvm/DebugInfo/CodeView/CVRecord.h" #include "llvm/DebugInfo/CodeView/SymbolDumpDelegate.h" -#include "llvm/DebugInfo/CodeView/SymbolRecord.h" #include "llvm/DebugInfo/CodeView/TypeIndex.h" namespace llvm { diff --git a/llvm/include/llvm/DebugInfo/CodeView/SymbolRecord.h b/llvm/include/llvm/DebugInfo/CodeView/SymbolRecord.h index 4383534b0db2..c37f6b4d5fa7 100644 --- a/llvm/include/llvm/DebugInfo/CodeView/SymbolRecord.h +++ b/llvm/include/llvm/DebugInfo/CodeView/SymbolRecord.h @@ -1003,9 +1003,6 @@ public: uint32_t RecordOffset = 0; }; -using CVSymbol = CVRecord<SymbolKind>; -using CVSymbolArray = VarStreamArray<CVSymbol>; - Expected<CVSymbol> readSymbolFromStream(BinaryStreamRef Stream, uint32_t Offset); diff --git a/llvm/include/llvm/DebugInfo/CodeView/SymbolRecordHelpers.h b/llvm/include/llvm/DebugInfo/CodeView/SymbolRecordHelpers.h index 57dbc56c0769..71bc70dde6ed 100644 --- a/llvm/include/llvm/DebugInfo/CodeView/SymbolRecordHelpers.h +++ b/llvm/include/llvm/DebugInfo/CodeView/SymbolRecordHelpers.h @@ -9,7 +9,8 @@ #ifndef LLVM_DEBUGINFO_CODEVIEW_SYMBOLRECORDHELPERS_H #define LLVM_DEBUGINFO_CODEVIEW_SYMBOLRECORDHELPERS_H -#include "llvm/DebugInfo/CodeView/SymbolRecord.h" +#include "llvm/DebugInfo/CodeView/CVRecord.h" +#include "llvm/DebugInfo/CodeView/CodeView.h" namespace llvm { namespace codeview { diff --git a/llvm/include/llvm/DebugInfo/CodeView/TypeCollection.h b/llvm/include/llvm/DebugInfo/CodeView/TypeCollection.h index 102d68c3fb2a..bde5a8b3ab2f 100644 --- a/llvm/include/llvm/DebugInfo/CodeView/TypeCollection.h +++ b/llvm/include/llvm/DebugInfo/CodeView/TypeCollection.h @@ -10,9 +10,8 @@ #define LLVM_DEBUGINFO_CODEVIEW_TYPECOLLECTION_H #include "llvm/ADT/StringRef.h" - +#include "llvm/DebugInfo/CodeView/CVRecord.h" #include "llvm/DebugInfo/CodeView/TypeIndex.h" -#include "llvm/DebugInfo/CodeView/TypeRecord.h" namespace llvm { namespace codeview { diff --git a/llvm/include/llvm/DebugInfo/CodeView/TypeHashing.h b/llvm/include/llvm/DebugInfo/CodeView/TypeHashing.h index b0a16cccbff3..9f34d026b1ba 100644 --- a/llvm/include/llvm/DebugInfo/CodeView/TypeHashing.h +++ b/llvm/include/llvm/DebugInfo/CodeView/TypeHashing.h @@ -86,6 +86,16 @@ struct GloballyHashedType { bool empty() const { return *(const uint64_t*)Hash.data() == 0; } + friend inline bool operator==(const GloballyHashedType &L, + const GloballyHashedType &R) { + return L.Hash == R.Hash; + } + + friend inline bool operator!=(const GloballyHashedType &L, + const GloballyHashedType &R) { + return !(L.Hash == R.Hash); + } + /// Given a sequence of bytes representing a record, compute a global hash for /// this record. 
Due to the nature of global hashes incorporating the hashes /// of referenced records, this function requires a list of types and ids @@ -161,15 +171,10 @@ struct GloballyHashedType { return Hashes; } }; -#if defined(_MSC_VER) -// is_trivially_copyable is not available in older versions of libc++, but it is -// available in all supported versions of MSVC, so at least this gives us some -// coverage. static_assert(std::is_trivially_copyable<GloballyHashedType>::value, "GloballyHashedType must be trivially copyable so that we can " "reinterpret_cast arrays of hash data to arrays of " "GloballyHashedType"); -#endif } // namespace codeview template <> struct DenseMapInfo<codeview::LocallyHashedType> { @@ -206,7 +211,7 @@ template <> struct DenseMapInfo<codeview::GloballyHashedType> { static bool isEqual(codeview::GloballyHashedType LHS, codeview::GloballyHashedType RHS) { - return LHS.Hash == RHS.Hash; + return LHS == RHS; } }; diff --git a/llvm/include/llvm/DebugInfo/CodeView/TypeIndex.h b/llvm/include/llvm/DebugInfo/CodeView/TypeIndex.h index b9e2562bfc2b..bdc6cf46509b 100644 --- a/llvm/include/llvm/DebugInfo/CodeView/TypeIndex.h +++ b/llvm/include/llvm/DebugInfo/CodeView/TypeIndex.h @@ -116,13 +116,22 @@ public: uint32_t toArrayIndex() const { assert(!isSimple()); - return getIndex() - FirstNonSimpleIndex; + return (getIndex() & ~DecoratedItemIdMask) - FirstNonSimpleIndex; } static TypeIndex fromArrayIndex(uint32_t Index) { return TypeIndex(Index + FirstNonSimpleIndex); } + static TypeIndex fromDecoratedArrayIndex(bool IsItem, uint32_t Index) { + return TypeIndex((Index + FirstNonSimpleIndex) | + (IsItem ? DecoratedItemIdMask : 0)); + } + + TypeIndex removeDecoration() { + return TypeIndex(Index & ~DecoratedItemIdMask); + } + SimpleTypeKind getSimpleKind() const { assert(isSimple()); return static_cast<SimpleTypeKind>(Index & SimpleKindMask); diff --git a/llvm/include/llvm/DebugInfo/CodeView/TypeIndexDiscovery.h b/llvm/include/llvm/DebugInfo/CodeView/TypeIndexDiscovery.h index 469768787274..f4f5835d8b57 100644 --- a/llvm/include/llvm/DebugInfo/CodeView/TypeIndexDiscovery.h +++ b/llvm/include/llvm/DebugInfo/CodeView/TypeIndexDiscovery.h @@ -10,8 +10,8 @@ #define LLVM_DEBUGINFO_CODEVIEW_TYPEINDEXDISCOVERY_H #include "llvm/ADT/SmallVector.h" -#include "llvm/DebugInfo/CodeView/SymbolRecord.h" -#include "llvm/DebugInfo/CodeView/TypeRecord.h" +#include "llvm/DebugInfo/CodeView/CVRecord.h" +#include "llvm/DebugInfo/CodeView/TypeIndex.h" #include "llvm/Support/Error.h" namespace llvm { diff --git a/llvm/include/llvm/DebugInfo/CodeView/TypeRecord.h b/llvm/include/llvm/DebugInfo/CodeView/TypeRecord.h index 35f5c0561138..3b6d1b0b1a70 100644 --- a/llvm/include/llvm/DebugInfo/CodeView/TypeRecord.h +++ b/llvm/include/llvm/DebugInfo/CodeView/TypeRecord.h @@ -14,7 +14,6 @@ #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" -#include "llvm/ADT/iterator_range.h" #include "llvm/DebugInfo/CodeView/CVRecord.h" #include "llvm/DebugInfo/CodeView/CodeView.h" #include "llvm/DebugInfo/CodeView/GUID.h" @@ -32,15 +31,10 @@ using support::little32_t; using support::ulittle16_t; using support::ulittle32_t; -using CVType = CVRecord<TypeLeafKind>; -using RemappedType = RemappedRecord<TypeLeafKind>; - struct CVMemberRecord { TypeLeafKind Kind; ArrayRef<uint8_t> Data; }; -using CVTypeArray = VarStreamArray<CVType>; -using CVTypeRange = iterator_range<CVTypeArray::Iterator>; /// Equvalent to CV_fldattr_t in cvinfo.h. 
struct MemberAttributes { @@ -703,7 +697,7 @@ public: : TypeRecord(TypeRecordKind::VFTable), CompleteClass(CompleteClass), OverriddenVFTable(OverriddenVFTable), VFPtrOffset(VFPtrOffset) { MethodNames.push_back(Name); - MethodNames.insert(MethodNames.end(), Methods.begin(), Methods.end()); + llvm::append_range(MethodNames, Methods); } TypeIndex getCompleteClass() const { return CompleteClass; } diff --git a/llvm/include/llvm/DebugInfo/CodeView/TypeRecordHelpers.h b/llvm/include/llvm/DebugInfo/CodeView/TypeRecordHelpers.h index 19492b93681c..041f5214967c 100644 --- a/llvm/include/llvm/DebugInfo/CodeView/TypeRecordHelpers.h +++ b/llvm/include/llvm/DebugInfo/CodeView/TypeRecordHelpers.h @@ -9,7 +9,8 @@ #ifndef LLVM_DEBUGINFO_CODEVIEW_TYPERECORDHELPERS_H #define LLVM_DEBUGINFO_CODEVIEW_TYPERECORDHELPERS_H -#include "llvm/DebugInfo/CodeView/TypeRecord.h" +#include "llvm/DebugInfo/CodeView/CVRecord.h" +#include "llvm/DebugInfo/CodeView/TypeIndex.h" namespace llvm { namespace codeview { diff --git a/llvm/include/llvm/DebugInfo/CodeView/TypeStreamMerger.h b/llvm/include/llvm/DebugInfo/CodeView/TypeStreamMerger.h index d0506cce8176..04d7c7b0420a 100644 --- a/llvm/include/llvm/DebugInfo/CodeView/TypeStreamMerger.h +++ b/llvm/include/llvm/DebugInfo/CodeView/TypeStreamMerger.h @@ -11,7 +11,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/DebugInfo/CodeView/TypeRecord.h" +#include "llvm/DebugInfo/CodeView/CVRecord.h" #include "llvm/Support/Error.h" namespace llvm { diff --git a/llvm/include/llvm/DebugInfo/DIContext.h b/llvm/include/llvm/DebugInfo/DIContext.h index 661d30d04c94..ae78fe912188 100644 --- a/llvm/include/llvm/DebugInfo/DIContext.h +++ b/llvm/include/llvm/DebugInfo/DIContext.h @@ -35,6 +35,7 @@ struct DILineInfo { static constexpr const char *const Addr2LineBadString = "??"; std::string FileName; std::string FunctionName; + std::string StartFileName; Optional<StringRef> Source; uint32_t Line = 0; uint32_t Column = 0; @@ -43,12 +44,15 @@ struct DILineInfo { // DWARF-specific. 
uint32_t Discriminator = 0; - DILineInfo() : FileName(BadString), FunctionName(BadString) {} + DILineInfo() + : FileName(BadString), FunctionName(BadString), StartFileName(BadString) { + } bool operator==(const DILineInfo &RHS) const { return Line == RHS.Line && Column == RHS.Column && FileName == RHS.FileName && FunctionName == RHS.FunctionName && - StartLine == RHS.StartLine && Discriminator == RHS.Discriminator; + StartFileName == RHS.StartFileName && StartLine == RHS.StartLine && + Discriminator == RHS.Discriminator; } bool operator!=(const DILineInfo &RHS) const { @@ -56,10 +60,10 @@ struct DILineInfo { } bool operator<(const DILineInfo &RHS) const { - return std::tie(FileName, FunctionName, Line, Column, StartLine, - Discriminator) < - std::tie(RHS.FileName, RHS.FunctionName, RHS.Line, RHS.Column, - RHS.StartLine, RHS.Discriminator); + return std::tie(FileName, FunctionName, StartFileName, Line, Column, + StartLine, Discriminator) < + std::tie(RHS.FileName, RHS.FunctionName, RHS.StartFileName, RHS.Line, + RHS.Column, RHS.StartLine, RHS.Discriminator); } explicit operator bool() const { return *this != DILineInfo(); } @@ -72,6 +76,8 @@ struct DILineInfo { OS << "function '" << FunctionName << "', "; OS << "line " << Line << ", "; OS << "column " << Column << ", "; + if (StartFileName != BadString) + OS << "start file '" << StartFileName << "', "; OS << "start line " << StartLine << '\n'; } }; diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h index 97903a96b3fc..7d88e1447dca 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h @@ -146,6 +146,7 @@ public: bool verify(raw_ostream &OS, DIDumpOptions DumpOpts = {}) override; using unit_iterator_range = DWARFUnitVector::iterator_range; + using compile_unit_range = DWARFUnitVector::compile_unit_range; /// Get units from .debug_info in this context. unit_iterator_range info_section_units() { @@ -163,10 +164,12 @@ public: } /// Get compile units in this context. - unit_iterator_range compile_units() { return info_section_units(); } + compile_unit_range compile_units() { + return make_filter_range(info_section_units(), isCompileUnit); + } - /// Get type units in this context. - unit_iterator_range type_units() { return types_section_units(); } + // If you want type_units(), it'll need to be a concat iterator of a filter of + // TUs in info_section + all the (all type) units in types_section /// Get all normal compile/type units in this context. unit_iterator_range normal_units() { @@ -189,10 +192,13 @@ public: } /// Get compile units in the DWO context. - unit_iterator_range dwo_compile_units() { return dwo_info_section_units(); } + compile_unit_range dwo_compile_units() { + return make_filter_range(dwo_info_section_units(), isCompileUnit); + } - /// Get type units in the DWO context. - unit_iterator_range dwo_type_units() { return dwo_types_section_units(); } + // If you want dwo_type_units(), it'll need to be a concat iterator of a + // filter of TUs in dwo_info_section + all the (all type) units in + // dwo_types_section. /// Get all units in the DWO context. 
unit_iterator_range dwo_units() { diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugAddr.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugAddr.h index 32844ffd570f..69e67866946c 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugAddr.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugAddr.h @@ -74,6 +74,24 @@ public: /// Return the full length of this table, including the length field. /// Return None if the length cannot be identified reliably. Optional<uint64_t> getFullLength() const; + + /// Return the DWARF format of this table. + dwarf::DwarfFormat getFormat() const { return Format; } + + /// Return the length of this table. + uint64_t getLength() const { return Length; } + + /// Return the version of this table. + uint16_t getVersion() const { return Version; } + + /// Return the address size of this table. + uint8_t getAddressSize() const { return AddrSize; } + + /// Return the segment selector size of this table. + uint8_t getSegmentSelectorSize() const { return SegSize; } + + /// Return the parsed addresses of this table. + ArrayRef<uint64_t> getAddressEntries() const { return Addrs; } }; } // end namespace llvm diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h index 0681a2e33a50..3d5852ee1518 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h @@ -60,7 +60,8 @@ public: DWARFDebugArangeSet() { clear(); } void clear(); - Error extract(DWARFDataExtractor data, uint64_t *offset_ptr); + Error extract(DWARFDataExtractor data, uint64_t *offset_ptr, + function_ref<void(Error)> WarningHandler); void dump(raw_ostream &OS) const; uint64_t getCompileUnitDIEOffset() const { return HeaderData.CuOffset; } diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugFrame.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugFrame.h index 233b55cc55c1..af87811f5d7d 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugFrame.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugFrame.h @@ -71,8 +71,8 @@ public: /// where a problem occurred in case an error is returned. Error parse(DWARFDataExtractor Data, uint64_t *Offset, uint64_t EndOffset); - void dump(raw_ostream &OS, const MCRegisterInfo *MRI, bool IsEH, - unsigned IndentLevel = 1) const; + void dump(raw_ostream &OS, DIDumpOptions DumpOpts, const MCRegisterInfo *MRI, + bool IsEH, unsigned IndentLevel = 1) const; private: std::vector<Instruction> Instructions; @@ -121,7 +121,8 @@ private: static ArrayRef<OperandType[2]> getOperandTypes(); /// Print \p Opcode's operand number \p OperandIdx which has value \p Operand. 
- void printOperand(raw_ostream &OS, const MCRegisterInfo *MRI, bool IsEH, + void printOperand(raw_ostream &OS, DIDumpOptions DumpOpts, + const MCRegisterInfo *MRI, bool IsEH, const Instruction &Instr, unsigned OperandIdx, uint64_t Operand) const; }; @@ -146,8 +147,8 @@ public: CFIProgram &cfis() { return CFIs; } /// Dump the instructions in this CFI fragment - virtual void dump(raw_ostream &OS, const MCRegisterInfo *MRI, - bool IsEH) const = 0; + virtual void dump(raw_ostream &OS, DIDumpOptions DumpOpts, + const MCRegisterInfo *MRI, bool IsEH) const = 0; protected: const FrameKind Kind; @@ -201,7 +202,7 @@ public: uint32_t getLSDAPointerEncoding() const { return LSDAPointerEncoding; } - void dump(raw_ostream &OS, const MCRegisterInfo *MRI, + void dump(raw_ostream &OS, DIDumpOptions DumpOpts, const MCRegisterInfo *MRI, bool IsEH) const override; private: @@ -242,7 +243,7 @@ public: uint64_t getAddressRange() const { return AddressRange; } Optional<uint64_t> getLSDAAddress() const { return LSDAAddress; } - void dump(raw_ostream &OS, const MCRegisterInfo *MRI, + void dump(raw_ostream &OS, DIDumpOptions DumpOpts, const MCRegisterInfo *MRI, bool IsEH) const override; static bool classof(const FrameEntry *FE) { return FE->getKind() == FK_FDE; } @@ -285,7 +286,7 @@ public: ~DWARFDebugFrame(); /// Dump the section data into the given stream. - void dump(raw_ostream &OS, const MCRegisterInfo *MRI, + void dump(raw_ostream &OS, DIDumpOptions DumpOpts, const MCRegisterInfo *MRI, Optional<uint64_t> Offset) const; /// Parse the section from raw data. \p Data is assumed to contain the whole diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h index fe46d613aedd..bc6c67ae6c5d 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h @@ -121,6 +121,8 @@ public: bool hasFileAtIndex(uint64_t FileIndex) const; + Optional<uint64_t> getLastValidFileIndex() const; + bool getFileNameByIndex(uint64_t FileIndex, StringRef CompDir, DILineInfoSpecifier::FileLineInfoKind Kind, @@ -251,6 +253,10 @@ public: return Prologue.hasFileAtIndex(FileIndex); } + Optional<uint64_t> getLastValidFileIndex() const { + return Prologue.getLastValidFileIndex(); + } + /// Extracts filename by its index in filename table in prologue. /// In Dwarf 4, the files are 1-indexed and the current compilation file /// name is not represented in the list. In DWARF v5, the files are @@ -309,12 +315,10 @@ public: /// Helper to allow for parsing of an entire .debug_line section in sequence. class SectionParser { public: - using cu_range = DWARFUnitVector::iterator_range; - using tu_range = DWARFUnitVector::iterator_range; using LineToUnitMap = std::map<uint64_t, DWARFUnit *>; - SectionParser(DWARFDataExtractor &Data, const DWARFContext &C, cu_range CUs, - tu_range TUs); + SectionParser(DWARFDataExtractor &Data, const DWARFContext &C, + DWARFUnitVector::iterator_range Units); /// Get the next line table from the section. Report any issues via the /// handlers. 
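The DWARFDebugLine changes above add getLastValidFileIndex() next to hasFileAtIndex(), so callers can walk a prologue's file-name table without hard-coding the DWARF v4 (1-based) versus v5 (0-based) indexing themselves. A minimal sketch of that pattern follows; the collectFileNames helper is invented for illustration, and getFileNameByIndex is assumed to take a std::string out-parameter (only its leading parameters are visible in the hunk above).

#include "llvm/ADT/Optional.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/DebugInfo/DIContext.h"
#include "llvm/DebugInfo/DWARF/DWARFDebugLine.h"
#include <string>
#include <vector>

using namespace llvm;

// Collect every file name recorded in a line-table prologue, independent of
// the DWARF version. hasFileAtIndex() already knows whether indices start at
// 0 (v5) or 1 (v4), so probing from 0 up to the last valid index is safe.
static std::vector<std::string>
collectFileNames(const DWARFDebugLine::Prologue &P, StringRef CompDir) {
  std::vector<std::string> Names;
  Optional<uint64_t> LastIndex = P.getLastValidFileIndex();
  if (!LastIndex)
    return Names; // Empty file table.
  for (uint64_t I = 0; I <= *LastIndex; ++I) {
    if (!P.hasFileAtIndex(I))
      continue;
    std::string Name;
    if (P.getFileNameByIndex(
            I, CompDir,
            DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath, Name))
      Names.push_back(std::move(Name));
  }
  return Names;
}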
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLoc.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLoc.h index 3b141304f85f..dbc11c51a789 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLoc.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLoc.h @@ -72,6 +72,8 @@ public: std::function<Optional<object::SectionedAddress>(uint32_t)> LookupAddr, function_ref<bool(Expected<DWARFLocationExpression>)> Callback) const; + const DWARFDataExtractor &getData() { return Data; } + protected: DWARFDataExtractor Data; diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugMacro.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugMacro.h index 4d463d8fe6f5..f1768a1ddab5 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugMacro.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugMacro.h @@ -96,6 +96,9 @@ class DWARFDebugMacro { MacroHeader Header; SmallVector<Entry, 4> Macros; uint64_t Offset; + + /// Whether or not this is a .debug_macro section. + bool IsDebugMacro; }; /// A list of all the macro entries in the debug_macinfo section. @@ -107,7 +110,7 @@ public: /// Print the macro list found within the debug_macinfo/debug_macro section. void dump(raw_ostream &OS) const; - Error parseMacro(DWARFUnitVector::iterator_range Units, + Error parseMacro(DWARFUnitVector::compile_unit_range Units, DataExtractor StringExtractor, DWARFDataExtractor MacroData) { return parseImpl(Units, StringExtractor, MacroData, /*IsMacro=*/true); @@ -123,7 +126,7 @@ public: private: /// Parse the debug_macinfo/debug_macro section accessible via the 'MacroData' /// parameter. - Error parseImpl(Optional<DWARFUnitVector::iterator_range> Units, + Error parseImpl(Optional<DWARFUnitVector::compile_unit_range> Units, Optional<DataExtractor> StringExtractor, DWARFDataExtractor Data, bool IsMacro); }; diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugRnglists.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugRnglists.h index 88e5432851d6..4d28bdcde2e4 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugRnglists.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugRnglists.h @@ -34,7 +34,7 @@ struct RangeListEntry : public DWARFListEntryBase { uint64_t Value0; uint64_t Value1; - Error extract(DWARFDataExtractor Data, uint64_t End, uint64_t *OffsetPtr); + Error extract(DWARFDataExtractor Data, uint64_t *OffsetPtr); void dump(raw_ostream &OS, uint8_t AddrSize, uint8_t MaxEncodingStringLength, uint64_t &CurrentBase, DIDumpOptions DumpOpts, llvm::function_ref<Optional<object::SectionedAddress>(uint32_t)> @@ -48,6 +48,7 @@ public: /// Build a DWARFAddressRangesVector from a rangelist. DWARFAddressRangesVector getAbsoluteRanges(Optional<object::SectionedAddress> BaseAddr, + uint8_t AddressByteSize, function_ref<Optional<object::SectionedAddress>(uint32_t)> LookupPooledAddress) const; diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDie.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDie.h index 05a6056e8e21..0f76d7f1b31c 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDie.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDie.h @@ -262,6 +262,7 @@ public: /// for this subprogram by resolving DW_AT_sepcification or /// DW_AT_abstract_origin references if necessary. uint64_t getDeclLine() const; + std::string getDeclFile(DILineInfoSpecifier::FileLineInfoKind Kind) const; /// Retrieves values of DW_AT_call_file, DW_AT_call_line and DW_AT_call_column /// from DIE (or zeroes if they are missing). 
This function looks for @@ -381,11 +382,6 @@ inline bool operator==(const DWARFDie::iterator &LHS, return LHS.Die == RHS.Die; } -inline bool operator!=(const DWARFDie::iterator &LHS, - const DWARFDie::iterator &RHS) { - return !(LHS == RHS); -} - // These inline functions must follow the DWARFDie::iterator definition above // as they use functions from that class. inline DWARFDie::iterator DWARFDie::begin() const { diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFExpression.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFExpression.h index edfa68d49a60..447ad66b9352 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFExpression.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFExpression.h @@ -10,10 +10,11 @@ #define LLVM_DEBUGINFO_DWARFEXPRESSION_H #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/Optional.h" #include "llvm/ADT/iterator.h" #include "llvm/ADT/iterator_range.h" -#include "llvm/ADT/Optional.h" #include "llvm/BinaryFormat/Dwarf.h" +#include "llvm/DebugInfo/DIContext.h" #include "llvm/Support/DataExtractor.h" namespace llvm { @@ -93,8 +94,9 @@ public: bool extract(DataExtractor Data, uint8_t AddressSize, uint64_t Offset, Optional<dwarf::DwarfFormat> Format); bool isError() { return Error; } - bool print(raw_ostream &OS, const DWARFExpression *Expr, - const MCRegisterInfo *RegInfo, DWARFUnit *U, bool isEH); + bool print(raw_ostream &OS, DIDumpOptions DumpOpts, + const DWARFExpression *Expr, const MCRegisterInfo *RegInfo, + DWARFUnit *U, bool isEH); bool verify(DWARFUnit *U); }; @@ -143,7 +145,8 @@ public: iterator begin() const { return iterator(this, 0); } iterator end() const { return iterator(this, Data.getData().size()); } - void print(raw_ostream &OS, const MCRegisterInfo *RegInfo, DWARFUnit *U, + void print(raw_ostream &OS, DIDumpOptions DumpOpts, + const MCRegisterInfo *RegInfo, DWARFUnit *U, bool IsEH = false) const; /// Print the expression in a format intended to be compact and useful to a @@ -164,10 +167,5 @@ inline bool operator==(const DWARFExpression::iterator &LHS, const DWARFExpression::iterator &RHS) { return LHS.Expr == RHS.Expr && LHS.Offset == RHS.Offset; } - -inline bool operator!=(const DWARFExpression::iterator &LHS, - const DWARFExpression::iterator &RHS) { - return !(LHS == RHS); -} } #endif diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFFormValue.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFFormValue.h index 3f1be4e5a592..1342e645934c 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFFormValue.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFFormValue.h @@ -82,6 +82,9 @@ public: void dump(raw_ostream &OS, DIDumpOptions DumpOpts = DIDumpOptions()) const; void dumpSectionedAddress(raw_ostream &OS, DIDumpOptions DumpOpts, object::SectionedAddress SA) const; + void dumpAddress(raw_ostream &OS, uint64_t Address) const; + static void dumpAddress(raw_ostream &OS, uint8_t AddressSize, + uint64_t Address); static void dumpAddressSection(const DWARFObject &Obj, raw_ostream &OS, DIDumpOptions DumpOpts, uint64_t SectionIndex); diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFListTable.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFListTable.h index 496fdb2477f9..8f58b4e6458e 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFListTable.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFListTable.h @@ -46,7 +46,7 @@ public: const ListEntries &getEntries() const { return Entries; } bool empty() const { return Entries.empty(); } void clear() { Entries.clear(); } - Error extract(DWARFDataExtractor Data, uint64_t HeaderOffset, uint64_t End, + Error extract(DWARFDataExtractor 
Data, uint64_t HeaderOffset, uint64_t *OffsetPtr, StringRef SectionName, StringRef ListStringName); }; @@ -72,10 +72,6 @@ class DWARFListTableHeader { }; Header HeaderData; - /// The offset table, which contains offsets to the individual list entries. - /// It is used by forms such as DW_FORM_rnglistx. - /// FIXME: Generate the table and use the appropriate forms. - std::vector<uint64_t> Offsets; /// The table's format, either DWARF32 or DWARF64. dwarf::DwarfFormat Format; /// The offset at which the header (and hence the table) is located within @@ -93,7 +89,6 @@ public: void clear() { HeaderData = {}; - Offsets.clear(); } uint64_t getHeaderOffset() const { return HeaderOffset; } uint8_t getAddrSize() const { return HeaderData.AddrSize; } @@ -115,11 +110,23 @@ public: llvm_unreachable("Invalid DWARF format (expected DWARF32 or DWARF64"); } - void dump(raw_ostream &OS, DIDumpOptions DumpOpts = {}) const; - Optional<uint64_t> getOffsetEntry(uint32_t Index) const { - if (Index < Offsets.size()) - return Offsets[Index]; - return None; + void dump(DataExtractor Data, raw_ostream &OS, + DIDumpOptions DumpOpts = {}) const; + Optional<uint64_t> getOffsetEntry(DataExtractor Data, uint32_t Index) const { + if (Index > HeaderData.OffsetEntryCount) + return None; + + return getOffsetEntry(Data, getHeaderOffset() + getHeaderSize(Format), Format, Index); + } + + static Optional<uint64_t> getOffsetEntry(DataExtractor Data, + uint64_t OffsetTableOffset, + dwarf::DwarfFormat Format, + uint32_t Index) { + uint8_t OffsetByteSize = Format == dwarf::DWARF64 ? 8 : 4; + uint64_t Offset = OffsetTableOffset + OffsetByteSize * Index; + auto R = Data.getUnsigned(&Offset, OffsetByteSize); + return R; } /// Extract the table header and the array of offsets. @@ -169,14 +176,14 @@ public: uint8_t getAddrSize() const { return Header.getAddrSize(); } dwarf::DwarfFormat getFormat() const { return Header.getFormat(); } - void dump(raw_ostream &OS, + void dump(DWARFDataExtractor Data, raw_ostream &OS, llvm::function_ref<Optional<object::SectionedAddress>(uint32_t)> LookupPooledAddress, DIDumpOptions DumpOpts = {}) const; /// Return the contents of the offset entry designated by a given index. - Optional<uint64_t> getOffsetEntry(uint32_t Index) const { - return Header.getOffsetEntry(Index); + Optional<uint64_t> getOffsetEntry(DataExtractor Data, uint32_t Index) const { + return Header.getOffsetEntry(Data, Index); } /// Return the size of the table header including the length but not including /// the offsets. 
This is dependent on the table format, which is unambiguously @@ -196,18 +203,18 @@ Error DWARFListTableBase<DWARFListType>::extract(DWARFDataExtractor Data, return E; Data.setAddressSize(Header.getAddrSize()); - uint64_t End = getHeaderOffset() + Header.length(); - while (*OffsetPtr < End) { + Data = DWARFDataExtractor(Data, getHeaderOffset() + Header.length()); + while (Data.isValidOffset(*OffsetPtr)) { DWARFListType CurrentList; uint64_t Off = *OffsetPtr; - if (Error E = CurrentList.extract(Data, getHeaderOffset(), End, OffsetPtr, + if (Error E = CurrentList.extract(Data, getHeaderOffset(), OffsetPtr, Header.getSectionName(), Header.getListTypeString())) return E; ListMap[Off] = CurrentList; } - assert(*OffsetPtr == End && + assert(*OffsetPtr == Data.size() && "mismatch between expected length of table and length " "of extracted data"); return Error::success(); @@ -215,18 +222,18 @@ Error DWARFListTableBase<DWARFListType>::extract(DWARFDataExtractor Data, template <typename ListEntryType> Error DWARFListType<ListEntryType>::extract(DWARFDataExtractor Data, - uint64_t HeaderOffset, uint64_t End, + uint64_t HeaderOffset, uint64_t *OffsetPtr, StringRef SectionName, StringRef ListTypeString) { - if (*OffsetPtr < HeaderOffset || *OffsetPtr >= End) + if (*OffsetPtr < HeaderOffset || *OffsetPtr >= Data.size()) return createStringError(errc::invalid_argument, "invalid %s list offset 0x%" PRIx64, ListTypeString.data(), *OffsetPtr); Entries.clear(); - while (*OffsetPtr < End) { + while (Data.isValidOffset(*OffsetPtr)) { ListEntryType Entry; - if (Error E = Entry.extract(Data, End, OffsetPtr)) + if (Error E = Entry.extract(Data, OffsetPtr)) return E; Entries.push_back(Entry); if (Entry.isSentinel()) @@ -240,11 +247,11 @@ Error DWARFListType<ListEntryType>::extract(DWARFDataExtractor Data, template <typename DWARFListType> void DWARFListTableBase<DWARFListType>::dump( - raw_ostream &OS, + DWARFDataExtractor Data, raw_ostream &OS, llvm::function_ref<Optional<object::SectionedAddress>(uint32_t)> LookupPooledAddress, DIDumpOptions DumpOpts) const { - Header.dump(OS, DumpOpts); + Header.dump(Data, OS, DumpOpts); OS << HeaderString << "\n"; // Determine the length of the longest encoding string we have in the table, @@ -269,19 +276,14 @@ template <typename DWARFListType> Expected<DWARFListType> DWARFListTableBase<DWARFListType>::findList(DWARFDataExtractor Data, uint64_t Offset) { - auto Entry = ListMap.find(Offset); - if (Entry != ListMap.end()) - return Entry->second; - // Extract the list from the section and enter it into the list map. DWARFListType List; - uint64_t End = getHeaderOffset() + Header.length(); - uint64_t StartingOffset = Offset; + if (Header.length()) + Data = DWARFDataExtractor(Data, getHeaderOffset() + Header.length()); if (Error E = - List.extract(Data, getHeaderOffset(), End, &Offset, + List.extract(Data, Header.length() ? getHeaderOffset() : 0, &Offset, Header.getSectionName(), Header.getListTypeString())) return std::move(E); - ListMap[StartingOffset] = List; return List; } diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h index 5b3b46626059..369cbdc28c2e 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h @@ -113,6 +113,8 @@ public: const DWARFUnitIndex &getDWARFUnitIndex(DWARFContext &Context, DWARFSectionKind Kind); +bool isCompileUnit(const std::unique_ptr<DWARFUnit> &U); + /// Describe a collection of units. 
Intended to hold all units either from /// .debug_info and .debug_types, or from .debug_info.dwo and .debug_types.dwo. class DWARFUnitVector final : public SmallVector<std::unique_ptr<DWARFUnit>, 1> { @@ -127,6 +129,9 @@ public: using iterator = typename UnitVector::iterator; using iterator_range = llvm::iterator_range<typename UnitVector::iterator>; + using compile_unit_range = + decltype(make_filter_range(std::declval<iterator_range>(), isCompileUnit)); + DWARFUnit *getUnitForOffset(uint64_t Offset) const; DWARFUnit *getUnitForIndexEntry(const DWARFUnitIndex::Entry &E); @@ -204,7 +209,6 @@ class DWARFUnit { const DWARFDebugAbbrev *Abbrev; const DWARFSection *RangeSection; uint64_t RangeSectionBase; - const DWARFSection *LocSection; uint64_t LocSectionBase; /// Location table of this unit. @@ -223,10 +227,6 @@ class DWARFUnit { /// offsets table (DWARF v5). Optional<StrOffsetsContributionDescriptor> StringOffsetsTableContribution; - /// A table of range lists (DWARF v5 and later). - Optional<DWARFDebugRnglistTable> RngListTable; - Optional<DWARFListTableHeader> LoclistTableHeader; - mutable const DWARFAbbreviationDeclarationSet *Abbrevs; llvm::Optional<object::SectionedAddress> BaseAddr; /// The compile unit debug information entry items. @@ -294,6 +294,7 @@ public: dwarf::DwarfFormat getFormat() const { return Header.getFormat(); } uint8_t getUnitType() const { return Header.getUnitType(); } bool isTypeUnit() const { return Header.isTypeUnit(); } + uint64_t getAbbrOffset() const { return Header.getAbbrOffset(); } uint64_t getNextUnitOffset() const { return Header.getNextUnitOffset(); } const DWARFSection &getLineSection() const { return LineSection; } StringRef getStringSection() const { return StringSection; } @@ -313,10 +314,6 @@ public: RangeSection = RS; RangeSectionBase = Base; } - void setLocSection(const DWARFSection *LS, uint64_t Base) { - LocSection = LS; - LocSectionBase = Base; - } uint64_t getLocSectionBase() const { return LocSectionBase; @@ -411,21 +408,10 @@ public: /// Return a rangelist's offset based on an index. The index designates /// an entry in the rangelist table's offset array and is supplied by /// DW_FORM_rnglistx. - Optional<uint64_t> getRnglistOffset(uint32_t Index) { - if (!RngListTable) - return None; - if (Optional<uint64_t> Off = RngListTable->getOffsetEntry(Index)) - return *Off + RangeSectionBase; - return None; - } + Optional<uint64_t> getRnglistOffset(uint32_t Index); + + Optional<uint64_t> getLoclistOffset(uint32_t Index); - Optional<uint64_t> getLoclistOffset(uint32_t Index) { - if (!LoclistTableHeader) - return None; - if (Optional<uint64_t> Off = LoclistTableHeader->getOffsetEntry(Index)) - return *Off + getLocSectionBase(); - return None; - } Expected<DWARFAddressRangesVector> collectAddressRanges(); Expected<DWARFLocationExpressionsVector> @@ -480,7 +466,6 @@ public: /// The unit needs to have its DIEs extracted for this method to work. 
DWARFDie getDIEForOffset(uint64_t Offset) { extractDIEsIfNeeded(false); - assert(!DieArray.empty()); auto It = llvm::partition_point(DieArray, [=](const DWARFDebugInfoEntry &DIE) { return DIE.getOffset() < Offset; @@ -529,6 +514,10 @@ private: bool parseDWO(); }; +inline bool isCompileUnit(const std::unique_ptr<DWARFUnit> &U) { + return !U->isTypeUnit(); +} + } // end namespace llvm #endif // LLVM_DEBUGINFO_DWARF_DWARFUNIT_H diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFVerifier.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFVerifier.h index 22b1d722fc89..18d889f5cadb 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFVerifier.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFVerifier.h @@ -12,25 +12,22 @@ #include "llvm/ADT/Optional.h" #include "llvm/DebugInfo/DIContext.h" #include "llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h" -#include "llvm/DebugInfo/DWARF/DWARFAddressRange.h" #include "llvm/DebugInfo/DWARF/DWARFDie.h" #include "llvm/DebugInfo/DWARF/DWARFUnitIndex.h" - #include <cstdint> #include <map> #include <set> namespace llvm { class raw_ostream; +struct DWARFAddressRange; struct DWARFAttribute; class DWARFContext; -class DWARFDie; -class DWARFUnit; -class DWARFCompileUnit; class DWARFDataExtractor; class DWARFDebugAbbrev; class DataExtractor; struct DWARFSection; +class DWARFUnit; /// A class that verifies DWARF debug information given a DWARF Context. class DWARFVerifier { diff --git a/llvm/include/llvm/DebugInfo/MSF/MappedBlockStream.h b/llvm/include/llvm/DebugInfo/MSF/MappedBlockStream.h index 593d781b990e..473c89e8106f 100644 --- a/llvm/include/llvm/DebugInfo/MSF/MappedBlockStream.h +++ b/llvm/include/llvm/DebugInfo/MSF/MappedBlockStream.h @@ -24,8 +24,6 @@ namespace llvm { namespace msf { -struct MSFLayout; - /// MappedBlockStream represents data stored in an MSF file into chunks of a /// particular size (called the Block Size), and whose chunks may not be /// necessarily contiguous. The arrangement of these chunks MSF the file diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.h b/llvm/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.h index beaaef0c5a6c..82b63d729454 100644 --- a/llvm/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.h +++ b/llvm/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.h @@ -34,6 +34,34 @@ struct MSFLayout; } namespace pdb { +// Represents merged or unmerged symbols. Merged symbols can be written to the +// output file as is, but unmerged symbols must be rewritten first. In either +// case, the size must be known up front. +struct SymbolListWrapper { + explicit SymbolListWrapper(ArrayRef<uint8_t> Syms) + : SymPtr(const_cast<uint8_t *>(Syms.data())), SymSize(Syms.size()), + NeedsToBeMerged(false) {} + explicit SymbolListWrapper(void *SymSrc, uint32_t Length) + : SymPtr(SymSrc), SymSize(Length), NeedsToBeMerged(true) {} + + ArrayRef<uint8_t> asArray() const { + return ArrayRef<uint8_t>(static_cast<const uint8_t *>(SymPtr), SymSize); + } + + uint32_t size() const { return SymSize; } + + void *SymPtr = nullptr; + uint32_t SymSize = 0; + bool NeedsToBeMerged = false; +}; + +/// Represents a string table reference at some offset in the module symbol +/// stream. +struct StringTableFixup { + uint32_t StrTabOffset = 0; + uint32_t SymOffsetOfReference = 0; +}; + class DbiModuleDescriptorBuilder { friend class DbiStreamBuilder; @@ -48,10 +76,28 @@ public: void setPdbFilePathNI(uint32_t NI); void setObjFileName(StringRef Name); + + // Callback to merge one source of unmerged symbols. 
+ using MergeSymbolsCallback = Error (*)(void *Ctx, void *Symbols, + BinaryStreamWriter &Writer); + + void setMergeSymbolsCallback(void *Ctx, MergeSymbolsCallback Callback) { + MergeSymsCtx = Ctx; + MergeSymsCallback = Callback; + } + + void setStringTableFixups(std::vector<StringTableFixup> &&Fixups) { + StringTableFixups = std::move(Fixups); + } + void setFirstSectionContrib(const SectionContrib &SC); void addSymbol(codeview::CVSymbol Symbol); void addSymbolsInBulk(ArrayRef<uint8_t> BulkSymbols); + // Add symbols of known size which will be merged (rewritten) when committing + // the PDB to disk. + void addUnmergedSymbols(void *SymSrc, uint32_t SymLength); + void addDebugSubsection(std::shared_ptr<codeview::DebugSubsection> Subsection); @@ -77,8 +123,14 @@ public: void finalize(); Error finalizeMsfLayout(); - Error commit(BinaryStreamWriter &ModiWriter, const msf::MSFLayout &MsfLayout, - WritableBinaryStreamRef MsfBuffer); + /// Commit the DBI descriptor to the DBI stream. + Error commit(BinaryStreamWriter &ModiWriter); + + /// Commit the accumulated symbols to the module symbol stream. Safe to call + /// in parallel on different DbiModuleDescriptorBuilder objects. Only modifies + /// the pre-allocated stream in question. + Error commitSymbolStream(const msf::MSFLayout &MsfLayout, + WritableBinaryStreamRef MsfBuffer); private: uint32_t calculateC13DebugInfoSize() const; @@ -91,7 +143,12 @@ private: std::string ModuleName; std::string ObjFileName; std::vector<std::string> SourceFiles; - std::vector<ArrayRef<uint8_t>> Symbols; + std::vector<SymbolListWrapper> Symbols; + + void *MergeSymsCtx = nullptr; + MergeSymbolsCallback MergeSymsCallback = nullptr; + + std::vector<StringTableFixup> StringTableFixups; std::vector<codeview::DebugSubsectionRecordBuilder> C13Builders; diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/NativeEnumSymbols.h b/llvm/include/llvm/DebugInfo/PDB/Native/NativeEnumSymbols.h new file mode 100644 index 000000000000..480b3fb11419 --- /dev/null +++ b/llvm/include/llvm/DebugInfo/PDB/Native/NativeEnumSymbols.h @@ -0,0 +1,41 @@ +//==- NativeEnumSymbols.h - Native Symbols Enumerator impl -------*- C++ -*-==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_PDB_NATIVE_NATIVEENUMSYMBOLS_H +#define LLVM_DEBUGINFO_PDB_NATIVE_NATIVEENUMSYMBOLS_H + +#include "llvm/DebugInfo/CodeView/TypeRecord.h" +#include "llvm/DebugInfo/PDB/IPDBEnumChildren.h" +#include "llvm/DebugInfo/PDB/PDBSymbol.h" + +#include <vector> + +namespace llvm { +namespace pdb { + +class NativeSession; + +class NativeEnumSymbols : public IPDBEnumChildren<PDBSymbol> { +public: + NativeEnumSymbols(NativeSession &Session, std::vector<SymIndexId> Symbols); + + uint32_t getChildCount() const override; + std::unique_ptr<PDBSymbol> getChildAtIndex(uint32_t Index) const override; + std::unique_ptr<PDBSymbol> getNext() override; + void reset() override; + +private: + std::vector<SymIndexId> Symbols; + uint32_t Index; + NativeSession &Session; +}; + +} // namespace pdb +} // namespace llvm + +#endif diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/NativeFunctionSymbol.h b/llvm/include/llvm/DebugInfo/PDB/Native/NativeFunctionSymbol.h index 4adf89f0d69a..b219055d2153 100644 --- a/llvm/include/llvm/DebugInfo/PDB/Native/NativeFunctionSymbol.h +++ b/llvm/include/llvm/DebugInfo/PDB/Native/NativeFunctionSymbol.h @@ -20,7 +20,7 @@ namespace pdb { class NativeFunctionSymbol : public NativeRawSymbol { public: NativeFunctionSymbol(NativeSession &Session, SymIndexId Id, - const codeview::ProcSym &Sym); + const codeview::ProcSym &Sym, uint32_t RecordOffset); ~NativeFunctionSymbol() override; @@ -30,13 +30,15 @@ public: uint32_t getAddressOffset() const override; uint32_t getAddressSection() const override; std::string getName() const override; - PDB_SymType getSymTag() const override; uint64_t getLength() const override; uint32_t getRelativeVirtualAddress() const override; uint64_t getVirtualAddress() const override; + std::unique_ptr<IPDBEnumSymbols> + findInlineFramesByVA(uint64_t VA) const override; protected: const codeview::ProcSym Sym; + uint32_t RecordOffset = 0; }; } // namespace pdb diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/NativeInlineSiteSymbol.h b/llvm/include/llvm/DebugInfo/PDB/Native/NativeInlineSiteSymbol.h new file mode 100644 index 000000000000..2f6aba038ae8 --- /dev/null +++ b/llvm/include/llvm/DebugInfo/PDB/Native/NativeInlineSiteSymbol.h @@ -0,0 +1,46 @@ +//===- NativeInlineSiteSymbol.h - info about inline sites -------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_PDB_NATIVE_NATIVEINLINESITESYMBOL_H +#define LLVM_DEBUGINFO_PDB_NATIVE_NATIVEINLINESITESYMBOL_H + +#include "llvm/DebugInfo/CodeView/CodeView.h" +#include "llvm/DebugInfo/CodeView/SymbolRecord.h" +#include "llvm/DebugInfo/PDB/Native/NativeRawSymbol.h" +#include "llvm/DebugInfo/PDB/Native/NativeSession.h" + +namespace llvm { +namespace pdb { + +class NativeInlineSiteSymbol : public NativeRawSymbol { +public: + NativeInlineSiteSymbol(NativeSession &Session, SymIndexId Id, + const codeview::InlineSiteSym &Sym, + uint64_t ParentAddr); + + ~NativeInlineSiteSymbol() override; + + void dump(raw_ostream &OS, int Indent, PdbSymbolIdField ShowIdFields, + PdbSymbolIdField RecurseIdFields) const override; + + std::string getName() const override; + std::unique_ptr<IPDBEnumLineNumbers> + findInlineeLinesByVA(uint64_t VA, uint32_t Length) const override; + +private: + const codeview::InlineSiteSym Sym; + uint64_t ParentAddr; + + void getLineOffset(uint32_t OffsetInFunc, uint32_t &LineOffset, + uint32_t &FileOffset) const; +}; + +} // namespace pdb +} // namespace llvm + +#endif // LLVM_DEBUGINFO_PDB_NATIVE_NATIVEINLINESITESYMBOL_H diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/NativeLineNumber.h b/llvm/include/llvm/DebugInfo/PDB/Native/NativeLineNumber.h index a7ce82c70b08..5dedc70f11ba 100644 --- a/llvm/include/llvm/DebugInfo/PDB/Native/NativeLineNumber.h +++ b/llvm/include/llvm/DebugInfo/PDB/Native/NativeLineNumber.h @@ -22,7 +22,7 @@ public: const codeview::LineInfo Line, uint32_t ColumnNumber, uint32_t Length, uint32_t Section, uint32_t Offset, - uint32_t SrcFileId); + uint32_t SrcFileId, uint32_t CompilandId); uint32_t getLineNumber() const override; uint32_t getLineNumberEnd() const override; @@ -45,6 +45,7 @@ private: uint32_t Offset; uint32_t Length; uint32_t SrcFileId; + uint32_t CompilandId; }; } // namespace pdb } // namespace llvm diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/NativePublicSymbol.h b/llvm/include/llvm/DebugInfo/PDB/Native/NativePublicSymbol.h index 0a1451530f18..9f410e27f4cb 100644 --- a/llvm/include/llvm/DebugInfo/PDB/Native/NativePublicSymbol.h +++ b/llvm/include/llvm/DebugInfo/PDB/Native/NativePublicSymbol.h @@ -30,7 +30,6 @@ public: uint32_t getAddressOffset() const override; uint32_t getAddressSection() const override; std::string getName() const override; - PDB_SymType getSymTag() const override; uint32_t getRelativeVirtualAddress() const override; uint64_t getVirtualAddress() const override; diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/NativeSession.h b/llvm/include/llvm/DebugInfo/PDB/Native/NativeSession.h index 342e63599e66..5f8fc587e546 100644 --- a/llvm/include/llvm/DebugInfo/PDB/Native/NativeSession.h +++ b/llvm/include/llvm/DebugInfo/PDB/Native/NativeSession.h @@ -110,9 +110,14 @@ public: const SymbolCache &getSymbolCache() const { return Cache; } uint32_t getRVAFromSectOffset(uint32_t Section, uint32_t Offset) const; uint64_t getVAFromSectOffset(uint32_t Section, uint32_t Offset) const; + bool moduleIndexForVA(uint64_t VA, uint16_t &ModuleIndex) const; + bool moduleIndexForSectOffset(uint32_t Sect, uint32_t Offset, + uint16_t &ModuleIndex) const; + Expected<ModuleDebugStreamRef> getModuleDebugStream(uint32_t Index) const; private: void initializeExeSymbol(); + void parseSectionContribs(); std::unique_ptr<PDBFile> Pdb; std::unique_ptr<BumpPtrAllocator> Allocator; @@ -120,6 
+125,12 @@ private: SymbolCache Cache; SymIndexId ExeSymbol = 0; uint64_t LoadAddress = 0; + + /// Map from virtual address to module index. + using IMap = + IntervalMap<uint64_t, uint16_t, 8, IntervalMapHalfOpenInfo<uint64_t>>; + IMap::Allocator IMapAllocator; + IMap AddrToModuleIndex; }; } // namespace pdb } // namespace llvm diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/SymbolCache.h b/llvm/include/llvm/DebugInfo/PDB/Native/SymbolCache.h index 90fd19a7a2fb..1ff6ca173b2b 100644 --- a/llvm/include/llvm/DebugInfo/PDB/Native/SymbolCache.h +++ b/llvm/include/llvm/DebugInfo/PDB/Native/SymbolCache.h @@ -37,40 +37,40 @@ class SymbolCache { /// an Id. Id allocation is an implementation, with the only guarantee /// being that once an Id is allocated, the symbol can be assumed to be /// cached. - std::vector<std::unique_ptr<NativeRawSymbol>> Cache; + mutable std::vector<std::unique_ptr<NativeRawSymbol>> Cache; /// For type records from the TPI stream which have been paresd and cached, /// stores a mapping to SymIndexId of the cached symbol. - DenseMap<codeview::TypeIndex, SymIndexId> TypeIndexToSymbolId; + mutable DenseMap<codeview::TypeIndex, SymIndexId> TypeIndexToSymbolId; /// For field list members which have been parsed and cached, stores a mapping /// from (IndexOfClass, MemberIndex) to the corresponding SymIndexId of the /// cached symbol. - DenseMap<std::pair<codeview::TypeIndex, uint32_t>, SymIndexId> + mutable DenseMap<std::pair<codeview::TypeIndex, uint32_t>, SymIndexId> FieldListMembersToSymbolId; /// List of SymIndexIds for each compiland, indexed by compiland index as they /// appear in the PDB file. - std::vector<SymIndexId> Compilands; + mutable std::vector<SymIndexId> Compilands; /// List of source files, indexed by unique source file index. mutable std::vector<std::unique_ptr<NativeSourceFile>> SourceFiles; + + /// Map from string table offset to source file Id. mutable DenseMap<uint32_t, SymIndexId> FileNameOffsetToId; /// Map from global symbol offset to SymIndexId. - DenseMap<uint32_t, SymIndexId> GlobalOffsetToSymbolId; - - /// Map from segment and code offset to SymIndexId. - DenseMap<std::pair<uint32_t, uint32_t>, SymIndexId> AddressToFunctionSymId; - DenseMap<std::pair<uint32_t, uint32_t>, SymIndexId> AddressToPublicSymId; + mutable DenseMap<uint32_t, SymIndexId> GlobalOffsetToSymbolId; - /// Map from virtual address to module index. - using IMap = - IntervalMap<uint64_t, uint16_t, 8, IntervalMapHalfOpenInfo<uint64_t>>; - IMap::Allocator IMapAllocator; - IMap AddrToModuleIndex; + /// Map from segment and code offset to function symbols. + mutable DenseMap<std::pair<uint32_t, uint32_t>, SymIndexId> AddressToSymbolId; + /// Map from segment and code offset to public symbols. + mutable DenseMap<std::pair<uint32_t, uint32_t>, SymIndexId> + AddressToPublicSymId; - Expected<ModuleDebugStreamRef> getModuleDebugStream(uint32_t Index) const; + /// Map from module index and symbol table offset to SymIndexId. + mutable DenseMap<std::pair<uint16_t, uint32_t>, SymIndexId> + SymTabOffsetToSymbolId; struct LineTableEntry { uint64_t Addr; @@ -83,7 +83,7 @@ class SymbolCache { std::vector<LineTableEntry> findLineTable(uint16_t Modi) const; mutable DenseMap<uint16_t, std::vector<LineTableEntry>> LineTable; - SymIndexId createSymbolPlaceholder() { + SymIndexId createSymbolPlaceholder() const { SymIndexId Id = Cache.size(); Cache.push_back(nullptr); return Id; @@ -91,7 +91,7 @@ class SymbolCache { template <typename ConcreteSymbolT, typename CVRecordT, typename... 
Args> SymIndexId createSymbolForType(codeview::TypeIndex TI, codeview::CVType CVT, - Args &&... ConstructorArgs) { + Args &&...ConstructorArgs) const { CVRecordT Record; if (auto EC = codeview::TypeDeserializer::deserializeAs<CVRecordT>(CVT, Record)) { @@ -104,10 +104,10 @@ class SymbolCache { } SymIndexId createSymbolForModifiedType(codeview::TypeIndex ModifierTI, - codeview::CVType CVT); + codeview::CVType CVT) const; SymIndexId createSimpleType(codeview::TypeIndex TI, - codeview::ModifierOptions Mods); + codeview::ModifierOptions Mods) const; std::unique_ptr<PDBSymbol> findFunctionSymbolBySectOffset(uint32_t Sect, uint32_t Offset); @@ -118,7 +118,7 @@ public: SymbolCache(NativeSession &Session, DbiStream *Dbi); template <typename ConcreteSymbolT, typename... Args> - SymIndexId createSymbol(Args &&... ConstructorArgs) { + SymIndexId createSymbol(Args &&...ConstructorArgs) const { SymIndexId Id = Cache.size(); // Initial construction must not access the cache, since it must be done @@ -145,7 +145,7 @@ public: std::unique_ptr<IPDBEnumSymbols> createGlobalsEnumerator(codeview::SymbolKind Kind); - SymIndexId findSymbolByTypeIndex(codeview::TypeIndex TI); + SymIndexId findSymbolByTypeIndex(codeview::TypeIndex TI) const; template <typename ConcreteSymbolT, typename... Args> SymIndexId getOrCreateFieldListMember(codeview::TypeIndex FieldListTI, @@ -163,6 +163,9 @@ public: } SymIndexId getOrCreateGlobalSymbolByOffset(uint32_t Offset); + SymIndexId getOrCreateInlineSymbol(codeview::InlineSiteSym Sym, + uint64_t ParentAddr, uint16_t Modi, + uint32_t RecordOffset) const; std::unique_ptr<PDBSymbol> findSymbolBySectOffset(uint32_t Sect, uint32_t Offset, PDB_SymType Type); @@ -185,9 +188,6 @@ public: std::unique_ptr<IPDBSourceFile> getSourceFileById(SymIndexId FileId) const; SymIndexId getOrCreateSourceFile(const codeview::FileChecksumEntry &Checksum) const; - - void parseSectionContribs(); - Optional<uint16_t> getModuleIndexForAddr(uint64_t Addr) const; }; } // namespace pdb diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/TpiStream.h b/llvm/include/llvm/DebugInfo/PDB/Native/TpiStream.h index 1b7fd2d54cb2..70288868ca21 100644 --- a/llvm/include/llvm/DebugInfo/PDB/Native/TpiStream.h +++ b/llvm/include/llvm/DebugInfo/PDB/Native/TpiStream.h @@ -9,7 +9,7 @@ #ifndef LLVM_DEBUGINFO_PDB_RAW_PDBTPISTREAM_H #define LLVM_DEBUGINFO_PDB_RAW_PDBTPISTREAM_H -#include "llvm/DebugInfo/CodeView/TypeRecord.h" +#include "llvm/DebugInfo/CodeView/CVRecord.h" #include "llvm/DebugInfo/PDB/Native/HashTable.h" #include "llvm/DebugInfo/PDB/Native/RawConstants.h" #include "llvm/DebugInfo/PDB/Native/RawTypes.h" diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/TpiStreamBuilder.h b/llvm/include/llvm/DebugInfo/PDB/Native/TpiStreamBuilder.h index 72d98e9c2c4d..9ef2ee6a9307 100644 --- a/llvm/include/llvm/DebugInfo/PDB/Native/TpiStreamBuilder.h +++ b/llvm/include/llvm/DebugInfo/PDB/Native/TpiStreamBuilder.h @@ -54,16 +54,20 @@ public: void setVersionHeader(PdbRaw_TpiVer Version); void addTypeRecord(ArrayRef<uint8_t> Type, Optional<uint32_t> Hash); + void addTypeRecords(ArrayRef<uint8_t> Types, ArrayRef<uint16_t> Sizes, + ArrayRef<uint32_t> Hashes); Error finalizeMsfLayout(); - uint32_t getRecordCount() const { return TypeRecords.size(); } + uint32_t getRecordCount() const { return TypeRecordCount; } Error commit(const msf::MSFLayout &Layout, WritableBinaryStreamRef Buffer); uint32_t calculateSerializedLength(); private: + void updateTypeIndexOffsets(ArrayRef<uint16_t> Sizes); + uint32_t calculateHashBufferSize() const; uint32_t 
calculateIndexOffsetSize() const; Error finalize(); @@ -71,10 +75,11 @@ private: msf::MSFBuilder &Msf; BumpPtrAllocator &Allocator; + uint32_t TypeRecordCount = 0; size_t TypeRecordBytes = 0; PdbRaw_TpiVer VerHeader = PdbRaw_TpiVer::PdbTpiV80; - std::vector<ArrayRef<uint8_t>> TypeRecords; + std::vector<ArrayRef<uint8_t>> TypeRecBuffers; std::vector<uint32_t> TypeHashes; std::vector<codeview::TypeIndexOffset> TypeIndexOffsets; uint32_t HashStreamIndex = kInvalidStreamIndex; diff --git a/llvm/include/llvm/DebugInfo/PDB/PDBExtras.h b/llvm/include/llvm/DebugInfo/PDB/PDBExtras.h index 45aba013e7c8..802d18a069ee 100644 --- a/llvm/include/llvm/DebugInfo/PDB/PDBExtras.h +++ b/llvm/include/llvm/DebugInfo/PDB/PDBExtras.h @@ -9,16 +9,15 @@ #ifndef LLVM_DEBUGINFO_PDB_PDBEXTRAS_H #define LLVM_DEBUGINFO_PDB_PDBEXTRAS_H +#include "llvm/ADT/StringRef.h" #include "llvm/DebugInfo/CodeView/CodeView.h" #include "llvm/DebugInfo/PDB/PDBTypes.h" #include "llvm/Support/raw_ostream.h" - +#include <cstdint> #include <unordered_map> namespace llvm { -class raw_ostream; - namespace pdb { using TagStats = std::unordered_map<PDB_SymType, int>; @@ -51,7 +50,6 @@ void dumpSymbolField(raw_ostream &OS, StringRef Name, T Value, int Indent) { OS << Name << ": " << Value; } - } // end namespace pdb } // end namespace llvm diff --git a/llvm/include/llvm/DebugInfo/PDB/PDBSymbol.h b/llvm/include/llvm/DebugInfo/PDB/PDBSymbol.h index 2982146f960c..24cf1e459f92 100644 --- a/llvm/include/llvm/DebugInfo/PDB/PDBSymbol.h +++ b/llvm/include/llvm/DebugInfo/PDB/PDBSymbol.h @@ -42,7 +42,6 @@ class StringRef; class raw_ostream; namespace pdb { -class IPDBRawSymbol; class IPDBSession; #define DECLARE_PDB_SYMBOL_CONCRETE_TYPE(TagValue) \ @@ -141,7 +140,14 @@ public: StringRef Name, PDB_NameSearchFlags Flags, uint32_t RVA) const; + std::unique_ptr<IPDBEnumSymbols> findInlineFramesByVA(uint64_t VA) const; std::unique_ptr<IPDBEnumSymbols> findInlineFramesByRVA(uint32_t RVA) const; + std::unique_ptr<IPDBEnumLineNumbers> + findInlineeLinesByVA(uint64_t VA, uint32_t Length) const; + std::unique_ptr<IPDBEnumLineNumbers> + findInlineeLinesByRVA(uint32_t RVA, uint32_t Length) const; + + std::string getName() const; const IPDBRawSymbol &getRawSymbol() const { return *RawSymbol; } IPDBRawSymbol &getRawSymbol() { return *RawSymbol; } diff --git a/llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h b/llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h index 085e4bb4ccb8..1c8fa11660af 100644 --- a/llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h +++ b/llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h @@ -43,7 +43,7 @@ public: bool Demangle = true; bool RelativeAddresses = false; bool UntagAddresses = false; - bool UseNativePDBReader = false; + bool UseDIA = false; std::string DefaultArch; std::vector<std::string> DsymHints; std::string FallbackDebugPath; diff --git a/llvm/include/llvm/Demangle/ItaniumDemangle.h b/llvm/include/llvm/Demangle/ItaniumDemangle.h index 6ab873218386..6bfc02d15379 100644 --- a/llvm/include/llvm/Demangle/ItaniumDemangle.h +++ b/llvm/include/llvm/Demangle/ItaniumDemangle.h @@ -82,6 +82,7 @@ X(PostfixExpr) \ X(ConditionalExpr) \ X(MemberExpr) \ + X(SubobjectExpr) \ X(EnclosingExpr) \ X(CastExpr) \ X(SizeofParamPackExpr) \ @@ -91,6 +92,7 @@ X(PrefixExpr) \ X(FunctionParam) \ X(ConversionExpr) \ + X(PointerToMemberConversionExpr) \ X(InitListExpr) \ X(FoldExpr) \ X(ThrowExpr) \ @@ -1656,6 +1658,40 @@ public: } }; +class SubobjectExpr : public Node { + const Node *Type; + const Node *SubExpr; + StringView Offset; + NodeArray 
UnionSelectors; + bool OnePastTheEnd; + +public: + SubobjectExpr(const Node *Type_, const Node *SubExpr_, StringView Offset_, + NodeArray UnionSelectors_, bool OnePastTheEnd_) + : Node(KSubobjectExpr), Type(Type_), SubExpr(SubExpr_), Offset(Offset_), + UnionSelectors(UnionSelectors_), OnePastTheEnd(OnePastTheEnd_) {} + + template<typename Fn> void match(Fn F) const { + F(Type, SubExpr, Offset, UnionSelectors, OnePastTheEnd); + } + + void printLeft(OutputStream &S) const override { + SubExpr->print(S); + S += ".<"; + Type->print(S); + S += " at offset "; + if (Offset.empty()) { + S += "0"; + } else if (Offset[0] == 'n') { + S += "-"; + S += Offset.dropFront(); + } else { + S += Offset; + } + S += ">"; + } +}; + class EnclosingExpr : public Node { const StringView Prefix; const Node *Infix; @@ -1843,6 +1879,28 @@ public: } }; +class PointerToMemberConversionExpr : public Node { + const Node *Type; + const Node *SubExpr; + StringView Offset; + +public: + PointerToMemberConversionExpr(const Node *Type_, const Node *SubExpr_, + StringView Offset_) + : Node(KPointerToMemberConversionExpr), Type(Type_), SubExpr(SubExpr_), + Offset(Offset_) {} + + template<typename Fn> void match(Fn F) const { F(Type, SubExpr, Offset); } + + void printLeft(OutputStream &S) const override { + S += "("; + Type->print(S); + S += ")("; + SubExpr->print(S); + S += ")"; + } +}; + class InitListExpr : public Node { const Node *Ty; NodeArray Inits; @@ -2313,9 +2371,9 @@ template <typename Derived, typename Alloc> struct AbstractManglingParser { TemplateParamList Params; public: - ScopedTemplateParamList(AbstractManglingParser *Parser) - : Parser(Parser), - OldNumTemplateParamLists(Parser->TemplateParams.size()) { + ScopedTemplateParamList(AbstractManglingParser *TheParser) + : Parser(TheParser), + OldNumTemplateParamLists(TheParser->TemplateParams.size()) { Parser->TemplateParams.push_back(&Params); } ~ScopedTemplateParamList() { @@ -2437,6 +2495,8 @@ template <typename Derived, typename Alloc> struct AbstractManglingParser { Node *parseConversionExpr(); Node *parseBracedExpr(); Node *parseFoldExpr(); + Node *parsePointerToMemberConversionExpr(); + Node *parseSubobjectExpr(); /// Parse the <type> production. 
Node *parseType(); @@ -4404,6 +4464,50 @@ Node *AbstractManglingParser<Derived, Alloc>::parseFoldExpr() { return make<FoldExpr>(IsLeftFold, OperatorName, Pack, Init); } +// <expression> ::= mc <parameter type> <expr> [<offset number>] E +// +// Not yet in the spec: https://github.com/itanium-cxx-abi/cxx-abi/issues/47 +template <typename Derived, typename Alloc> +Node *AbstractManglingParser<Derived, Alloc>::parsePointerToMemberConversionExpr() { + Node *Ty = getDerived().parseType(); + if (!Ty) + return nullptr; + Node *Expr = getDerived().parseExpr(); + if (!Expr) + return nullptr; + StringView Offset = getDerived().parseNumber(true); + if (!consumeIf('E')) + return nullptr; + return make<PointerToMemberConversionExpr>(Ty, Expr, Offset); +} + +// <expression> ::= so <referent type> <expr> [<offset number>] <union-selector>* [p] E +// <union-selector> ::= _ [<number>] +// +// Not yet in the spec: https://github.com/itanium-cxx-abi/cxx-abi/issues/47 +template <typename Derived, typename Alloc> +Node *AbstractManglingParser<Derived, Alloc>::parseSubobjectExpr() { + Node *Ty = getDerived().parseType(); + if (!Ty) + return nullptr; + Node *Expr = getDerived().parseExpr(); + if (!Expr) + return nullptr; + StringView Offset = getDerived().parseNumber(true); + size_t SelectorsBegin = Names.size(); + while (consumeIf('_')) { + Node *Selector = make<NameType>(parseNumber()); + if (!Selector) + return nullptr; + Names.push_back(Selector); + } + bool OnePastTheEnd = consumeIf('p'); + if (!consumeIf('E')) + return nullptr; + return make<SubobjectExpr>( + Ty, Expr, Offset, popTrailingNodeArray(SelectorsBegin), OnePastTheEnd); +} + // <expression> ::= <unary operator-name> <expression> // ::= <binary operator-name> <expression> <expression> // ::= <ternary operator-name> <expression> <expression> <expression> @@ -4661,6 +4765,9 @@ Node *AbstractManglingParser<Derived, Alloc>::parseExpr() { return nullptr; case 'm': switch (First[1]) { + case 'c': + First += 2; + return parsePointerToMemberConversionExpr(); case 'i': First += 2; return getDerived().parseBinaryExpr("-"); @@ -4808,6 +4915,9 @@ Node *AbstractManglingParser<Derived, Alloc>::parseExpr() { return Ex; return make<CastExpr>("static_cast", T, Ex); } + case 'o': + First += 2; + return parseSubobjectExpr(); case 'p': { First += 2; Node *Child = getDerived().parseExpr(); @@ -4975,6 +5085,16 @@ Node *AbstractManglingParser<Derived, Alloc>::parseSpecialName() { switch (look()) { case 'T': switch (look(1)) { + // TA <template-arg> # template parameter object + // + // Not yet in the spec: https://github.com/itanium-cxx-abi/cxx-abi/issues/63 + case 'A': { + First += 2; + Node *Arg = getDerived().parseTemplateArg(); + if (Arg == nullptr) + return nullptr; + return make<SpecialName>("template parameter object for ", Arg); + } // TV <type> # virtual table case 'V': { First += 2; @@ -5103,7 +5223,7 @@ Node *AbstractManglingParser<Derived, Alloc>::parseEncoding() { decltype(TemplateParams) OldParams; public: - SaveTemplateParams(AbstractManglingParser *Parser) : Parser(Parser) { + SaveTemplateParams(AbstractManglingParser *TheParser) : Parser(TheParser) { OldParams = std::move(Parser->TemplateParams); Parser->TemplateParams.clear(); } @@ -5203,7 +5323,12 @@ struct FloatData<long double> #else static const size_t mangled_size = 20; // May need to be adjusted to 16 or 24 on other platforms #endif - static const size_t max_demangled_size = 40; + // `-0x1.ffffffffffffffffffffffffffffp+16383` + 'L' + '\0' == 42 bytes. 
+ // 28 'f's * 4 bits == 112 bits, which is the number of mantissa bits. + // Negatives are one character longer than positives. + // `0x1.` and `p` are constant, and exponents `+16383` and `-16382` are the + // same length. 1 sign bit, 112 mantissa bits, and 15 exponent bits == 128. + static const size_t max_demangled_size = 42; static constexpr const char *spec = "%LaL"; }; diff --git a/llvm/include/llvm/Demangle/Utility.h b/llvm/include/llvm/Demangle/Utility.h index 04e1936ebbe7..846a5f0818e7 100644 --- a/llvm/include/llvm/Demangle/Utility.h +++ b/llvm/include/llvm/Demangle/Utility.h @@ -52,7 +52,7 @@ class OutputStream { char *TempPtr = std::end(Temp); while (N) { - *--TempPtr = '0' + char(N % 10); + *--TempPtr = char('0' + N % 10); N /= 10; } diff --git a/llvm/include/llvm/ExecutionEngine/ExecutionEngine.h b/llvm/include/llvm/ExecutionEngine/ExecutionEngine.h index 2562da7cf60b..2e386518f0bf 100644 --- a/llvm/include/llvm/ExecutionEngine/ExecutionEngine.h +++ b/llvm/include/llvm/ExecutionEngine/ExecutionEngine.h @@ -142,11 +142,6 @@ protected: std::shared_ptr<LegacyJITSymbolResolver> SR, std::unique_ptr<TargetMachine> TM); - static ExecutionEngine *(*OrcMCJITReplacementCtor)( - std::string *ErrorStr, std::shared_ptr<MCJITMemoryManager> MM, - std::shared_ptr<LegacyJITSymbolResolver> SR, - std::unique_ptr<TargetMachine> TM); - static ExecutionEngine *(*InterpCtor)(std::unique_ptr<Module> M, std::string *ErrorStr); @@ -552,7 +547,6 @@ private: std::string MCPU; SmallVector<std::string, 4> MAttrs; bool VerifyModules; - bool UseOrcMCJITReplacement; bool EmulatedTLS = true; public: @@ -648,17 +642,6 @@ public: return *this; } - // Use OrcMCJITReplacement instead of MCJIT. Off by default. - LLVM_ATTRIBUTE_DEPRECATED( - inline void setUseOrcMCJITReplacement(bool UseOrcMCJITReplacement), - "ORCv1 utilities (including OrcMCJITReplacement) are deprecated. Please " - "use ORCv2/LLJIT instead (see docs/ORCv2.rst)"); - - void setUseOrcMCJITReplacement(ORCv1DeprecationAcknowledgement, - bool UseOrcMCJITReplacement) { - this->UseOrcMCJITReplacement = UseOrcMCJITReplacement; - } - void setEmulatedTLS(bool EmulatedTLS) { this->EmulatedTLS = EmulatedTLS; } @@ -679,10 +662,6 @@ public: ExecutionEngine *create(TargetMachine *TM); }; -void EngineBuilder::setUseOrcMCJITReplacement(bool UseOrcMCJITReplacement) { - this->UseOrcMCJITReplacement = UseOrcMCJITReplacement; -} - // Create wrappers for C Binding types (see CBindingWrapping.h). DEFINE_SIMPLE_CONVERSION_FUNCTIONS(ExecutionEngine, LLVMExecutionEngineRef) diff --git a/llvm/include/llvm/ExecutionEngine/JITEventListener.h b/llvm/include/llvm/ExecutionEngine/JITEventListener.h index 606b6f7cc128..4eefd993de2b 100644 --- a/llvm/include/llvm/ExecutionEngine/JITEventListener.h +++ b/llvm/include/llvm/ExecutionEngine/JITEventListener.h @@ -20,7 +20,6 @@ #include "llvm/IR/DebugLoc.h" #include "llvm/Support/CBindingWrapping.h" #include <cstdint> -#include <vector> namespace llvm { diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/EHFrameSupport.h b/llvm/include/llvm/ExecutionEngine/JITLink/EHFrameSupport.h index 72687682f606..ec78d9db40b6 100644 --- a/llvm/include/llvm/ExecutionEngine/JITLink/EHFrameSupport.h +++ b/llvm/include/llvm/ExecutionEngine/JITLink/EHFrameSupport.h @@ -21,14 +21,6 @@ namespace llvm { namespace jitlink { -/// Registers all FDEs in the given eh-frame section with the current process. 
-Error registerEHFrameSection(const void *EHFrameSectionAddr, - size_t EHFrameSectionSize); - -/// Deregisters all FDEs in the given eh-frame section with the current process. -Error deregisterEHFrameSection(const void *EHFrameSectionAddr, - size_t EHFrameSectionSize); - /// Supports registration/deregistration of EH-frames in a target process. class EHFrameRegistrar { public: @@ -42,32 +34,11 @@ public: /// Registers / Deregisters EH-frames in the current process. class InProcessEHFrameRegistrar final : public EHFrameRegistrar { public: - /// Get a reference to the InProcessEHFrameRegistrar singleton. - static InProcessEHFrameRegistrar &getInstance(); - - InProcessEHFrameRegistrar(const InProcessEHFrameRegistrar &) = delete; - InProcessEHFrameRegistrar & - operator=(const InProcessEHFrameRegistrar &) = delete; - - InProcessEHFrameRegistrar(InProcessEHFrameRegistrar &&) = delete; - InProcessEHFrameRegistrar &operator=(InProcessEHFrameRegistrar &&) = delete; - Error registerEHFrames(JITTargetAddress EHFrameSectionAddr, - size_t EHFrameSectionSize) override { - return registerEHFrameSection( - jitTargetAddressToPointer<void *>(EHFrameSectionAddr), - EHFrameSectionSize); - } + size_t EHFrameSectionSize) override; Error deregisterEHFrames(JITTargetAddress EHFrameSectionAddr, - size_t EHFrameSectionSize) override { - return deregisterEHFrameSection( - jitTargetAddressToPointer<void *>(EHFrameSectionAddr), - EHFrameSectionSize); - } - -private: - InProcessEHFrameRegistrar(); + size_t EHFrameSectionSize) override; }; using StoreFrameRangeFunction = diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/ELF.h b/llvm/include/llvm/ExecutionEngine/JITLink/ELF.h index 9f6ea5271f4b..8912f3a2db45 100644 --- a/llvm/include/llvm/ExecutionEngine/JITLink/ELF.h +++ b/llvm/include/llvm/ExecutionEngine/JITLink/ELF.h @@ -19,11 +19,20 @@ namespace llvm { namespace jitlink { -/// jit-link the given ObjBuffer, which must be a ELF object file. +/// Create a LinkGraph from an ELF relocatable object. +/// +/// Note: The graph does not take ownership of the underlying buffer, nor copy +/// its contents. The caller is responsible for ensuring that the object buffer +/// outlives the graph. +Expected<std::unique_ptr<LinkGraph>> +createLinkGraphFromELFObject(MemoryBufferRef ObjectBuffer); + +/// Link the given graph. /// /// Uses conservative defaults for GOT and stub handling based on the target /// platform. -void jitLink_ELF(std::unique_ptr<JITLinkContext> Ctx); +void link_ELF(std::unique_ptr<LinkGraph> G, + std::unique_ptr<JITLinkContext> Ctx); } // end namespace jitlink } // end namespace llvm diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/ELF_x86_64.h b/llvm/include/llvm/ExecutionEngine/JITLink/ELF_x86_64.h index 7860088f3569..1423b0c30b2a 100644 --- a/llvm/include/llvm/ExecutionEngine/JITLink/ELF_x86_64.h +++ b/llvm/include/llvm/ExecutionEngine/JITLink/ELF_x86_64.h @@ -44,8 +44,20 @@ enum ELFX86RelocationKind : Edge::Kind { } // end namespace ELF_x86_64_Edges +/// Create a LinkGraph from an ELF/x86-64 relocatable object. +/// +/// Note: The graph does not take ownership of the underlying buffer, nor copy +/// its contents. The caller is responsible for ensuring that the object buffer +/// outlives the graph. +Expected<std::unique_ptr<LinkGraph>> +createLinkGraphFromELFObject_x86_64(MemoryBufferRef ObjectBuffer); + /// jit-link the given object buffer, which must be a ELF x86-64 object file. 
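// Illustrative sketch (not from the patch): the new two-step flow splits graph
// construction from linking. This builds a LinkGraph from an ELF object with
// createLinkGraphFromELFObject and inspects it; driving link_ELF() as well
// would additionally require a concrete JITLinkContext, which is omitted here.
// "input.o" is a placeholder path, and LinkGraph::sections() /
// Section::getName() are assumed from the existing JITLink.h API.
#include "llvm/ExecutionEngine/JITLink/ELF.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"

llvm::Error dumpELFSections() {
  auto Buf = llvm::MemoryBuffer::getFile("input.o");
  if (!Buf)
    return llvm::errorCodeToError(Buf.getError());
  // The graph does not own the buffer, so *Buf must outlive G (see the note
  // above).
  auto G = llvm::jitlink::createLinkGraphFromELFObject((*Buf)->getMemBufferRef());
  if (!G)
    return G.takeError();
  for (auto &Sec : (*G)->sections())
    llvm::outs() << "section " << Sec.getName() << "\n";
  return llvm::Error::success();
}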
-void jitLink_ELF_x86_64(std::unique_ptr<JITLinkContext> Ctx); +void link_ELF_x86_64(std::unique_ptr<LinkGraph> G, + std::unique_ptr<JITLinkContext> Ctx); + +/// Return the string name of the given ELF x86-64 edge kind. +StringRef getELFX86RelocationKindName(Edge::Kind R); } // end namespace jitlink } // end namespace llvm diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h b/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h index 76f9dea4160f..e8c0e28b83aa 100644 --- a/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h +++ b/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h @@ -395,6 +395,10 @@ public: return Name; } + /// Rename this symbol. The client is responsible for updating scope and + /// linkage if this name-change requires it. + void setName(StringRef Name) { this->Name = Name; } + /// Returns true if this Symbol has content (potentially) defined within this /// object file (i.e. is anything but an external or absolute symbol). bool isDefined() const { @@ -782,21 +786,48 @@ public: Section::const_block_iterator, const Block *, getSectionConstBlocks>; - LinkGraph(std::string Name, unsigned PointerSize, + LinkGraph(std::string Name, const Triple &TT, unsigned PointerSize, support::endianness Endianness) - : Name(std::move(Name)), PointerSize(PointerSize), + : Name(std::move(Name)), TT(TT), PointerSize(PointerSize), Endianness(Endianness) {} /// Returns the name of this graph (usually the name of the original /// underlying MemoryBuffer). const std::string &getName() { return Name; } + /// Returns the target triple for this Graph. + const Triple &getTargetTriple() const { return TT; } + /// Returns the pointer size for use in this graph. unsigned getPointerSize() const { return PointerSize; } /// Returns the endianness of content in this graph. support::endianness getEndianness() const { return Endianness; } + /// Allocate a copy of the given string using the LinkGraph's allocator. + /// This can be useful when renaming symbols or adding new content to the + /// graph. + StringRef allocateString(StringRef Source) { + auto *AllocatedBuffer = Allocator.Allocate<char>(Source.size()); + llvm::copy(Source, AllocatedBuffer); + return StringRef(AllocatedBuffer, Source.size()); + } + + /// Allocate a copy of the given string using the LinkGraph's allocator. + /// This can be useful when renaming symbols or adding new content to the + /// graph. + /// + /// Note: This Twine-based overload requires an extra string copy and an + /// extra heap allocation for large strings. The StringRef overload should + /// be preferred where possible. + StringRef allocateString(Twine Source) { + SmallString<256> TmpBuffer; + auto SourceStr = Source.toStringRef(TmpBuffer); + auto *AllocatedBuffer = Allocator.Allocate<char>(SourceStr.size()); + llvm::copy(SourceStr, AllocatedBuffer); + return StringRef(AllocatedBuffer, SourceStr.size()); + } + /// Create a section with the given name, protection flags, and alignment. 
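// Illustrative sketch (not from the patch): a LinkGraph pass that exercises the
// new Symbol::setName and LinkGraph::allocateString APIs shown above. The
// "renamed$" prefix and the pass itself are made up for the example;
// defined_symbols() and hasName() are assumed from the existing Symbol /
// LinkGraph interface.
#include "llvm/ExecutionEngine/JITLink/JITLink.h"

llvm::Error prefixDefinedSymbolNames(llvm::jitlink::LinkGraph &G) {
  for (auto *Sym : G.defined_symbols()) {
    if (!Sym->hasName())
      continue;
    // allocateString copies the new name into graph-owned storage, so it stays
    // valid for the lifetime of the graph.
    Sym->setName(G.allocateString("renamed$" + Sym->getName()));
  }
  return llvm::Error::success();
}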
Section &createSection(StringRef Name, sys::Memory::ProtectionFlags Prot) { std::unique_ptr<Section> Sec(new Section(Name, Prot, Sections.size())); @@ -959,7 +990,7 @@ public: Section &Sec = Sym.getBlock().getSection(); Sec.removeSymbol(Sym); } - Sym.makeExternal(createAddressable(false)); + Sym.makeExternal(createAddressable(0, false)); ExternalSymbols.insert(&Sym); } @@ -1019,6 +1050,7 @@ private: BumpPtrAllocator Allocator; std::string Name; + Triple TT; unsigned PointerSize; support::endianness Endianness; SectionList Sections; @@ -1191,15 +1223,31 @@ struct PassConfiguration { /// Notable use cases: Building GOT, stub, and TLV symbols. LinkGraphPassList PostPrunePasses; + /// Post-allocation passes. + /// + /// These passes are called on the graph after memory has been allocated and + /// defined nodes have been assigned their final addresses, but before the + /// context has been notified of these addresses. At this point externals + /// have not been resolved, and symbol content has not yet been copied into + /// working memory. + /// + /// Notable use cases: Setting up data structures associated with addresses + /// of defined symbols (e.g. a mapping of __dso_handle to JITDylib* for the + /// JIT runtime) -- using a PostAllocationPass for this ensures that the + /// data structures are in-place before any query for resolved symbols + /// can complete. + LinkGraphPassList PostAllocationPasses; + /// Pre-fixup passes. /// /// These passes are called on the graph after memory has been allocated, - /// content copied into working memory, and nodes have been assigned their - /// final addresses. + /// content copied into working memory, and all nodes (including externals) + /// have been assigned their final addresses, but before any fixups have been + /// applied. /// /// Notable use cases: Late link-time optimizations like GOT and stub /// elimination. - LinkGraphPassList PostAllocationPasses; + LinkGraphPassList PreFixupPasses; /// Post-fixup passes. /// @@ -1255,16 +1303,18 @@ class JITLinkContext { public: using LookupMap = DenseMap<StringRef, SymbolLookupFlags>; + /// Create a JITLinkContext. + JITLinkContext(const JITLinkDylib *JD) : JD(JD) {} + /// Destroy a JITLinkContext. virtual ~JITLinkContext(); + /// Return the JITLinkDylib that this link is targeting, if any. + const JITLinkDylib *getJITLinkDylib() const { return JD; } + /// Return the MemoryManager to be used for this link. virtual JITLinkMemoryManager &getMemoryManager() = 0; - /// Returns a StringRef for the object buffer. - /// This method can not be called once takeObjectBuffer has been called. - virtual MemoryBufferRef getObjectBuffer() const = 0; - /// Notify this context that linking failed. /// Called by JITLink if linking cannot be completed. virtual void notifyFailed(Error Err) = 0; @@ -1279,7 +1329,11 @@ public: /// their final memory locations in the target process. At this point the /// LinkGraph can be inspected to build a symbol table, however the block /// content will not generally have been copied to the target location yet. - virtual void notifyResolved(LinkGraph &G) = 0; + /// + /// If the client detects an error in the LinkGraph state (e.g. unexpected or + /// missing symbols) they may return an error here. The error will be + /// propagated to notifyFailed and the linker will bail out. + virtual Error notifyResolved(LinkGraph &G) = 0; /// Called by JITLink to notify the context that the object has been /// finalized (i.e. emitted to memory and memory permissions set). 
If all of @@ -1305,16 +1359,25 @@ public: /// Called by JITLink to modify the pass pipeline prior to linking. /// The default version performs no modification. virtual Error modifyPassConfig(const Triple &TT, PassConfiguration &Config); + +private: + const JITLinkDylib *JD = nullptr; }; /// Marks all symbols in a graph live. This can be used as a default, /// conservative mark-live implementation. Error markAllSymbolsLive(LinkGraph &G); -/// Basic JITLink implementation. +/// Create a LinkGraph from the given object buffer. /// -/// This function will use sensible defaults for GOT and Stub handling. -void jitLink(std::unique_ptr<JITLinkContext> Ctx); +/// Note: The graph does not take ownership of the underlying buffer, nor copy +/// its contents. The caller is responsible for ensuring that the object buffer +/// outlives the graph. +Expected<std::unique_ptr<LinkGraph>> +createLinkGraphFromObject(MemoryBufferRef ObjectBuffer); + +/// Link the given graph. +void link(std::unique_ptr<LinkGraph> G, std::unique_ptr<JITLinkContext> Ctx); } // end namespace jitlink } // end namespace llvm diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/JITLinkDylib.h b/llvm/include/llvm/ExecutionEngine/JITLink/JITLinkDylib.h new file mode 100644 index 000000000000..2aa88cb50074 --- /dev/null +++ b/llvm/include/llvm/ExecutionEngine/JITLink/JITLinkDylib.h @@ -0,0 +1,24 @@ +//===-- JITLinkDylib.h - JITLink Dylib type ---------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Defines the JITLinkDylib API. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EXECUTIONENGINE_JITLINK_JITLINKDYLIB_H +#define LLVM_EXECUTIONENGINE_JITLINK_JITLINKDYLIB_H + +namespace llvm { +namespace jitlink { + +class JITLinkDylib {}; + +} // end namespace jitlink +} // end namespace llvm + +#endif // LLVM_EXECUTIONENGINE_JITLINK_JITLINKDYLIB_H diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h b/llvm/include/llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h index 0c8514a60a50..cee7d6b09c48 100644 --- a/llvm/include/llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h +++ b/llvm/include/llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h @@ -14,10 +14,11 @@ #define LLVM_EXECUTIONENGINE_JITLINK_JITLINKMEMORYMANAGER_H #include "llvm/ADT/DenseMap.h" +#include "llvm/ExecutionEngine/JITLink/JITLinkDylib.h" #include "llvm/ExecutionEngine/JITSymbol.h" #include "llvm/Support/Error.h" -#include "llvm/Support/Memory.h" #include "llvm/Support/MSVCErrorWorkarounds.h" +#include "llvm/Support/Memory.h" #include <cstdint> #include <future> @@ -93,18 +94,28 @@ public: virtual ~JITLinkMemoryManager(); /// Create an Allocation object. + /// + /// The JD argument represents the target JITLinkDylib, and can be used by + /// JITLinkMemoryManager implementers to manage per-dylib allocation pools + /// (e.g. one pre-reserved address space slab per dylib to ensure that all + /// allocations for the dylib are within a certain range). The JD argument + /// may be null (representing an allocation not associated with any + /// JITDylib. + /// + /// The request argument describes the segment sizes and permisssions being + /// requested. 
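// Illustrative sketch (not from the patch): a delegating JITLinkMemoryManager
// that observes the new JITLinkDylib* parameter before forwarding to an
// underlying manager (for example InProcessMemoryManager). The wrapper class
// and its logging are made up for the example; only the allocate signature
// comes from the declarations below.
#include "llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h"
#include "llvm/Support/raw_ostream.h"
#include <memory>

class LoggingJITLinkMemoryManager : public llvm::jitlink::JITLinkMemoryManager {
public:
  LoggingJITLinkMemoryManager(JITLinkMemoryManager &Underlying)
      : Underlying(Underlying) {}

  llvm::Expected<std::unique_ptr<Allocation>>
  allocate(const llvm::jitlink::JITLinkDylib *JD,
           const SegmentsRequestMap &Request) override {
    // JD may be null for allocations not associated with any JITDylib.
    llvm::errs() << "allocate(" << (const void *)JD << ") with "
                 << Request.size() << " segment(s)\n";
    return Underlying.allocate(JD, Request);
  }

private:
  JITLinkMemoryManager &Underlying;
};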
virtual Expected<std::unique_ptr<Allocation>> - allocate(const SegmentsRequestMap &Request) = 0; + allocate(const JITLinkDylib *JD, const SegmentsRequestMap &Request) = 0; }; /// A JITLinkMemoryManager that allocates in-process memory. class InProcessMemoryManager : public JITLinkMemoryManager { public: Expected<std::unique_ptr<Allocation>> - allocate(const SegmentsRequestMap &Request) override; + allocate(const JITLinkDylib *JD, const SegmentsRequestMap &Request) override; }; } // end namespace jitlink } // end namespace llvm -#endif // LLVM_EXECUTIONENGINE_JITLINK_JITLINK_H +#endif // LLVM_EXECUTIONENGINE_JITLINK_JITLINKMEMORYMANAGER_H diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/MachO.h b/llvm/include/llvm/ExecutionEngine/JITLink/MachO.h index 7facb657a51c..b8432c4d26c6 100644 --- a/llvm/include/llvm/ExecutionEngine/JITLink/MachO.h +++ b/llvm/include/llvm/ExecutionEngine/JITLink/MachO.h @@ -18,11 +18,20 @@ namespace llvm { namespace jitlink { +/// Create a LinkGraph from a MachO relocatable object. +/// +/// Note: The graph does not take ownership of the underlying buffer, nor copy +/// its contents. The caller is responsible for ensuring that the object buffer +/// outlives the graph. +Expected<std::unique_ptr<LinkGraph>> +createLinkGraphFromMachOObject(MemoryBufferRef ObjectBuffer); + /// jit-link the given ObjBuffer, which must be a MachO object file. /// /// Uses conservative defaults for GOT and stub handling based on the target /// platform. -void jitLink_MachO(std::unique_ptr<JITLinkContext> Ctx); +void link_MachO(std::unique_ptr<LinkGraph> G, + std::unique_ptr<JITLinkContext> Ctx); } // end namespace jitlink } // end namespace llvm diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/MachO_arm64.h b/llvm/include/llvm/ExecutionEngine/JITLink/MachO_arm64.h index d70b545fff86..c6aed2b60eac 100644 --- a/llvm/include/llvm/ExecutionEngine/JITLink/MachO_arm64.h +++ b/llvm/include/llvm/ExecutionEngine/JITLink/MachO_arm64.h @@ -40,6 +40,14 @@ enum MachOARM64RelocationKind : Edge::Kind { } // namespace MachO_arm64_Edges +/// Create a LinkGraph from a MachO/arm64 relocatable object. +/// +/// Note: The graph does not take ownership of the underlying buffer, nor copy +/// its contents. The caller is responsible for ensuring that the object buffer +/// outlives the graph. +Expected<std::unique_ptr<LinkGraph>> +createLinkGraphFromMachOObject_arm64(MemoryBufferRef ObjectBuffer); + /// jit-link the given object buffer, which must be a MachO arm64 object file. /// /// If PrePrunePasses is empty then a default mark-live pass will be inserted @@ -49,7 +57,8 @@ enum MachOARM64RelocationKind : Edge::Kind { /// If PostPrunePasses is empty then a default GOT-and-stubs insertion pass will /// be inserted. If PostPrunePasses is not empty then the caller is responsible /// for including a pass to insert GOT and stub edges. -void jitLink_MachO_arm64(std::unique_ptr<JITLinkContext> Ctx); +void link_MachO_arm64(std::unique_ptr<LinkGraph> G, + std::unique_ptr<JITLinkContext> Ctx); /// Return the string name of the given MachO arm64 edge kind. 
StringRef getMachOARM64RelocationKindName(Edge::Kind R); diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/MachO_x86_64.h b/llvm/include/llvm/ExecutionEngine/JITLink/MachO_x86_64.h index 27fcdf4fa990..66c53d8c8291 100644 --- a/llvm/include/llvm/ExecutionEngine/JITLink/MachO_x86_64.h +++ b/llvm/include/llvm/ExecutionEngine/JITLink/MachO_x86_64.h @@ -45,7 +45,15 @@ enum MachOX86RelocationKind : Edge::Kind { } // namespace MachO_x86_64_Edges -/// jit-link the given object buffer, which must be a MachO x86-64 object file. +/// Create a LinkGraph from a MachO/x86-64 relocatable object. +/// +/// Note: The graph does not take ownership of the underlying buffer, nor copy +/// its contents. The caller is responsible for ensuring that the object buffer +/// outlives the graph. +Expected<std::unique_ptr<LinkGraph>> +createLinkGraphFromMachOObject_x86_64(MemoryBufferRef ObjectBuffer); + +/// jit-link the given LinkGraph. /// /// If PrePrunePasses is empty then a default mark-live pass will be inserted /// that will mark all exported atoms live. If PrePrunePasses is not empty, the @@ -54,7 +62,8 @@ enum MachOX86RelocationKind : Edge::Kind { /// If PostPrunePasses is empty then a default GOT-and-stubs insertion pass will /// be inserted. If PostPrunePasses is not empty then the caller is responsible /// for including a pass to insert GOT and stub edges. -void jitLink_MachO_x86_64(std::unique_ptr<JITLinkContext> Ctx); +void link_MachO_x86_64(std::unique_ptr<LinkGraph> G, + std::unique_ptr<JITLinkContext> Ctx); /// Return the string name of the given MachO x86-64 edge kind. StringRef getMachOX86RelocationKindName(Edge::Kind R); diff --git a/llvm/include/llvm/ExecutionEngine/JITSymbol.h b/llvm/include/llvm/ExecutionEngine/JITSymbol.h index 6f0030a18f47..9bbdd21f77de 100644 --- a/llvm/include/llvm/ExecutionEngine/JITSymbol.h +++ b/llvm/include/llvm/ExecutionEngine/JITSymbol.h @@ -429,7 +429,7 @@ public: virtual JITSymbol findSymbol(const std::string &Name) = 0; private: - virtual void anchor(); + void anchor() override; }; } // end namespace llvm diff --git a/llvm/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h b/llvm/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h index 9ecc0464dec1..91b12fd2277a 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h @@ -20,12 +20,10 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ExecutionEngine/JITSymbol.h" #include "llvm/ExecutionEngine/Orc/IndirectionUtils.h" -#include "llvm/ExecutionEngine/Orc/LambdaResolver.h" #include "llvm/ExecutionEngine/Orc/Layer.h" #include "llvm/ExecutionEngine/Orc/LazyReexports.h" -#include "llvm/ExecutionEngine/Orc/Legacy.h" -#include "llvm/ExecutionEngine/Orc/OrcError.h" #include "llvm/ExecutionEngine/Orc/Speculation.h" +#include "llvm/ExecutionEngine/Orc/Shared/OrcError.h" #include "llvm/ExecutionEngine/RuntimeDyld.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/Constant.h" @@ -96,7 +94,8 @@ public: /// Emits the given module. This should not be called by clients: it will be /// called by the JIT when a definition added via the add method is requested. 
- void emit(MaterializationResponsibility R, ThreadSafeModule TSM) override; + void emit(std::unique_ptr<MaterializationResponsibility> R, + ThreadSafeModule TSM) override; private: struct PerDylibResources { @@ -120,7 +119,8 @@ private: void expandPartition(GlobalValueSet &Partition); - void emitPartition(MaterializationResponsibility R, ThreadSafeModule TSM, + void emitPartition(std::unique_ptr<MaterializationResponsibility> R, + ThreadSafeModule TSM, IRMaterializationUnit::SymbolNameToDefinitionMap Defs); mutable std::mutex CODLayerMutex; @@ -134,635 +134,6 @@ private: ImplSymbolMap *AliaseeImpls = nullptr; }; -/// Compile-on-demand layer. -/// -/// When a module is added to this layer a stub is created for each of its -/// function definitions. The stubs and other global values are immediately -/// added to the layer below. When a stub is called it triggers the extraction -/// of the function body from the original module. The extracted body is then -/// compiled and executed. -template <typename BaseLayerT, - typename CompileCallbackMgrT = JITCompileCallbackManager, - typename IndirectStubsMgrT = IndirectStubsManager> -class LegacyCompileOnDemandLayer { -private: - template <typename MaterializerFtor> - class LambdaMaterializer final : public ValueMaterializer { - public: - LambdaMaterializer(MaterializerFtor M) : M(std::move(M)) {} - - Value *materialize(Value *V) final { return M(V); } - - private: - MaterializerFtor M; - }; - - template <typename MaterializerFtor> - LambdaMaterializer<MaterializerFtor> - createLambdaMaterializer(MaterializerFtor M) { - return LambdaMaterializer<MaterializerFtor>(std::move(M)); - } - - // Provide type-erasure for the Modules and MemoryManagers. - template <typename ResourceT> - class ResourceOwner { - public: - ResourceOwner() = default; - ResourceOwner(const ResourceOwner &) = delete; - ResourceOwner &operator=(const ResourceOwner &) = delete; - virtual ~ResourceOwner() = default; - - virtual ResourceT& getResource() const = 0; - }; - - template <typename ResourceT, typename ResourcePtrT> - class ResourceOwnerImpl : public ResourceOwner<ResourceT> { - public: - ResourceOwnerImpl(ResourcePtrT ResourcePtr) - : ResourcePtr(std::move(ResourcePtr)) {} - - ResourceT& getResource() const override { return *ResourcePtr; } - - private: - ResourcePtrT ResourcePtr; - }; - - template <typename ResourceT, typename ResourcePtrT> - std::unique_ptr<ResourceOwner<ResourceT>> - wrapOwnership(ResourcePtrT ResourcePtr) { - using RO = ResourceOwnerImpl<ResourceT, ResourcePtrT>; - return std::make_unique<RO>(std::move(ResourcePtr)); - } - - struct LogicalDylib { - struct SourceModuleEntry { - std::unique_ptr<Module> SourceMod; - std::set<Function*> StubsToClone; - }; - - using SourceModulesList = std::vector<SourceModuleEntry>; - using SourceModuleHandle = typename SourceModulesList::size_type; - - LogicalDylib() = default; - - LogicalDylib(VModuleKey K, std::shared_ptr<SymbolResolver> BackingResolver, - std::unique_ptr<IndirectStubsMgrT> StubsMgr) - : K(std::move(K)), BackingResolver(std::move(BackingResolver)), - StubsMgr(std::move(StubsMgr)) {} - - SourceModuleHandle addSourceModule(std::unique_ptr<Module> M) { - SourceModuleHandle H = SourceModules.size(); - SourceModules.push_back(SourceModuleEntry()); - SourceModules.back().SourceMod = std::move(M); - return H; - } - - Module& getSourceModule(SourceModuleHandle H) { - return *SourceModules[H].SourceMod; - } - - std::set<Function*>& getStubsToClone(SourceModuleHandle H) { - return SourceModules[H].StubsToClone; - } 
- - JITSymbol findSymbol(BaseLayerT &BaseLayer, const std::string &Name, - bool ExportedSymbolsOnly) { - if (auto Sym = StubsMgr->findStub(Name, ExportedSymbolsOnly)) - return Sym; - for (auto BLK : BaseLayerVModuleKeys) - if (auto Sym = BaseLayer.findSymbolIn(BLK, Name, ExportedSymbolsOnly)) - return Sym; - else if (auto Err = Sym.takeError()) - return std::move(Err); - return nullptr; - } - - Error removeModulesFromBaseLayer(BaseLayerT &BaseLayer) { - for (auto &BLK : BaseLayerVModuleKeys) - if (auto Err = BaseLayer.removeModule(BLK)) - return Err; - return Error::success(); - } - - VModuleKey K; - std::shared_ptr<SymbolResolver> BackingResolver; - std::unique_ptr<IndirectStubsMgrT> StubsMgr; - SymbolLinkagePromoter PromoteSymbols; - SourceModulesList SourceModules; - std::vector<VModuleKey> BaseLayerVModuleKeys; - }; - -public: - - /// Module partitioning functor. - using PartitioningFtor = std::function<std::set<Function*>(Function&)>; - - /// Builder for IndirectStubsManagers. - using IndirectStubsManagerBuilderT = - std::function<std::unique_ptr<IndirectStubsMgrT>()>; - - using SymbolResolverGetter = - std::function<std::shared_ptr<SymbolResolver>(VModuleKey K)>; - - using SymbolResolverSetter = - std::function<void(VModuleKey K, std::shared_ptr<SymbolResolver> R)>; - - /// Construct a compile-on-demand layer instance. - LLVM_ATTRIBUTE_DEPRECATED( - LegacyCompileOnDemandLayer( - ExecutionSession &ES, BaseLayerT &BaseLayer, - SymbolResolverGetter GetSymbolResolver, - SymbolResolverSetter SetSymbolResolver, PartitioningFtor Partition, - CompileCallbackMgrT &CallbackMgr, - IndirectStubsManagerBuilderT CreateIndirectStubsManager, - bool CloneStubsIntoPartitions = true), - "ORCv1 layers (layers with the 'Legacy' prefix) are deprecated. Please " - "use " - "the ORCv2 LegacyCompileOnDemandLayer instead"); - - /// Legacy layer constructor with deprecation acknowledgement. - LegacyCompileOnDemandLayer( - ORCv1DeprecationAcknowledgement, ExecutionSession &ES, - BaseLayerT &BaseLayer, SymbolResolverGetter GetSymbolResolver, - SymbolResolverSetter SetSymbolResolver, PartitioningFtor Partition, - CompileCallbackMgrT &CallbackMgr, - IndirectStubsManagerBuilderT CreateIndirectStubsManager, - bool CloneStubsIntoPartitions = true) - : ES(ES), BaseLayer(BaseLayer), - GetSymbolResolver(std::move(GetSymbolResolver)), - SetSymbolResolver(std::move(SetSymbolResolver)), - Partition(std::move(Partition)), CompileCallbackMgr(CallbackMgr), - CreateIndirectStubsManager(std::move(CreateIndirectStubsManager)), - CloneStubsIntoPartitions(CloneStubsIntoPartitions) {} - - ~LegacyCompileOnDemandLayer() { - // FIXME: Report error on log. - while (!LogicalDylibs.empty()) - consumeError(removeModule(LogicalDylibs.begin()->first)); - } - - /// Add a module to the compile-on-demand layer. - Error addModule(VModuleKey K, std::unique_ptr<Module> M) { - - assert(!LogicalDylibs.count(K) && "VModuleKey K already in use"); - auto I = LogicalDylibs.insert( - LogicalDylibs.end(), - std::make_pair(K, LogicalDylib(K, GetSymbolResolver(K), - CreateIndirectStubsManager()))); - - return addLogicalModule(I->second, std::move(M)); - } - - /// Add extra modules to an existing logical module. - Error addExtraModule(VModuleKey K, std::unique_ptr<Module> M) { - return addLogicalModule(LogicalDylibs[K], std::move(M)); - } - - /// Remove the module represented by the given key. - /// - /// This will remove all modules in the layers below that were derived from - /// the module represented by K. 
- Error removeModule(VModuleKey K) { - auto I = LogicalDylibs.find(K); - assert(I != LogicalDylibs.end() && "VModuleKey K not valid here"); - auto Err = I->second.removeModulesFromBaseLayer(BaseLayer); - LogicalDylibs.erase(I); - return Err; - } - - /// Search for the given named symbol. - /// @param Name The name of the symbol to search for. - /// @param ExportedSymbolsOnly If true, search only for exported symbols. - /// @return A handle for the given named symbol, if it exists. - JITSymbol findSymbol(StringRef Name, bool ExportedSymbolsOnly) { - for (auto &KV : LogicalDylibs) { - if (auto Sym = KV.second.StubsMgr->findStub(Name, ExportedSymbolsOnly)) - return Sym; - if (auto Sym = - findSymbolIn(KV.first, std::string(Name), ExportedSymbolsOnly)) - return Sym; - else if (auto Err = Sym.takeError()) - return std::move(Err); - } - return BaseLayer.findSymbol(std::string(Name), ExportedSymbolsOnly); - } - - /// Get the address of a symbol provided by this layer, or some layer - /// below this one. - JITSymbol findSymbolIn(VModuleKey K, const std::string &Name, - bool ExportedSymbolsOnly) { - assert(LogicalDylibs.count(K) && "VModuleKey K is not valid here"); - return LogicalDylibs[K].findSymbol(BaseLayer, Name, ExportedSymbolsOnly); - } - - /// Update the stub for the given function to point at FnBodyAddr. - /// This can be used to support re-optimization. - /// @return true if the function exists and the stub is updated, false - /// otherwise. - // - // FIXME: We should track and free associated resources (unused compile - // callbacks, uncompiled IR, and no-longer-needed/reachable function - // implementations). - Error updatePointer(std::string FuncName, JITTargetAddress FnBodyAddr) { - //Find out which logical dylib contains our symbol - auto LDI = LogicalDylibs.begin(); - for (auto LDE = LogicalDylibs.end(); LDI != LDE; ++LDI) { - if (auto LMResources = - LDI->getLogicalModuleResourcesForSymbol(FuncName, false)) { - Module &SrcM = LMResources->SourceModule->getResource(); - std::string CalledFnName = mangle(FuncName, SrcM.getDataLayout()); - if (auto Err = LMResources->StubsMgr->updatePointer(CalledFnName, - FnBodyAddr)) - return Err; - return Error::success(); - } - } - return make_error<JITSymbolNotFound>(FuncName); - } - -private: - Error addLogicalModule(LogicalDylib &LD, std::unique_ptr<Module> SrcMPtr) { - - // Rename anonymous globals and promote linkage to ensure that everything - // will resolve properly after we partition SrcM. - LD.PromoteSymbols(*SrcMPtr); - - // Create a logical module handle for SrcM within the logical dylib. - Module &SrcM = *SrcMPtr; - auto LMId = LD.addSourceModule(std::move(SrcMPtr)); - - // Create stub functions. - const DataLayout &DL = SrcM.getDataLayout(); - - typename IndirectStubsMgrT::StubInitsMap StubInits; - for (auto &F : SrcM) { - // Skip declarations. - if (F.isDeclaration()) - continue; - - // Skip weak functions for which we already have definitions. - auto MangledName = mangle(F.getName(), DL); - if (F.hasWeakLinkage() || F.hasLinkOnceLinkage()) { - if (auto Sym = LD.findSymbol(BaseLayer, MangledName, false)) - continue; - else if (auto Err = Sym.takeError()) - return Err; - } - - // Record all functions defined by this module. - if (CloneStubsIntoPartitions) - LD.getStubsToClone(LMId).insert(&F); - - // Create a callback, associate it with the stub for the function, - // and set the compile action to compile the partition containing the - // function. 
- auto CompileAction = [this, &LD, LMId, &F]() -> JITTargetAddress { - if (auto FnImplAddrOrErr = this->extractAndCompile(LD, LMId, F)) - return *FnImplAddrOrErr; - else { - // FIXME: Report error, return to 'abort' or something similar. - consumeError(FnImplAddrOrErr.takeError()); - return 0; - } - }; - if (auto CCAddr = - CompileCallbackMgr.getCompileCallback(std::move(CompileAction))) - StubInits[MangledName] = - std::make_pair(*CCAddr, JITSymbolFlags::fromGlobalValue(F)); - else - return CCAddr.takeError(); - } - - if (auto Err = LD.StubsMgr->createStubs(StubInits)) - return Err; - - // If this module doesn't contain any globals, aliases, or module flags then - // we can bail out early and avoid the overhead of creating and managing an - // empty globals module. - if (SrcM.global_empty() && SrcM.alias_empty() && - !SrcM.getModuleFlagsMetadata()) - return Error::success(); - - // Create the GlobalValues module. - auto GVsM = std::make_unique<Module>((SrcM.getName() + ".globals").str(), - SrcM.getContext()); - GVsM->setDataLayout(DL); - - ValueToValueMapTy VMap; - - // Clone global variable decls. - for (auto &GV : SrcM.globals()) - if (!GV.isDeclaration() && !VMap.count(&GV)) - cloneGlobalVariableDecl(*GVsM, GV, &VMap); - - // And the aliases. - for (auto &A : SrcM.aliases()) - if (!VMap.count(&A)) - cloneGlobalAliasDecl(*GVsM, A, VMap); - - // Clone the module flags. - cloneModuleFlagsMetadata(*GVsM, SrcM, VMap); - - // Now we need to clone the GV and alias initializers. - - // Initializers may refer to functions declared (but not defined) in this - // module. Build a materializer to clone decls on demand. - auto Materializer = createLambdaMaterializer( - [&LD, &GVsM](Value *V) -> Value* { - if (auto *F = dyn_cast<Function>(V)) { - // Decls in the original module just get cloned. - if (F->isDeclaration()) - return cloneFunctionDecl(*GVsM, *F); - - // Definitions in the original module (which we have emitted stubs - // for at this point) get turned into a constant alias to the stub - // instead. - const DataLayout &DL = GVsM->getDataLayout(); - std::string FName = mangle(F->getName(), DL); - unsigned PtrBitWidth = DL.getPointerTypeSizeInBits(F->getType()); - JITTargetAddress StubAddr = - LD.StubsMgr->findStub(FName, false).getAddress(); - - ConstantInt *StubAddrCI = - ConstantInt::get(GVsM->getContext(), APInt(PtrBitWidth, StubAddr)); - Constant *Init = ConstantExpr::getCast(Instruction::IntToPtr, - StubAddrCI, F->getType()); - return GlobalAlias::create(F->getFunctionType(), - F->getType()->getAddressSpace(), - F->getLinkage(), F->getName(), - Init, GVsM.get()); - } - // else.... - return nullptr; - }); - - // Clone the global variable initializers. - for (auto &GV : SrcM.globals()) - if (!GV.isDeclaration()) - moveGlobalVariableInitializer(GV, VMap, &Materializer); - - // Clone the global alias initializers. - for (auto &A : SrcM.aliases()) { - auto *NewA = cast<GlobalAlias>(VMap[&A]); - assert(NewA && "Alias not cloned?"); - Value *Init = MapValue(A.getAliasee(), VMap, RF_None, nullptr, - &Materializer); - NewA->setAliasee(cast<Constant>(Init)); - } - - // Build a resolver for the globals module and add it to the base layer. 
- auto LegacyLookup = [this, &LD](StringRef Name) -> JITSymbol { - if (auto Sym = LD.StubsMgr->findStub(Name, false)) - return Sym; - - if (auto Sym = LD.findSymbol(BaseLayer, std::string(Name), false)) - return Sym; - else if (auto Err = Sym.takeError()) - return std::move(Err); - - return nullptr; - }; - - auto GVsResolver = createSymbolResolver( - [&LD, LegacyLookup](const SymbolNameSet &Symbols) { - auto RS = getResponsibilitySetWithLegacyFn(Symbols, LegacyLookup); - - if (!RS) { - logAllUnhandledErrors( - RS.takeError(), errs(), - "CODLayer/GVsResolver responsibility set lookup failed: "); - return SymbolNameSet(); - } - - if (RS->size() == Symbols.size()) - return *RS; - - SymbolNameSet NotFoundViaLegacyLookup; - for (auto &S : Symbols) - if (!RS->count(S)) - NotFoundViaLegacyLookup.insert(S); - auto RS2 = - LD.BackingResolver->getResponsibilitySet(NotFoundViaLegacyLookup); - - for (auto &S : RS2) - (*RS).insert(S); - - return *RS; - }, - [this, &LD, - LegacyLookup](std::shared_ptr<AsynchronousSymbolQuery> Query, - SymbolNameSet Symbols) { - auto NotFoundViaLegacyLookup = - lookupWithLegacyFn(ES, *Query, Symbols, LegacyLookup); - return LD.BackingResolver->lookup(Query, NotFoundViaLegacyLookup); - }); - - SetSymbolResolver(LD.K, std::move(GVsResolver)); - - if (auto Err = BaseLayer.addModule(LD.K, std::move(GVsM))) - return Err; - - LD.BaseLayerVModuleKeys.push_back(LD.K); - - return Error::success(); - } - - static std::string mangle(StringRef Name, const DataLayout &DL) { - std::string MangledName; - { - raw_string_ostream MangledNameStream(MangledName); - Mangler::getNameWithPrefix(MangledNameStream, Name, DL); - } - return MangledName; - } - - Expected<JITTargetAddress> - extractAndCompile(LogicalDylib &LD, - typename LogicalDylib::SourceModuleHandle LMId, - Function &F) { - Module &SrcM = LD.getSourceModule(LMId); - - // If F is a declaration we must already have compiled it. - if (F.isDeclaration()) - return 0; - - // Grab the name of the function being called here. - std::string CalledFnName = mangle(F.getName(), SrcM.getDataLayout()); - - JITTargetAddress CalledAddr = 0; - auto Part = Partition(F); - if (auto PartKeyOrErr = emitPartition(LD, LMId, Part)) { - auto &PartKey = *PartKeyOrErr; - for (auto *SubF : Part) { - std::string FnName = mangle(SubF->getName(), SrcM.getDataLayout()); - if (auto FnBodySym = BaseLayer.findSymbolIn(PartKey, FnName, false)) { - if (auto FnBodyAddrOrErr = FnBodySym.getAddress()) { - JITTargetAddress FnBodyAddr = *FnBodyAddrOrErr; - - // If this is the function we're calling record the address so we can - // return it from this function. - if (SubF == &F) - CalledAddr = FnBodyAddr; - - // Update the function body pointer for the stub. - if (auto EC = LD.StubsMgr->updatePointer(FnName, FnBodyAddr)) - return 0; - - } else - return FnBodyAddrOrErr.takeError(); - } else if (auto Err = FnBodySym.takeError()) - return std::move(Err); - else - llvm_unreachable("Function not emitted for partition"); - } - - LD.BaseLayerVModuleKeys.push_back(PartKey); - } else - return PartKeyOrErr.takeError(); - - return CalledAddr; - } - - template <typename PartitionT> - Expected<VModuleKey> - emitPartition(LogicalDylib &LD, - typename LogicalDylib::SourceModuleHandle LMId, - const PartitionT &Part) { - Module &SrcM = LD.getSourceModule(LMId); - - // Create the module. 
- std::string NewName(SrcM.getName()); - for (auto *F : Part) { - NewName += "."; - NewName += F->getName(); - } - - auto M = std::make_unique<Module>(NewName, SrcM.getContext()); - M->setDataLayout(SrcM.getDataLayout()); - ValueToValueMapTy VMap; - - auto Materializer = createLambdaMaterializer([&LD, &LMId, - &M](Value *V) -> Value * { - if (auto *GV = dyn_cast<GlobalVariable>(V)) - return cloneGlobalVariableDecl(*M, *GV); - - if (auto *F = dyn_cast<Function>(V)) { - // Check whether we want to clone an available_externally definition. - if (!LD.getStubsToClone(LMId).count(F)) - return cloneFunctionDecl(*M, *F); - - // Ok - we want an inlinable stub. For that to work we need a decl - // for the stub pointer. - auto *StubPtr = createImplPointer(*F->getType(), *M, - F->getName() + "$stub_ptr", nullptr); - auto *ClonedF = cloneFunctionDecl(*M, *F); - makeStub(*ClonedF, *StubPtr); - ClonedF->setLinkage(GlobalValue::AvailableExternallyLinkage); - ClonedF->addFnAttr(Attribute::AlwaysInline); - return ClonedF; - } - - if (auto *A = dyn_cast<GlobalAlias>(V)) { - auto *Ty = A->getValueType(); - if (Ty->isFunctionTy()) - return Function::Create(cast<FunctionType>(Ty), - GlobalValue::ExternalLinkage, A->getName(), - M.get()); - - return new GlobalVariable(*M, Ty, false, GlobalValue::ExternalLinkage, - nullptr, A->getName(), nullptr, - GlobalValue::NotThreadLocal, - A->getType()->getAddressSpace()); - } - - return nullptr; - }); - - // Create decls in the new module. - for (auto *F : Part) - cloneFunctionDecl(*M, *F, &VMap); - - // Move the function bodies. - for (auto *F : Part) - moveFunctionBody(*F, VMap, &Materializer); - - auto K = ES.allocateVModule(); - - auto LegacyLookup = [this, &LD](StringRef Name) -> JITSymbol { - return LD.findSymbol(BaseLayer, std::string(Name), false); - }; - - // Create memory manager and symbol resolver. 
- auto Resolver = createSymbolResolver( - [&LD, LegacyLookup](const SymbolNameSet &Symbols) { - auto RS = getResponsibilitySetWithLegacyFn(Symbols, LegacyLookup); - if (!RS) { - logAllUnhandledErrors( - RS.takeError(), errs(), - "CODLayer/SubResolver responsibility set lookup failed: "); - return SymbolNameSet(); - } - - if (RS->size() == Symbols.size()) - return *RS; - - SymbolNameSet NotFoundViaLegacyLookup; - for (auto &S : Symbols) - if (!RS->count(S)) - NotFoundViaLegacyLookup.insert(S); - - auto RS2 = - LD.BackingResolver->getResponsibilitySet(NotFoundViaLegacyLookup); - - for (auto &S : RS2) - (*RS).insert(S); - - return *RS; - }, - [this, &LD, LegacyLookup](std::shared_ptr<AsynchronousSymbolQuery> Q, - SymbolNameSet Symbols) { - auto NotFoundViaLegacyLookup = - lookupWithLegacyFn(ES, *Q, Symbols, LegacyLookup); - return LD.BackingResolver->lookup(Q, - std::move(NotFoundViaLegacyLookup)); - }); - SetSymbolResolver(K, std::move(Resolver)); - - if (auto Err = BaseLayer.addModule(std::move(K), std::move(M))) - return std::move(Err); - - return K; - } - - ExecutionSession &ES; - BaseLayerT &BaseLayer; - SymbolResolverGetter GetSymbolResolver; - SymbolResolverSetter SetSymbolResolver; - PartitioningFtor Partition; - CompileCallbackMgrT &CompileCallbackMgr; - IndirectStubsManagerBuilderT CreateIndirectStubsManager; - - std::map<VModuleKey, LogicalDylib> LogicalDylibs; - bool CloneStubsIntoPartitions; -}; - -template <typename BaseLayerT, typename CompileCallbackMgrT, - typename IndirectStubsMgrT> -LegacyCompileOnDemandLayer<BaseLayerT, CompileCallbackMgrT, IndirectStubsMgrT>:: - LegacyCompileOnDemandLayer( - ExecutionSession &ES, BaseLayerT &BaseLayer, - SymbolResolverGetter GetSymbolResolver, - SymbolResolverSetter SetSymbolResolver, PartitioningFtor Partition, - CompileCallbackMgrT &CallbackMgr, - IndirectStubsManagerBuilderT CreateIndirectStubsManager, - bool CloneStubsIntoPartitions) - : ES(ES), BaseLayer(BaseLayer), - GetSymbolResolver(std::move(GetSymbolResolver)), - SetSymbolResolver(std::move(SetSymbolResolver)), - Partition(std::move(Partition)), CompileCallbackMgr(CallbackMgr), - CreateIndirectStubsManager(std::move(CreateIndirectStubsManager)), - CloneStubsIntoPartitions(CloneStubsIntoPartitions) {} - } // end namespace orc } // end namespace llvm diff --git a/llvm/include/llvm/ExecutionEngine/Orc/CompileUtils.h b/llvm/include/llvm/ExecutionEngine/Orc/CompileUtils.h index 8376d163d57a..c7ba57228ab7 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/CompileUtils.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/CompileUtils.h @@ -28,8 +28,6 @@ class TargetMachine; namespace orc { -class JITTargetMachineBuilder; - IRSymbolMapper::ManglingOptions irManglingOptionsFromTargetOptions(const TargetOptions &Opts); diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Core.h b/llvm/include/llvm/ExecutionEngine/Orc/Core.h index a117acefd2d3..4a4b58ed32e3 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/Core.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/Core.h @@ -16,11 +16,14 @@ #include "llvm/ADT/BitmaskEnum.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/FunctionExtras.h" +#include "llvm/ADT/IntrusiveRefCntPtr.h" +#include "llvm/ExecutionEngine/JITLink/JITLinkDylib.h" #include "llvm/ExecutionEngine/JITSymbol.h" #include "llvm/ExecutionEngine/Orc/SymbolStringPool.h" #include "llvm/ExecutionEngine/OrcV1Deprecation.h" #include "llvm/Support/Debug.h" +#include <atomic> #include <memory> #include <vector> @@ -33,11 +36,67 @@ class ExecutionSession; class MaterializationUnit; class 
MaterializationResponsibility; class JITDylib; +class ResourceTracker; +class InProgressLookupState; + enum class SymbolState : uint8_t; -/// VModuleKey provides a unique identifier (allocated and managed by -/// ExecutionSessions) for a module added to the JIT. -using VModuleKey = uint64_t; +using ResourceTrackerSP = IntrusiveRefCntPtr<ResourceTracker>; +using JITDylibSP = IntrusiveRefCntPtr<JITDylib>; + +using ResourceKey = uintptr_t; + +/// API to remove / transfer ownership of JIT resources. +class ResourceTracker : public ThreadSafeRefCountedBase<ResourceTracker> { +private: + friend class ExecutionSession; + friend class JITDylib; + friend class MaterializationResponsibility; + +public: + ResourceTracker(const ResourceTracker &) = delete; + ResourceTracker &operator=(const ResourceTracker &) = delete; + ResourceTracker(ResourceTracker &&) = delete; + ResourceTracker &operator=(ResourceTracker &&) = delete; + + ~ResourceTracker(); + + /// Return the JITDylib targeted by this tracker. + JITDylib &getJITDylib() const { + return *reinterpret_cast<JITDylib *>(JDAndFlag.load() & + ~static_cast<uintptr_t>(1)); + } + + /// Remove all resources associated with this key. + Error remove(); + + /// Transfer all resources associated with this key to the given + /// tracker, which must target the same JITDylib as this one. + void transferTo(ResourceTracker &DstRT); + + /// Return true if this tracker has become defunct. + bool isDefunct() const { return JDAndFlag.load() & 0x1; } + + /// Returns the key associated with this tracker. + /// This method should not be used except for debug logging: there is no + /// guarantee that the returned value will remain valid. + ResourceKey getKeyUnsafe() const { return reinterpret_cast<uintptr_t>(this); } + +private: + ResourceTracker(JITDylibSP JD); + + void makeDefunct(); + + std::atomic_uintptr_t JDAndFlag; +}; + +/// Listens for ResourceTracker operations. +class ResourceManager { +public: + virtual ~ResourceManager(); + virtual Error handleRemoveResources(ResourceKey K) = 0; + virtual void handleTransferResources(ResourceKey DstK, ResourceKey SrcK) = 0; +}; /// A set of symbol names (represented by SymbolStringPtrs for // efficiency). @@ -158,9 +217,19 @@ public: /// Add an element to the set. The client is responsible for checking that /// duplicates are not added. - void add(SymbolStringPtr Name, - SymbolLookupFlags Flags = SymbolLookupFlags::RequiredSymbol) { + SymbolLookupSet & + add(SymbolStringPtr Name, + SymbolLookupFlags Flags = SymbolLookupFlags::RequiredSymbol) { Symbols.push_back(std::make_pair(std::move(Name), Flags)); + return *this; + } + + /// Quickly append one lookup set to another. + SymbolLookupSet &append(SymbolLookupSet Other) { + Symbols.reserve(Symbols.size() + Other.size()); + for (auto &KV : Other) + Symbols.push_back(std::move(KV)); + return *this; } bool empty() const { return Symbols.empty(); } @@ -287,7 +356,7 @@ public: for (UnderlyingVector::size_type I = 1; I != Symbols.size(); ++I) if (Symbols[I].first == Symbols[I - 1].first) return true; - return true; + return false; } #endif @@ -318,6 +387,18 @@ using RegisterDependenciesFunction = /// are no dependants to register with. 
extern RegisterDependenciesFunction NoDependenciesToRegister; +class ResourceTrackerDefunct : public ErrorInfo<ResourceTrackerDefunct> { +public: + static char ID; + + ResourceTrackerDefunct(ResourceTrackerSP RT); + std::error_code convertToErrorCode() const override; + void log(raw_ostream &OS) const override; + +private: + ResourceTrackerSP RT; +}; + /// Used to notify a JITDylib that the given set of symbols failed to /// materialize. class FailedToMaterialize : public ErrorInfo<FailedToMaterialize> { @@ -408,9 +489,10 @@ private: /// emit symbols, or abandon materialization by notifying any unmaterialized /// symbols of an error. class MaterializationResponsibility { - friend class MaterializationUnit; + friend class ExecutionSession; + public: - MaterializationResponsibility(MaterializationResponsibility &&) = default; + MaterializationResponsibility(MaterializationResponsibility &&) = delete; MaterializationResponsibility & operator=(MaterializationResponsibility &&) = delete; @@ -419,12 +501,15 @@ public: /// emitted or notified of an error. ~MaterializationResponsibility(); + /// Returns the ResourceTracker for this instance. + template <typename Func> Error withResourceKeyDo(Func &&F) const; + /// Returns the target JITDylib that these symbols are being materialized /// into. JITDylib &getTargetJITDylib() const { return *JD; } - /// Returns the VModuleKey for this instance. - VModuleKey getVModuleKey() const { return K; } + /// Returns the ExecutionSession for this instance. + ExecutionSession &getExecutionSession(); /// Returns the symbol flags map for this responsibility instance. /// Note: The returned flags may have transient flags (Lazy, Materializing) @@ -509,13 +594,13 @@ public: /// materializers to break up work based on run-time information (e.g. /// by introspecting which symbols have actually been looked up and /// materializing only those). - void replace(std::unique_ptr<MaterializationUnit> MU); + Error replace(std::unique_ptr<MaterializationUnit> MU); /// Delegates responsibility for the given symbols to the returned /// materialization responsibility. Useful for breaking up work between /// threads, or different kinds of materialization processes. - MaterializationResponsibility delegate(const SymbolNameSet &Symbols, - VModuleKey NewKey = VModuleKey()); + Expected<std::unique_ptr<MaterializationResponsibility>> + delegate(const SymbolNameSet &Symbols); void addDependencies(const SymbolStringPtr &Name, const SymbolDependenceMap &Dependencies); @@ -526,19 +611,17 @@ public: private: /// Create a MaterializationResponsibility for the given JITDylib and /// initial symbols. 
- MaterializationResponsibility(std::shared_ptr<JITDylib> JD, - SymbolFlagsMap SymbolFlags, - SymbolStringPtr InitSymbol, VModuleKey K) + MaterializationResponsibility(JITDylibSP JD, SymbolFlagsMap SymbolFlags, + SymbolStringPtr InitSymbol) : JD(std::move(JD)), SymbolFlags(std::move(SymbolFlags)), - InitSymbol(std::move(InitSymbol)), K(std::move(K)) { - assert(this->JD && "Cannot initialize with null JD"); + InitSymbol(std::move(InitSymbol)) { + assert(this->JD && "Cannot initialize with null JITDylib"); assert(!this->SymbolFlags.empty() && "Materializing nothing?"); } - std::shared_ptr<JITDylib> JD; + JITDylibSP JD; SymbolFlagsMap SymbolFlags; SymbolStringPtr InitSymbol; - VModuleKey K; }; /// A MaterializationUnit represents a set of symbol definitions that can @@ -555,9 +638,9 @@ class MaterializationUnit { public: MaterializationUnit(SymbolFlagsMap InitalSymbolFlags, - SymbolStringPtr InitSymbol, VModuleKey K) + SymbolStringPtr InitSymbol) : SymbolFlags(std::move(InitalSymbolFlags)), - InitSymbol(std::move(InitSymbol)), K(std::move(K)) { + InitSymbol(std::move(InitSymbol)) { assert((!this->InitSymbol || this->SymbolFlags.count(this->InitSymbol)) && "If set, InitSymbol should appear in InitialSymbolFlags map"); } @@ -577,7 +660,8 @@ public: /// Implementations of this method should materialize all symbols /// in the materialzation unit, except for those that have been /// previously discarded. - virtual void materialize(MaterializationResponsibility R) = 0; + virtual void + materialize(std::unique_ptr<MaterializationResponsibility> R) = 0; /// Called by JITDylibs to notify MaterializationUnits that the given symbol /// has been overridden. @@ -589,17 +673,10 @@ public: protected: SymbolFlagsMap SymbolFlags; SymbolStringPtr InitSymbol; - VModuleKey K; private: virtual void anchor(); - MaterializationResponsibility - createMaterializationResponsibility(std::shared_ptr<JITDylib> JD) { - return MaterializationResponsibility(std::move(JD), std::move(SymbolFlags), - std::move(InitSymbol), K); - } - /// Implementations of this method should discard the given symbol /// from the source (e.g. if the source is an LLVM IR Module and the /// symbol is a function, delete the function body or mark it available @@ -607,21 +684,18 @@ private: virtual void discard(const JITDylib &JD, const SymbolStringPtr &Name) = 0; }; -using MaterializationUnitList = - std::vector<std::unique_ptr<MaterializationUnit>>; - /// A MaterializationUnit implementation for pre-existing absolute symbols. /// /// All symbols will be resolved and marked ready as soon as the unit is /// materialized. class AbsoluteSymbolsMaterializationUnit : public MaterializationUnit { public: - AbsoluteSymbolsMaterializationUnit(SymbolMap Symbols, VModuleKey K); + AbsoluteSymbolsMaterializationUnit(SymbolMap Symbols); StringRef getName() const override; private: - void materialize(MaterializationResponsibility R) override; + void materialize(std::unique_ptr<MaterializationResponsibility> R) override; void discard(const JITDylib &JD, const SymbolStringPtr &Name) override; static SymbolFlagsMap extractFlags(const SymbolMap &Symbols); @@ -639,9 +713,9 @@ private: /// \endcode /// inline std::unique_ptr<AbsoluteSymbolsMaterializationUnit> -absoluteSymbols(SymbolMap Symbols, VModuleKey K = VModuleKey()) { +absoluteSymbols(SymbolMap Symbols) { return std::make_unique<AbsoluteSymbolsMaterializationUnit>( - std::move(Symbols), std::move(K)); + std::move(Symbols)); } /// A materialization unit for symbol aliases. 
Allows existing symbols to be @@ -658,12 +732,12 @@ public: /// resolved. ReExportsMaterializationUnit(JITDylib *SourceJD, JITDylibLookupFlags SourceJDLookupFlags, - SymbolAliasMap Aliases, VModuleKey K); + SymbolAliasMap Aliases); StringRef getName() const override; private: - void materialize(MaterializationResponsibility R) override; + void materialize(std::unique_ptr<MaterializationResponsibility> R) override; void discard(const JITDylib &JD, const SymbolStringPtr &Name) override; static SymbolFlagsMap extractFlags(const SymbolAliasMap &Aliases); @@ -684,10 +758,9 @@ private: /// return Err; /// \endcode inline std::unique_ptr<ReExportsMaterializationUnit> -symbolAliases(SymbolAliasMap Aliases, VModuleKey K = VModuleKey()) { +symbolAliases(SymbolAliasMap Aliases) { return std::make_unique<ReExportsMaterializationUnit>( - nullptr, JITDylibLookupFlags::MatchAllSymbols, std::move(Aliases), - std::move(K)); + nullptr, JITDylibLookupFlags::MatchAllSymbols, std::move(Aliases)); } /// Create a materialization unit for re-exporting symbols from another JITDylib @@ -696,10 +769,9 @@ symbolAliases(SymbolAliasMap Aliases, VModuleKey K = VModuleKey()) { inline std::unique_ptr<ReExportsMaterializationUnit> reexports(JITDylib &SourceJD, SymbolAliasMap Aliases, JITDylibLookupFlags SourceJDLookupFlags = - JITDylibLookupFlags::MatchExportedSymbolsOnly, - VModuleKey K = VModuleKey()) { + JITDylibLookupFlags::MatchExportedSymbolsOnly) { return std::make_unique<ReExportsMaterializationUnit>( - &SourceJD, SourceJDLookupFlags, std::move(Aliases), std::move(K)); + &SourceJD, SourceJDLookupFlags, std::move(Aliases)); } /// Build a SymbolAliasMap for the common case where you want to re-export @@ -723,8 +795,10 @@ enum class SymbolState : uint8_t { /// makes a callback when all symbols are available. class AsynchronousSymbolQuery { friend class ExecutionSession; + friend class InProgressFullLookupState; friend class JITDylib; friend class JITSymbolResolverAdapter; + friend class MaterializationResponsibility; public: /// Create a query for the given symbols. The NotifyComplete @@ -757,8 +831,6 @@ private: void dropSymbol(const SymbolStringPtr &Name); - bool canStillFail(); - void handleFailed(Error Err); void detach(); @@ -770,34 +842,62 @@ private: SymbolState RequiredState; }; +/// Wraps state for a lookup-in-progress. +/// DefinitionGenerators can optionally take ownership of a LookupState object +/// to suspend a lookup-in-progress while they search for definitions. +class LookupState { + friend class OrcV2CAPIHelper; + friend class ExecutionSession; + +public: + LookupState(); + LookupState(LookupState &&); + LookupState &operator=(LookupState &&); + ~LookupState(); + + /// Continue the lookup. This can be called by DefinitionGenerators + /// to re-start a captured query-application operation. + void continueLookup(Error Err); + +private: + LookupState(std::unique_ptr<InProgressLookupState> IPLS); + + // For C API. + void reset(InProgressLookupState *IPLS); + + std::unique_ptr<InProgressLookupState> IPLS; +}; + +/// Definition generators can be attached to JITDylibs to generate new +/// definitions for otherwise unresolved symbols during lookup. +class DefinitionGenerator { +public: + virtual ~DefinitionGenerator(); + + /// DefinitionGenerators should override this method to insert new + /// definitions into the parent JITDylib. K specifies the kind of this + /// lookup. 
JD specifies the target JITDylib being searched, and + /// JDLookupFlags specifies whether the search should match against + /// hidden symbols. Finally, Symbols describes the set of unresolved + /// symbols and their associated lookup flags. + virtual Error tryToGenerate(LookupState &LS, LookupKind K, JITDylib &JD, + JITDylibLookupFlags JDLookupFlags, + const SymbolLookupSet &LookupSet) = 0; +}; + /// A symbol table that supports asynchoronous symbol queries. /// /// Represents a virtual shared object. Instances can not be copied or moved, so /// their addresses may be used as keys for resource management. /// JITDylib state changes must be made via an ExecutionSession to guarantee /// that they are synchronized with respect to other JITDylib operations. -class JITDylib : public std::enable_shared_from_this<JITDylib> { +class JITDylib : public ThreadSafeRefCountedBase<JITDylib>, + public jitlink::JITLinkDylib { friend class AsynchronousSymbolQuery; friend class ExecutionSession; friend class Platform; friend class MaterializationResponsibility; public: - /// Definition generators can be attached to JITDylibs to generate new - /// definitions for otherwise unresolved symbols during lookup. - class DefinitionGenerator { - public: - virtual ~DefinitionGenerator(); - - /// DefinitionGenerators should override this method to insert new - /// definitions into the parent JITDylib. K specifies the kind of this - /// lookup. JD specifies the target JITDylib being searched, and - /// JDLookupFlags specifies whether the search should match against - /// hidden symbols. Finally, Symbols describes the set of unresolved - /// symbols and their associated lookup flags. - virtual Error tryToGenerate(LookupKind K, JITDylib &JD, - JITDylibLookupFlags JDLookupFlags, - const SymbolLookupSet &LookupSet) = 0; - }; using AsynchronousSymbolQuerySet = std::set<std::shared_ptr<AsynchronousSymbolQuery>>; @@ -813,6 +913,21 @@ public: /// Get a reference to the ExecutionSession for this JITDylib. ExecutionSession &getExecutionSession() const { return ES; } + /// Calls remove on all trackers currently associated with this JITDylib. + /// Does not run static deinits. + /// + /// Note that removal happens outside the session lock, so new code may be + /// added concurrently while the clear is underway, and the newly added + /// code will *not* be cleared. Adding new code concurrently with a clear + /// is usually a bug and should be avoided. + Error clear(); + + /// Get the default resource tracker for this JITDylib. + ResourceTrackerSP getDefaultResourceTracker(); + + /// Create a resource tracker for this JITDylib. + ResourceTrackerSP createResourceTracker(); + /// Adds a definition generator to this JITDylib and returns a referenece to /// it. /// @@ -873,10 +988,13 @@ public: /// Define all symbols provided by the materialization unit to be part of this /// JITDylib. /// + /// If RT is not specified then the default resource tracker will be used. + /// /// This overload always takes ownership of the MaterializationUnit. If any /// errors occur, the MaterializationUnit consumed. template <typename MaterializationUnitType> - Error define(std::unique_ptr<MaterializationUnitType> &&MU); + Error define(std::unique_ptr<MaterializationUnitType> &&MU, + ResourceTrackerSP RT = nullptr); /// Define all symbols provided by the materialization unit to be part of this /// JITDylib. @@ -886,7 +1004,8 @@ public: /// may allow the caller to modify the MaterializationUnit to correct the /// issue, then re-call define. 
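As a sketch of the hoisted DefinitionGenerator interface (note the new LookupState parameter in tryToGenerate), a generator that services lookups from a host-side symbol table might look roughly like this; the table and its contents are hypothetical:

    class SymbolMapGenerator : public DefinitionGenerator {
    public:
      SymbolMapGenerator(SymbolMap Table) : Table(std::move(Table)) {}

      Error tryToGenerate(LookupState &LS, LookupKind K, JITDylib &JD,
                          JITDylibLookupFlags JDLookupFlags,
                          const SymbolLookupSet &LookupSet) override {
        SymbolMap NewDefs;
        for (auto &KV : LookupSet) {
          auto I = Table.find(KV.first);
          if (I != Table.end())
            NewDefs[KV.first] = I->second;
        }
        if (NewDefs.empty())
          return Error::success();   // nothing to contribute; the lookup proceeds
        return JD.define(absoluteSymbols(std::move(NewDefs)));
      }

    private:
      SymbolMap Table;   // hypothetical pre-populated name -> symbol table
    };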
template <typename MaterializationUnitType> - Error define(std::unique_ptr<MaterializationUnitType> &MU); + Error define(std::unique_ptr<MaterializationUnitType> &MU, + ResourceTrackerSP RT = nullptr); /// Tries to remove the given symbols. /// @@ -900,41 +1019,47 @@ public: /// left unmodified (no symbols are removed). Error remove(const SymbolNameSet &Names); - /// Search the given JITDylib for the symbols in Symbols. If found, store - /// the flags for each symbol in Flags. If any required symbols are not found - /// then an error will be returned. - Expected<SymbolFlagsMap> lookupFlags(LookupKind K, - JITDylibLookupFlags JDLookupFlags, - SymbolLookupSet LookupSet); - /// Dump current JITDylib state to OS. void dump(raw_ostream &OS); - /// FIXME: Remove this when we remove the old ORC layers. - /// Search the given JITDylibs in order for the symbols in Symbols. Results - /// (once they become available) will be returned via the given Query. - /// - /// If any symbol is not found then the unresolved symbols will be returned, - /// and the query will not be applied. The Query is not failed and can be - /// re-used in a subsequent lookup once the symbols have been added, or - /// manually failed. - Expected<SymbolNameSet> - legacyLookup(std::shared_ptr<AsynchronousSymbolQuery> Q, SymbolNameSet Names); + /// Returns the given JITDylibs and all of their transitive dependencies in + /// DFS order (based on linkage relationships). Each JITDylib will appear + /// only once. + static std::vector<JITDylibSP> getDFSLinkOrder(ArrayRef<JITDylibSP> JDs); + + /// Returns the given JITDylibs and all of their transitive dependensies in + /// reverse DFS order (based on linkage relationships). Each JITDylib will + /// appear only once. + static std::vector<JITDylibSP> + getReverseDFSLinkOrder(ArrayRef<JITDylibSP> JDs); + + /// Return this JITDylib and its transitive dependencies in DFS order + /// based on linkage relationships. + std::vector<JITDylibSP> getDFSLinkOrder(); + + /// Rteurn this JITDylib and its transitive dependencies in reverse DFS order + /// based on linkage relationships. 
+ std::vector<JITDylibSP> getReverseDFSLinkOrder(); private: using AsynchronousSymbolQueryList = std::vector<std::shared_ptr<AsynchronousSymbolQuery>>; struct UnmaterializedInfo { - UnmaterializedInfo(std::unique_ptr<MaterializationUnit> MU) - : MU(std::move(MU)) {} + UnmaterializedInfo(std::unique_ptr<MaterializationUnit> MU, + ResourceTracker *RT) + : MU(std::move(MU)), RT(RT) {} std::unique_ptr<MaterializationUnit> MU; + ResourceTracker *RT; }; using UnmaterializedInfosMap = DenseMap<SymbolStringPtr, std::shared_ptr<UnmaterializedInfo>>; + using UnmaterializedInfosList = + std::vector<std::shared_ptr<UnmaterializedInfo>>; + struct MaterializingInfo { SymbolDependenceMap Dependants; SymbolDependenceMap UnemittedDependencies; @@ -1001,25 +1126,16 @@ private: JITDylib(ExecutionSession &ES, std::string Name); - Error defineImpl(MaterializationUnit &MU); - - void lookupFlagsImpl(SymbolFlagsMap &Result, LookupKind K, - JITDylibLookupFlags JDLookupFlags, - SymbolLookupSet &Unresolved); + ResourceTrackerSP getTracker(MaterializationResponsibility &MR); + std::pair<AsynchronousSymbolQuerySet, std::shared_ptr<SymbolDependenceMap>> + removeTracker(ResourceTracker &RT); - Error lodgeQuery(MaterializationUnitList &MUs, - std::shared_ptr<AsynchronousSymbolQuery> &Q, LookupKind K, - JITDylibLookupFlags JDLookupFlags, - SymbolLookupSet &Unresolved); + void transferTracker(ResourceTracker &DstRT, ResourceTracker &SrcRT); - Error lodgeQueryImpl(MaterializationUnitList &MUs, - std::shared_ptr<AsynchronousSymbolQuery> &Q, - LookupKind K, JITDylibLookupFlags JDLookupFlags, - SymbolLookupSet &Unresolved); + Error defineImpl(MaterializationUnit &MU); - bool lookupImpl(std::shared_ptr<AsynchronousSymbolQuery> &Q, - std::vector<std::unique_ptr<MaterializationUnit>> &MUs, - SymbolLookupSet &Unresolved); + void installMaterializationUnit(std::unique_ptr<MaterializationUnit> MU, + ResourceTracker &RT); void detachQueryHelper(AsynchronousSymbolQuery &Q, const SymbolNameSet &QuerySymbols); @@ -1030,29 +1146,45 @@ private: Expected<SymbolFlagsMap> defineMaterializing(SymbolFlagsMap SymbolFlags); - void replace(std::unique_ptr<MaterializationUnit> MU); + Error replace(MaterializationResponsibility &FromMR, + std::unique_ptr<MaterializationUnit> MU); + + Expected<std::unique_ptr<MaterializationResponsibility>> + delegate(MaterializationResponsibility &FromMR, SymbolFlagsMap SymbolFlags, + SymbolStringPtr InitSymbol); SymbolNameSet getRequestedSymbols(const SymbolFlagsMap &SymbolFlags) const; void addDependencies(const SymbolStringPtr &Name, const SymbolDependenceMap &Dependants); - Error resolve(const SymbolMap &Resolved); + Error resolve(MaterializationResponsibility &MR, const SymbolMap &Resolved); + + Error emit(MaterializationResponsibility &MR, const SymbolFlagsMap &Emitted); - Error emit(const SymbolFlagsMap &Emitted); + void unlinkMaterializationResponsibility(MaterializationResponsibility &MR); using FailedSymbolsWorklist = std::vector<std::pair<JITDylib *, SymbolStringPtr>>; - static void notifyFailed(FailedSymbolsWorklist FailedSymbols); + + static std::pair<AsynchronousSymbolQuerySet, + std::shared_ptr<SymbolDependenceMap>> + failSymbols(FailedSymbolsWorklist); ExecutionSession &ES; std::string JITDylibName; + std::mutex GeneratorsMutex; bool Open = true; SymbolTable Symbols; UnmaterializedInfosMap UnmaterializedInfos; MaterializingInfosMap MaterializingInfos; - std::vector<std::unique_ptr<DefinitionGenerator>> DefGenerators; + std::vector<std::shared_ptr<DefinitionGenerator>> DefGenerators; 
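Tying the new tracker plumbing together, a hedged sketch of per-module resource management using the define overloads above (MU stands for any already-constructed MaterializationUnit):

    Error defineRemovable(JITDylib &JD, std::unique_ptr<MaterializationUnit> MU) {
      // Give these definitions their own tracker so they can be removed later
      // without clearing the whole JITDylib.
      ResourceTrackerSP RT = JD.createResourceTracker();
      if (auto Err = JD.define(std::move(MU), RT))
        return Err;
      // ... run JIT'd code ...
      return RT->remove();   // removes the tracked symbols and their resources
    }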
JITDylibSearchOrder LinkOrder; + ResourceTrackerSP DefaultTracker; + + // Map trackers to sets of symbols tracked. + DenseMap<ResourceTracker *, SymbolNameVector> TrackerSymbols; + DenseMap<MaterializationResponsibility *, ResourceTracker *> MRTrackers; }; /// Platforms set up standard symbols and mediate interactions between dynamic @@ -1071,11 +1203,12 @@ public: /// This method will be called under the ExecutionSession lock each time a /// MaterializationUnit is added to a JITDylib. - virtual Error notifyAdding(JITDylib &JD, const MaterializationUnit &MU) = 0; + virtual Error notifyAdding(ResourceTracker &RT, + const MaterializationUnit &MU) = 0; /// This method will be called under the ExecutionSession lock when a - /// VModuleKey is removed. - virtual Error notifyRemoving(JITDylib &JD, VModuleKey K) = 0; + /// ResourceTracker is removed. + virtual Error notifyRemoving(ResourceTracker &RT) = 0; /// A utility function for looking up initializer symbols. Performs a blocking /// lookup for the given symbols in each of the given JITDylibs. @@ -1086,8 +1219,12 @@ public: /// An ExecutionSession represents a running JIT program. class ExecutionSession { - // FIXME: Remove this when we remove the old ORC layers. + friend class InProgressLookupFlagsState; + friend class InProgressFullLookupState; friend class JITDylib; + friend class LookupState; + friend class MaterializationResponsibility; + friend class ResourceTracker; public: /// For reporting errors. @@ -1096,13 +1233,16 @@ public: /// For dispatching MaterializationUnit::materialize calls. using DispatchMaterializationFunction = std::function<void(std::unique_ptr<MaterializationUnit> MU, - MaterializationResponsibility MR)>; + std::unique_ptr<MaterializationResponsibility> MR)>; /// Construct an ExecutionSession. /// /// SymbolStringPools may be shared between ExecutionSessions. ExecutionSession(std::shared_ptr<SymbolStringPool> SSP = nullptr); + /// End the session. Closes all JITDylibs. + Error endSession(); + /// Add a symbol name to the SymbolStringPool and return a pointer to it. SymbolStringPtr intern(StringRef SymName) { return SSP->intern(SymName); } @@ -1122,6 +1262,14 @@ public: return F(); } + /// Register the given ResourceManager with this ExecutionSession. + /// Managers will be notified of events in reverse order of registration. + void registerResourceManager(ResourceManager &RM); + + /// Deregister the given ResourceManager with this ExecutionSession. + /// Manager must have been previously registered. + void deregisterResourceManager(ResourceManager &RM); + /// Return a pointer to the "name" JITDylib. /// Ownership of JITDylib remains within Execution Session JITDylib *getJITDylibByName(StringRef Name); @@ -1147,17 +1295,6 @@ public: /// If no Platform is attached this call is equivalent to createBareJITDylib. Expected<JITDylib &> createJITDylib(std::string Name); - /// Allocate a module key for a new module to add to the JIT. - VModuleKey allocateVModule() { - return runSessionLocked([this]() { return ++LastKey; }); - } - - /// Return a module key to the ExecutionSession so that it can be - /// re-used. This should only be done once all resources associated - /// with the original key have been released. - void releaseVModule(VModuleKey Key) { /* FIXME: Recycle keys */ - } - /// Set the error reporter function. 
ExecutionSession &setErrorReporter(ErrorReporter ReportError) { this->ReportError = std::move(ReportError); @@ -1176,19 +1313,18 @@ public: return *this; } - void legacyFailQuery(AsynchronousSymbolQuery &Q, Error Err); + /// Search the given JITDylibs to find the flags associated with each of the + /// given symbols. + void lookupFlags(LookupKind K, JITDylibSearchOrder SearchOrder, + SymbolLookupSet Symbols, + unique_function<void(Expected<SymbolFlagsMap>)> OnComplete); - using LegacyAsyncLookupFunction = std::function<SymbolNameSet( - std::shared_ptr<AsynchronousSymbolQuery> Q, SymbolNameSet Names)>; - - /// A legacy lookup function for JITSymbolResolverAdapter. - /// Do not use -- this will be removed soon. - Expected<SymbolMap> - legacyLookup(LegacyAsyncLookupFunction AsyncLookup, SymbolNameSet Names, - SymbolState RequiredState, - RegisterDependenciesFunction RegisterDependencies); + /// Blocking version of lookupFlags. + Expected<SymbolFlagsMap> lookupFlags(LookupKind K, + JITDylibSearchOrder SearchOrder, + SymbolLookupSet Symbols); - /// Search the given JITDylib list for the given symbols. + /// Search the given JITDylibs for the given symbols. /// /// SearchOrder lists the JITDylibs to search. For each dylib, the associated /// boolean indicates whether the search should match against non-exported @@ -1248,10 +1384,11 @@ public: SymbolState RequiredState = SymbolState::Ready); /// Materialize the given unit. - void dispatchMaterialization(std::unique_ptr<MaterializationUnit> MU, - MaterializationResponsibility MR) { + void + dispatchMaterialization(std::unique_ptr<MaterializationUnit> MU, + std::unique_ptr<MaterializationResponsibility> MR) { assert(MU && "MU must be non-null"); - DEBUG_WITH_TYPE("orc", dumpDispatchInfo(MR.getTargetJITDylib(), *MU)); + DEBUG_WITH_TYPE("orc", dumpDispatchInfo(MR->getTargetJITDylib(), *MU)); DispatchMaterialization(std::move(MU), std::move(MR)); } @@ -1263,41 +1400,124 @@ private: logAllUnhandledErrors(std::move(Err), errs(), "JIT session error: "); } - static void - materializeOnCurrentThread(std::unique_ptr<MaterializationUnit> MU, - MaterializationResponsibility MR) { + static void materializeOnCurrentThread( + std::unique_ptr<MaterializationUnit> MU, + std::unique_ptr<MaterializationResponsibility> MR) { MU->materialize(std::move(MR)); } - void runOutstandingMUs(); + void dispatchOutstandingMUs(); + + static std::unique_ptr<MaterializationResponsibility> + createMaterializationResponsibility(ResourceTracker &RT, + SymbolFlagsMap Symbols, + SymbolStringPtr InitSymbol) { + auto &JD = RT.getJITDylib(); + std::unique_ptr<MaterializationResponsibility> MR( + new MaterializationResponsibility(&JD, std::move(Symbols), + std::move(InitSymbol))); + JD.MRTrackers[MR.get()] = &RT; + return MR; + } + + Error removeResourceTracker(ResourceTracker &RT); + void transferResourceTracker(ResourceTracker &DstRT, ResourceTracker &SrcRT); + void destroyResourceTracker(ResourceTracker &RT); + + // State machine functions for query application.. + + /// IL_updateCandidatesFor is called to remove already-defined symbols that + /// match a given query from the set of candidate symbols to generate + /// definitions for (no need to generate a definition if one already exists). + Error IL_updateCandidatesFor(JITDylib &JD, JITDylibLookupFlags JDLookupFlags, + SymbolLookupSet &Candidates, + SymbolLookupSet *NonCandidates); + + /// OL_applyQueryPhase1 is an optionally re-startable loop for triggering + /// definition generation. 
It is called when a lookup is performed, and again + /// each time that LookupState::continueLookup is called. + void OL_applyQueryPhase1(std::unique_ptr<InProgressLookupState> IPLS, + Error Err); + + /// OL_completeLookup is run once phase 1 successfully completes for a lookup + /// call. It attempts to attach the symbol to all symbol table entries and + /// collect all MaterializationUnits to dispatch. If this method fails then + /// all MaterializationUnits will be left un-materialized. + void OL_completeLookup(std::unique_ptr<InProgressLookupState> IPLS, + std::shared_ptr<AsynchronousSymbolQuery> Q, + RegisterDependenciesFunction RegisterDependencies); + + /// OL_completeLookupFlags is run once phase 1 successfully completes for a + /// lookupFlags call. + void OL_completeLookupFlags( + std::unique_ptr<InProgressLookupState> IPLS, + unique_function<void(Expected<SymbolFlagsMap>)> OnComplete); + + // State machine functions for MaterializationResponsibility. + void OL_destroyMaterializationResponsibility( + MaterializationResponsibility &MR); + SymbolNameSet OL_getRequestedSymbols(const MaterializationResponsibility &MR); + Error OL_notifyResolved(MaterializationResponsibility &MR, + const SymbolMap &Symbols); + Error OL_notifyEmitted(MaterializationResponsibility &MR); + Error OL_defineMaterializing(MaterializationResponsibility &MR, + SymbolFlagsMap SymbolFlags); + void OL_notifyFailed(MaterializationResponsibility &MR); + Error OL_replace(MaterializationResponsibility &MR, + std::unique_ptr<MaterializationUnit> MU); + Expected<std::unique_ptr<MaterializationResponsibility>> + OL_delegate(MaterializationResponsibility &MR, const SymbolNameSet &Symbols); + void OL_addDependencies(MaterializationResponsibility &MR, + const SymbolStringPtr &Name, + const SymbolDependenceMap &Dependencies); + void OL_addDependenciesForAll(MaterializationResponsibility &MR, + const SymbolDependenceMap &Dependencies); #ifndef NDEBUG void dumpDispatchInfo(JITDylib &JD, MaterializationUnit &MU); #endif // NDEBUG mutable std::recursive_mutex SessionMutex; + bool SessionOpen = true; std::shared_ptr<SymbolStringPool> SSP; std::unique_ptr<Platform> P; - VModuleKey LastKey = 0; ErrorReporter ReportError = logErrorsToStdErr; DispatchMaterializationFunction DispatchMaterialization = materializeOnCurrentThread; - std::vector<std::shared_ptr<JITDylib>> JDs; + std::vector<ResourceManager *> ResourceManagers; + + std::vector<JITDylibSP> JDs; // FIXME: Remove this (and runOutstandingMUs) once the linking layer works // with callbacks from asynchronous queries. 
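A short sketch of the blocking lookupFlags overload declared above (the symbol name "main" is only an example):

    Expected<SymbolFlagsMap> getMainFlags(ExecutionSession &ES, JITDylib &JD) {
      JITDylibSearchOrder Order{
          {&JD, JITDylibLookupFlags::MatchExportedSymbolsOnly}};
      return ES.lookupFlags(LookupKind::Static, std::move(Order),
                            SymbolLookupSet(ES.intern("main")));
    }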
mutable std::recursive_mutex OutstandingMUsMutex; std::vector<std::pair<std::unique_ptr<MaterializationUnit>, - MaterializationResponsibility>> + std::unique_ptr<MaterializationResponsibility>>> OutstandingMUs; }; +inline ExecutionSession &MaterializationResponsibility::getExecutionSession() { + return JD->getExecutionSession(); +} + +template <typename Func> +Error MaterializationResponsibility::withResourceKeyDo(Func &&F) const { + return JD->getExecutionSession().runSessionLocked([&]() -> Error { + auto I = JD->MRTrackers.find(this); + assert(I != JD->MRTrackers.end() && "No tracker for this MR"); + if (I->second->isDefunct()) + return make_error<ResourceTrackerDefunct>(I->second); + F(I->second->getKeyUnsafe()); + return Error::success(); + }); +} + template <typename GeneratorT> GeneratorT &JITDylib::addGenerator(std::unique_ptr<GeneratorT> DefGenerator) { auto &G = *DefGenerator; - ES.runSessionLocked( - [&]() { DefGenerators.push_back(std::move(DefGenerator)); }); + std::lock_guard<std::mutex> Lock(GeneratorsMutex); + DefGenerators.push_back(std::move(DefGenerator)); return G; } @@ -1308,7 +1528,8 @@ auto JITDylib::withLinkOrderDo(Func &&F) } template <typename MaterializationUnitType> -Error JITDylib::define(std::unique_ptr<MaterializationUnitType> &&MU) { +Error JITDylib::define(std::unique_ptr<MaterializationUnitType> &&MU, + ResourceTrackerSP RT) { assert(MU && "Can not define with a null MU"); if (MU->getSymbols().empty()) { @@ -1320,29 +1541,36 @@ Error JITDylib::define(std::unique_ptr<MaterializationUnitType> &&MU) { return Error::success(); } else DEBUG_WITH_TYPE("orc", { - dbgs() << "Defining MU " << MU->getName() << " for " << getName() << "\n"; + dbgs() << "Defining MU " << MU->getName() << " for " << getName() + << " (tracker: "; + if (RT == getDefaultResourceTracker()) + dbgs() << "default)"; + else if (RT) + dbgs() << RT.get() << ")\n"; + else + dbgs() << "0x0, default will be used)\n"; }); return ES.runSessionLocked([&, this]() -> Error { if (auto Err = defineImpl(*MU)) return Err; + if (!RT) + RT = getDefaultResourceTracker(); + if (auto *P = ES.getPlatform()) { - if (auto Err = P->notifyAdding(*this, *MU)) + if (auto Err = P->notifyAdding(*RT, *MU)) return Err; } - /// defineImpl succeeded. 
- auto UMI = std::make_shared<UnmaterializedInfo>(std::move(MU)); - for (auto &KV : UMI->MU->getSymbols()) - UnmaterializedInfos[KV.first] = UMI; - + installMaterializationUnit(std::move(MU), *RT); return Error::success(); }); } template <typename MaterializationUnitType> -Error JITDylib::define(std::unique_ptr<MaterializationUnitType> &MU) { +Error JITDylib::define(std::unique_ptr<MaterializationUnitType> &MU, + ResourceTrackerSP RT) { assert(MU && "Can not define with a null MU"); if (MU->getSymbols().empty()) { @@ -1354,30 +1582,36 @@ Error JITDylib::define(std::unique_ptr<MaterializationUnitType> &MU) { return Error::success(); } else DEBUG_WITH_TYPE("orc", { - dbgs() << "Defining MU " << MU->getName() << " for " << getName() << "\n"; + dbgs() << "Defining MU " << MU->getName() << " for " << getName() + << " (tracker: "; + if (RT == getDefaultResourceTracker()) + dbgs() << "default)"; + else if (RT) + dbgs() << RT.get() << ")\n"; + else + dbgs() << "0x0, default will be used)\n"; }); return ES.runSessionLocked([&, this]() -> Error { if (auto Err = defineImpl(*MU)) return Err; + if (!RT) + RT = getDefaultResourceTracker(); + if (auto *P = ES.getPlatform()) { - if (auto Err = P->notifyAdding(*this, *MU)) + if (auto Err = P->notifyAdding(*RT, *MU)) return Err; } - /// defineImpl succeeded. - auto UMI = std::make_shared<UnmaterializedInfo>(std::move(MU)); - for (auto &KV : UMI->MU->getSymbols()) - UnmaterializedInfos[KV.first] = UMI; - + installMaterializationUnit(std::move(MU), *RT); return Error::success(); }); } /// ReexportsGenerator can be used with JITDylib::addGenerator to automatically /// re-export a subset of the source JITDylib's symbols in the target. -class ReexportsGenerator : public JITDylib::DefinitionGenerator { +class ReexportsGenerator : public DefinitionGenerator { public: using SymbolPredicate = std::function<bool(SymbolStringPtr)>; @@ -1388,7 +1622,7 @@ public: JITDylibLookupFlags SourceJDLookupFlags, SymbolPredicate Allow = SymbolPredicate()); - Error tryToGenerate(LookupKind K, JITDylib &JD, + Error tryToGenerate(LookupState &LS, LookupKind K, JITDylib &JD, JITDylibLookupFlags JDLookupFlags, const SymbolLookupSet &LookupSet) override; @@ -1398,6 +1632,57 @@ private: SymbolPredicate Allow; }; +// --------------- IMPLEMENTATION -------------- +// Implementations for inline functions/methods. 
+// --------------------------------------------- + +inline MaterializationResponsibility::~MaterializationResponsibility() { + JD->getExecutionSession().OL_destroyMaterializationResponsibility(*this); +} + +inline SymbolNameSet MaterializationResponsibility::getRequestedSymbols() const { + return JD->getExecutionSession().OL_getRequestedSymbols(*this); +} + +inline Error MaterializationResponsibility::notifyResolved( + const SymbolMap &Symbols) { + return JD->getExecutionSession().OL_notifyResolved(*this, Symbols); +} + +inline Error MaterializationResponsibility::notifyEmitted() { + return JD->getExecutionSession().OL_notifyEmitted(*this); +} + +inline Error MaterializationResponsibility::defineMaterializing( + SymbolFlagsMap SymbolFlags) { + return JD->getExecutionSession().OL_defineMaterializing( + *this, std::move(SymbolFlags)); +} + +inline void MaterializationResponsibility::failMaterialization() { + JD->getExecutionSession().OL_notifyFailed(*this); +} + +inline Error MaterializationResponsibility::replace( + std::unique_ptr<MaterializationUnit> MU) { + return JD->getExecutionSession().OL_replace(*this, std::move(MU)); +} + +inline Expected<std::unique_ptr<MaterializationResponsibility>> +MaterializationResponsibility::delegate(const SymbolNameSet &Symbols) { + return JD->getExecutionSession().OL_delegate(*this, Symbols); +} + +inline void MaterializationResponsibility::addDependencies( + const SymbolStringPtr &Name, const SymbolDependenceMap &Dependencies) { + JD->getExecutionSession().OL_addDependencies(*this, Name, Dependencies); +} + +inline void MaterializationResponsibility::addDependenciesForAll( + const SymbolDependenceMap &Dependencies) { + JD->getExecutionSession().OL_addDependenciesForAll(*this, Dependencies); +} + } // End namespace orc } // End namespace llvm diff --git a/llvm/include/llvm/ExecutionEngine/Orc/ExecutionUtils.h b/llvm/include/llvm/ExecutionEngine/Orc/ExecutionUtils.h index 3b824b83b052..fdddc9694d0b 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/ExecutionUtils.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/ExecutionUtils.h @@ -18,7 +18,7 @@ #include "llvm/ExecutionEngine/JITSymbol.h" #include "llvm/ExecutionEngine/Orc/Core.h" #include "llvm/ExecutionEngine/Orc/Mangling.h" -#include "llvm/ExecutionEngine/Orc/OrcError.h" +#include "llvm/ExecutionEngine/Orc/Shared/OrcError.h" #include "llvm/ExecutionEngine/RuntimeDyld.h" #include "llvm/Object/Archive.h" #include "llvm/Support/DynamicLibrary.h" @@ -41,17 +41,6 @@ namespace orc { class ObjectLayer; -/// Run a main function, returning the result. -/// -/// If the optional ProgramName argument is given then it will be inserted -/// before the strings in Args as the first argument to the called function. -/// -/// It is legal to have an empty argument list and no program name, however -/// many main functions will expect a name argument at least, and will fail -/// if none is provided. -int runAsMain(int (*Main)(int, char *[]), ArrayRef<std::string> Args, - Optional<StringRef> ProgramName = None); - /// This iterator provides a convenient way to iterate over the elements /// of an llvm.global_ctors/llvm.global_dtors instance. /// @@ -152,56 +141,6 @@ inline iterator_range<StaticInitGVIterator> getStaticInitGVs(Module &M) { return make_range(StaticInitGVIterator(M), StaticInitGVIterator()); } -/// Convenience class for recording constructor/destructor names for -/// later execution. 
-template <typename JITLayerT> -class LegacyCtorDtorRunner { -public: - /// Construct a CtorDtorRunner for the given range using the given - /// name mangling function. - LLVM_ATTRIBUTE_DEPRECATED( - LegacyCtorDtorRunner(std::vector<std::string> CtorDtorNames, - VModuleKey K), - "ORCv1 utilities (utilities with the 'Legacy' prefix) are deprecated. " - "Please use the ORCv2 CtorDtorRunner utility instead"); - - LegacyCtorDtorRunner(ORCv1DeprecationAcknowledgement, - std::vector<std::string> CtorDtorNames, VModuleKey K) - : CtorDtorNames(std::move(CtorDtorNames)), K(K) {} - - /// Run the recorded constructors/destructors through the given JIT - /// layer. - Error runViaLayer(JITLayerT &JITLayer) const { - using CtorDtorTy = void (*)(); - - for (const auto &CtorDtorName : CtorDtorNames) { - if (auto CtorDtorSym = JITLayer.findSymbolIn(K, CtorDtorName, false)) { - if (auto AddrOrErr = CtorDtorSym.getAddress()) { - CtorDtorTy CtorDtor = - reinterpret_cast<CtorDtorTy>(static_cast<uintptr_t>(*AddrOrErr)); - CtorDtor(); - } else - return AddrOrErr.takeError(); - } else { - if (auto Err = CtorDtorSym.takeError()) - return Err; - else - return make_error<JITSymbolNotFound>(CtorDtorName); - } - } - return Error::success(); - } - -private: - std::vector<std::string> CtorDtorNames; - orc::VModuleKey K; -}; - -template <typename JITLayerT> -LegacyCtorDtorRunner<JITLayerT>::LegacyCtorDtorRunner( - std::vector<std::string> CtorDtorNames, VModuleKey K) - : CtorDtorNames(std::move(CtorDtorNames)), K(K) {} - class CtorDtorRunner { public: CtorDtorRunner(JITDylib &JD) : JD(JD) {} @@ -250,45 +189,6 @@ protected: void *DSOHandle); }; -class LegacyLocalCXXRuntimeOverrides : public LocalCXXRuntimeOverridesBase { -public: - /// Create a runtime-overrides class. - template <typename MangleFtorT> - LLVM_ATTRIBUTE_DEPRECATED( - LegacyLocalCXXRuntimeOverrides(const MangleFtorT &Mangle), - "ORCv1 utilities (utilities with the 'Legacy' prefix) are deprecated. " - "Please use the ORCv2 LocalCXXRuntimeOverrides utility instead"); - - template <typename MangleFtorT> - LegacyLocalCXXRuntimeOverrides(ORCv1DeprecationAcknowledgement, - const MangleFtorT &Mangle) { - addOverride(Mangle("__dso_handle"), toTargetAddress(&DSOHandleOverride)); - addOverride(Mangle("__cxa_atexit"), toTargetAddress(&CXAAtExitOverride)); - } - - /// Search overrided symbols. - JITEvaluatedSymbol searchOverrides(const std::string &Name) { - auto I = CXXRuntimeOverrides.find(Name); - if (I != CXXRuntimeOverrides.end()) - return JITEvaluatedSymbol(I->second, JITSymbolFlags::Exported); - return nullptr; - } - -private: - void addOverride(const std::string &Name, JITTargetAddress Addr) { - CXXRuntimeOverrides.insert(std::make_pair(Name, Addr)); - } - - StringMap<JITTargetAddress> CXXRuntimeOverrides; -}; - -template <typename MangleFtorT> -LegacyLocalCXXRuntimeOverrides::LegacyLocalCXXRuntimeOverrides( - const MangleFtorT &Mangle) { - addOverride(Mangle("__dso_handle"), toTargetAddress(&DSOHandleOverride)); - addOverride(Mangle("__cxa_atexit"), toTargetAddress(&CXAAtExitOverride)); -} - class LocalCXXRuntimeOverrides : public LocalCXXRuntimeOverridesBase { public: Error enable(JITDylib &JD, MangleAndInterner &Mangler); @@ -315,7 +215,7 @@ private: /// If an instance of this class is attached to a JITDylib as a fallback /// definition generator, then any symbol found in the given DynamicLibrary that /// passes the 'Allow' predicate will be added to the JITDylib. 
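For reference, attaching this generator so that unresolved symbols fall back to the host process usually looks like the following sketch (JD and DL are assumed to already exist; error handling is folded into cantFail for brevity):

    // Let lookups that fail in JD fall back to symbols in the current process.
    auto ProcessSyms = cantFail(
        DynamicLibrarySearchGenerator::GetForCurrentProcess(DL.getGlobalPrefix()));
    JD.addGenerator(std::move(ProcessSyms));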
-class DynamicLibrarySearchGenerator : public JITDylib::DefinitionGenerator { +class DynamicLibrarySearchGenerator : public DefinitionGenerator { public: using SymbolPredicate = std::function<bool(const SymbolStringPtr &)>; @@ -343,7 +243,7 @@ public: return Load(nullptr, GlobalPrefix, std::move(Allow)); } - Error tryToGenerate(LookupKind K, JITDylib &JD, + Error tryToGenerate(LookupState &LS, LookupKind K, JITDylib &JD, JITDylibLookupFlags JDLookupFlags, const SymbolLookupSet &Symbols) override; @@ -358,7 +258,7 @@ private: /// If an instance of this class is attached to a JITDylib as a fallback /// definition generator, then any symbol found in the archive will result in /// the containing object being added to the JITDylib. -class StaticLibraryDefinitionGenerator : public JITDylib::DefinitionGenerator { +class StaticLibraryDefinitionGenerator : public DefinitionGenerator { public: /// Try to create a StaticLibraryDefinitionGenerator from the given path. /// @@ -381,7 +281,7 @@ public: static Expected<std::unique_ptr<StaticLibraryDefinitionGenerator>> Create(ObjectLayer &L, std::unique_ptr<MemoryBuffer> ArchiveBuffer); - Error tryToGenerate(LookupKind K, JITDylib &JD, + Error tryToGenerate(LookupState &LS, LookupKind K, JITDylib &JD, JITDylibLookupFlags JDLookupFlags, const SymbolLookupSet &Symbols) override; diff --git a/llvm/include/llvm/ExecutionEngine/Orc/GlobalMappingLayer.h b/llvm/include/llvm/ExecutionEngine/Orc/GlobalMappingLayer.h deleted file mode 100644 index a4e43d4e1c9c..000000000000 --- a/llvm/include/llvm/ExecutionEngine/Orc/GlobalMappingLayer.h +++ /dev/null @@ -1,111 +0,0 @@ -//===- GlobalMappingLayer.h - Run all IR through a functor ------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// Convenience layer for injecting symbols that will appear in calls to -// findSymbol. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_EXECUTIONENGINE_ORC_GLOBALMAPPINGLAYER_H -#define LLVM_EXECUTIONENGINE_ORC_GLOBALMAPPINGLAYER_H - -#include "llvm/ExecutionEngine/JITSymbol.h" -#include <map> -#include <memory> -#include <string> - -namespace llvm { - -class Module; -class JITSymbolResolver; - -namespace orc { - -/// Global mapping layer. -/// -/// This layer overrides the findSymbol method to first search a local symbol -/// table that the client can define. It can be used to inject new symbol -/// mappings into the JIT. Beware, however: symbols within a single IR module or -/// object file will still resolve locally (via RuntimeDyld's symbol table) - -/// such internal references cannot be overriden via this layer. -template <typename BaseLayerT> -class GlobalMappingLayer { -public: - - /// Handle to an added module. - using ModuleHandleT = typename BaseLayerT::ModuleHandleT; - - /// Construct an GlobalMappingLayer with the given BaseLayer - GlobalMappingLayer(BaseLayerT &BaseLayer) : BaseLayer(BaseLayer) {} - - /// Add the given module to the JIT. - /// @return A handle for the added modules. - Expected<ModuleHandleT> - addModule(std::shared_ptr<Module> M, - std::shared_ptr<JITSymbolResolver> Resolver) { - return BaseLayer.addModule(std::move(M), std::move(Resolver)); - } - - /// Remove the module set associated with the handle H. 
- Error removeModule(ModuleHandleT H) { return BaseLayer.removeModule(H); } - - /// Manually set the address to return for the given symbol. - void setGlobalMapping(const std::string &Name, JITTargetAddress Addr) { - SymbolTable[Name] = Addr; - } - - /// Remove the given symbol from the global mapping. - void eraseGlobalMapping(const std::string &Name) { - SymbolTable.erase(Name); - } - - /// Search for the given named symbol. - /// - /// This method will first search the local symbol table, returning - /// any symbol found there. If the symbol is not found in the local - /// table then this call will be passed through to the base layer. - /// - /// @param Name The name of the symbol to search for. - /// @param ExportedSymbolsOnly If true, search only for exported symbols. - /// @return A handle for the given named symbol, if it exists. - JITSymbol findSymbol(const std::string &Name, bool ExportedSymbolsOnly) { - auto I = SymbolTable.find(Name); - if (I != SymbolTable.end()) - return JITSymbol(I->second, JITSymbolFlags::Exported); - return BaseLayer.findSymbol(Name, ExportedSymbolsOnly); - } - - /// Get the address of the given symbol in the context of the of the - /// module represented by the handle H. This call is forwarded to the - /// base layer's implementation. - /// @param H The handle for the module to search in. - /// @param Name The name of the symbol to search for. - /// @param ExportedSymbolsOnly If true, search only for exported symbols. - /// @return A handle for the given named symbol, if it is found in the - /// given module. - JITSymbol findSymbolIn(ModuleHandleT H, const std::string &Name, - bool ExportedSymbolsOnly) { - return BaseLayer.findSymbolIn(H, Name, ExportedSymbolsOnly); - } - - /// Immediately emit and finalize the module set represented by the - /// given handle. - /// @param H Handle for module set to emit/finalize. - Error emitAndFinalize(ModuleHandleT H) { - return BaseLayer.emitAndFinalize(H); - } - -private: - BaseLayerT &BaseLayer; - std::map<std::string, JITTargetAddress> SymbolTable; -}; - -} // end namespace orc -} // end namespace llvm - -#endif // LLVM_EXECUTIONENGINE_ORC_GLOBALMAPPINGLAYER_H diff --git a/llvm/include/llvm/ExecutionEngine/Orc/IRCompileLayer.h b/llvm/include/llvm/ExecutionEngine/Orc/IRCompileLayer.h index eb74d283f043..f8fdb171bbf9 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/IRCompileLayer.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/IRCompileLayer.h @@ -45,8 +45,8 @@ public: IRSymbolMapper::ManglingOptions MO; }; - using NotifyCompiledFunction = - std::function<void(VModuleKey K, ThreadSafeModule TSM)>; + using NotifyCompiledFunction = std::function<void( + MaterializationResponsibility &R, ThreadSafeModule TSM)>; IRCompileLayer(ExecutionSession &ES, ObjectLayer &BaseLayer, std::unique_ptr<IRCompiler> Compile); @@ -55,7 +55,8 @@ public: void setNotifyCompiled(NotifyCompiledFunction NotifyCompiled); - void emit(MaterializationResponsibility R, ThreadSafeModule TSM) override; + void emit(std::unique_ptr<MaterializationResponsibility> R, + ThreadSafeModule TSM) override; private: mutable std::mutex IRLayerMutex; @@ -65,99 +66,6 @@ private: NotifyCompiledFunction NotifyCompiled = NotifyCompiledFunction(); }; -/// Eager IR compiling layer. -/// -/// This layer immediately compiles each IR module added via addModule to an -/// object file and adds this module file to the layer below, which must -/// implement the object layer concept. 
-template <typename BaseLayerT, typename CompileFtor> -class LegacyIRCompileLayer { -public: - /// Callback type for notifications when modules are compiled. - using NotifyCompiledCallback = - std::function<void(VModuleKey K, std::unique_ptr<Module>)>; - - /// Construct an LegacyIRCompileLayer with the given BaseLayer, which must - /// implement the ObjectLayer concept. - LLVM_ATTRIBUTE_DEPRECATED( - LegacyIRCompileLayer( - BaseLayerT &BaseLayer, CompileFtor Compile, - NotifyCompiledCallback NotifyCompiled = NotifyCompiledCallback()), - "ORCv1 layers (layers with the 'Legacy' prefix) are deprecated. Please " - "use " - "the ORCv2 IRCompileLayer instead"); - - /// Legacy layer constructor with deprecation acknowledgement. - LegacyIRCompileLayer( - ORCv1DeprecationAcknowledgement, BaseLayerT &BaseLayer, - CompileFtor Compile, - NotifyCompiledCallback NotifyCompiled = NotifyCompiledCallback()) - : BaseLayer(BaseLayer), Compile(std::move(Compile)), - NotifyCompiled(std::move(NotifyCompiled)) {} - - /// Get a reference to the compiler functor. - CompileFtor& getCompiler() { return Compile; } - - /// (Re)set the NotifyCompiled callback. - void setNotifyCompiled(NotifyCompiledCallback NotifyCompiled) { - this->NotifyCompiled = std::move(NotifyCompiled); - } - - /// Compile the module, and add the resulting object to the base layer - /// along with the given memory manager and symbol resolver. - Error addModule(VModuleKey K, std::unique_ptr<Module> M) { - auto Obj = Compile(*M); - if (!Obj) - return Obj.takeError(); - if (auto Err = BaseLayer.addObject(std::move(K), std::move(*Obj))) - return Err; - if (NotifyCompiled) - NotifyCompiled(std::move(K), std::move(M)); - return Error::success(); - } - - /// Remove the module associated with the VModuleKey K. - Error removeModule(VModuleKey K) { return BaseLayer.removeObject(K); } - - /// Search for the given named symbol. - /// @param Name The name of the symbol to search for. - /// @param ExportedSymbolsOnly If true, search only for exported symbols. - /// @return A handle for the given named symbol, if it exists. - JITSymbol findSymbol(const std::string &Name, bool ExportedSymbolsOnly) { - return BaseLayer.findSymbol(Name, ExportedSymbolsOnly); - } - - /// Get the address of the given symbol in compiled module represented - /// by the handle H. This call is forwarded to the base layer's - /// implementation. - /// @param K The VModuleKey for the module to search in. - /// @param Name The name of the symbol to search for. - /// @param ExportedSymbolsOnly If true, search only for exported symbols. - /// @return A handle for the given named symbol, if it is found in the - /// given module. - JITSymbol findSymbolIn(VModuleKey K, const std::string &Name, - bool ExportedSymbolsOnly) { - return BaseLayer.findSymbolIn(K, Name, ExportedSymbolsOnly); - } - - /// Immediately emit and finalize the module represented by the given - /// handle. - /// @param K The VModuleKey for the module to emit/finalize. 
- Error emitAndFinalize(VModuleKey K) { return BaseLayer.emitAndFinalize(K); } - -private: - BaseLayerT &BaseLayer; - CompileFtor Compile; - NotifyCompiledCallback NotifyCompiled; -}; - -template <typename BaseLayerT, typename CompileFtor> -LegacyIRCompileLayer<BaseLayerT, CompileFtor>::LegacyIRCompileLayer( - BaseLayerT &BaseLayer, CompileFtor Compile, - NotifyCompiledCallback NotifyCompiled) - : BaseLayer(BaseLayer), Compile(std::move(Compile)), - NotifyCompiled(std::move(NotifyCompiled)) {} - } // end namespace orc } // end namespace llvm diff --git a/llvm/include/llvm/ExecutionEngine/Orc/IRTransformLayer.h b/llvm/include/llvm/ExecutionEngine/Orc/IRTransformLayer.h index 296d74ae6b86..66966a0f8762 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/IRTransformLayer.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/IRTransformLayer.h @@ -13,6 +13,7 @@ #ifndef LLVM_EXECUTIONENGINE_ORC_IRTRANSFORMLAYER_H #define LLVM_EXECUTIONENGINE_ORC_IRTRANSFORMLAYER_H +#include "llvm/ADT/FunctionExtras.h" #include "llvm/ExecutionEngine/JITSymbol.h" #include "llvm/ExecutionEngine/Orc/Layer.h" #include <memory> @@ -27,7 +28,7 @@ namespace orc { /// before operating on the module. class IRTransformLayer : public IRLayer { public: - using TransformFunction = std::function<Expected<ThreadSafeModule>( + using TransformFunction = unique_function<Expected<ThreadSafeModule>( ThreadSafeModule, MaterializationResponsibility &R)>; IRTransformLayer(ExecutionSession &ES, IRLayer &BaseLayer, @@ -37,7 +38,8 @@ public: this->Transform = std::move(Transform); } - void emit(MaterializationResponsibility R, ThreadSafeModule TSM) override; + void emit(std::unique_ptr<MaterializationResponsibility> R, + ThreadSafeModule TSM) override; static ThreadSafeModule identityTransform(ThreadSafeModule TSM, MaterializationResponsibility &R) { @@ -49,80 +51,6 @@ private: TransformFunction Transform; }; -/// IR mutating layer. -/// -/// This layer applies a user supplied transform to each module that is added, -/// then adds the transformed module to the layer below. -template <typename BaseLayerT, typename TransformFtor> -class LegacyIRTransformLayer { -public: - - /// Construct an LegacyIRTransformLayer with the given BaseLayer - LLVM_ATTRIBUTE_DEPRECATED( - LegacyIRTransformLayer(BaseLayerT &BaseLayer, - TransformFtor Transform = TransformFtor()), - "ORCv1 layers (layers with the 'Legacy' prefix) are deprecated. Please " - "use " - "the ORCv2 IRTransformLayer instead"); - - /// Legacy layer constructor with deprecation acknowledgement. - LegacyIRTransformLayer(ORCv1DeprecationAcknowledgement, BaseLayerT &BaseLayer, - TransformFtor Transform = TransformFtor()) - : BaseLayer(BaseLayer), Transform(std::move(Transform)) {} - - /// Apply the transform functor to the module, then add the module to - /// the layer below, along with the memory manager and symbol resolver. - /// - /// @return A handle for the added modules. - Error addModule(VModuleKey K, std::unique_ptr<Module> M) { - return BaseLayer.addModule(std::move(K), Transform(std::move(M))); - } - - /// Remove the module associated with the VModuleKey K. - Error removeModule(VModuleKey K) { return BaseLayer.removeModule(K); } - - /// Search for the given named symbol. - /// @param Name The name of the symbol to search for. - /// @param ExportedSymbolsOnly If true, search only for exported symbols. - /// @return A handle for the given named symbol, if it exists. 
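To illustrate the updated TransformFunction signature (now a unique_function that also receives the MaterializationResponsibility), a pass-through transform could be installed as in the sketch below; TL stands for any IRTransformLayer, e.g. one obtained from an LLJIT instance:

    TL.setTransform([](ThreadSafeModule TSM, MaterializationResponsibility &R)
                        -> Expected<ThreadSafeModule> {
      TSM.withModuleDo([](Module &M) {
        // Inspect or rewrite M here; this sketch leaves the module untouched.
      });
      return std::move(TSM);   // pass the module on to the layer below
    });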
- JITSymbol findSymbol(const std::string &Name, bool ExportedSymbolsOnly) { - return BaseLayer.findSymbol(Name, ExportedSymbolsOnly); - } - - /// Get the address of the given symbol in the context of the module - /// represented by the VModuleKey K. This call is forwarded to the base - /// layer's implementation. - /// @param K The VModuleKey for the module to search in. - /// @param Name The name of the symbol to search for. - /// @param ExportedSymbolsOnly If true, search only for exported symbols. - /// @return A handle for the given named symbol, if it is found in the - /// given module. - JITSymbol findSymbolIn(VModuleKey K, const std::string &Name, - bool ExportedSymbolsOnly) { - return BaseLayer.findSymbolIn(K, Name, ExportedSymbolsOnly); - } - - /// Immediately emit and finalize the module represented by the given - /// VModuleKey. - /// @param K The VModuleKey for the module to emit/finalize. - Error emitAndFinalize(VModuleKey K) { return BaseLayer.emitAndFinalize(K); } - - /// Access the transform functor directly. - TransformFtor& getTransform() { return Transform; } - - /// Access the mumate functor directly. - const TransformFtor& getTransform() const { return Transform; } - -private: - BaseLayerT &BaseLayer; - TransformFtor Transform; -}; - -template <typename BaseLayerT, typename TransformFtor> -LegacyIRTransformLayer<BaseLayerT, TransformFtor>::LegacyIRTransformLayer( - BaseLayerT &BaseLayer, TransformFtor Transform) - : BaseLayer(BaseLayer), Transform(std::move(Transform)) {} - } // end namespace orc } // end namespace llvm diff --git a/llvm/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h b/llvm/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h index e0cfd8bf2409..78e3ceef50e2 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h @@ -62,14 +62,33 @@ public: JITTargetAddress TrampolineAddr, NotifyLandingResolvedFunction OnLandingResolved) const>; - virtual ~TrampolinePool() {} + virtual ~TrampolinePool(); /// Get an available trampoline address. /// Returns an error if no trampoline can be created. - virtual Expected<JITTargetAddress> getTrampoline() = 0; + Expected<JITTargetAddress> getTrampoline() { + std::lock_guard<std::mutex> Lock(TPMutex); + if (AvailableTrampolines.empty()) { + if (auto Err = grow()) + return std::move(Err); + } + assert(!AvailableTrampolines.empty() && "Failed to grow trampoline pool"); + auto TrampolineAddr = AvailableTrampolines.back(); + AvailableTrampolines.pop_back(); + return TrampolineAddr; + } -private: - virtual void anchor(); + /// Returns the given trampoline to the pool for re-use. + void releaseTrampoline(JITTargetAddress TrampolineAddr) { + std::lock_guard<std::mutex> Lock(TPMutex); + AvailableTrampolines.push_back(TrampolineAddr); + } + +protected: + virtual Error grow() = 0; + + std::mutex TPMutex; + std::vector<JITTargetAddress> AvailableTrampolines; }; /// A trampoline pool for trampolines within the current process. @@ -90,26 +109,6 @@ public: return std::move(LTP); } - /// Get a free trampoline. Returns an error if one can not be provided (e.g. - /// because the pool is empty and can not be grown). 
- Expected<JITTargetAddress> getTrampoline() override { - std::lock_guard<std::mutex> Lock(LTPMutex); - if (AvailableTrampolines.empty()) { - if (auto Err = grow()) - return std::move(Err); - } - assert(!AvailableTrampolines.empty() && "Failed to grow trampoline pool"); - auto TrampolineAddr = AvailableTrampolines.back(); - AvailableTrampolines.pop_back(); - return TrampolineAddr; - } - - /// Returns the given trampoline to the pool for re-use. - void releaseTrampoline(JITTargetAddress TrampolineAddr) { - std::lock_guard<std::mutex> Lock(LTPMutex); - AvailableTrampolines.push_back(TrampolineAddr); - } - private: static JITTargetAddress reenter(void *TrampolinePoolPtr, void *TrampolineId) { LocalTrampolinePool<ORCABI> *TrampolinePool = @@ -154,8 +153,8 @@ private: } } - Error grow() { - assert(this->AvailableTrampolines.empty() && "Growing prematurely?"); + Error grow() override { + assert(AvailableTrampolines.empty() && "Growing prematurely?"); std::error_code EC; auto TrampolineBlock = @@ -175,7 +174,7 @@ private: pointerToJITTargetAddress(ResolverBlock.base()), NumTrampolines); for (unsigned I = 0; I < NumTrampolines; ++I) - this->AvailableTrampolines.push_back(pointerToJITTargetAddress( + AvailableTrampolines.push_back(pointerToJITTargetAddress( TrampolineMem + (I * ORCABI::TrampolineSize))); if (auto EC = sys::Memory::protectMappedMemory( @@ -189,10 +188,8 @@ private: ResolveLandingFunction ResolveLanding; - std::mutex LTPMutex; sys::OwningMemoryBlock ResolverBlock; std::vector<sys::OwningMemoryBlock> TrampolineBlocks; - std::vector<JITTargetAddress> AvailableTrampolines; }; /// Target-independent base class for compile callback management. diff --git a/llvm/include/llvm/ExecutionEngine/Orc/LLJIT.h b/llvm/include/llvm/ExecutionEngine/Orc/LLJIT.h index 96f8e169e7dc..ff0aa0238523 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/LLJIT.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/LLJIT.h @@ -19,7 +19,6 @@ #include "llvm/ExecutionEngine/Orc/IRCompileLayer.h" #include "llvm/ExecutionEngine/Orc/IRTransformLayer.h" #include "llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h" -#include "llvm/ExecutionEngine/Orc/ObjectTransformLayer.h" #include "llvm/ExecutionEngine/Orc/ThreadSafeModule.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ThreadPool.h" @@ -29,6 +28,8 @@ namespace orc { class LLJITBuilderState; class LLLazyJITBuilderState; +class ObjectTransformLayer; +class TargetProcessControl; /// A pre-fabricated ORC JIT stack that can serve as an alternative to MCJIT. /// @@ -85,21 +86,8 @@ public: return ES->createJITDylib(std::move(Name)); } - /// A convenience method for defining MUs in LLJIT's Main JITDylib. This can - /// be useful for succinctly defining absolute symbols, aliases and - /// re-exports. - template <typename MUType> - Error define(std::unique_ptr<MUType> &&MU) { - return Main->define(std::move(MU)); - } - - /// A convenience method for defining MUs in LLJIT's Main JITDylib. This can - /// be usedful for succinctly defining absolute symbols, aliases and - /// re-exports. - template <typename MUType> - Error define(std::unique_ptr<MUType> &MU) { - return Main->define(MU); - } + /// Adds an IR module with the given ResourceTracker. + Error addIRModule(ResourceTrackerSP RT, ThreadSafeModule TSM); /// Adds an IR module to the given JITDylib. Error addIRModule(JITDylib &JD, ThreadSafeModule TSM); @@ -110,6 +98,9 @@ public: } /// Adds an object file to the given JITDylib. 
+ Error addObjectFile(ResourceTrackerSP RT, std::unique_ptr<MemoryBuffer> Obj); + + /// Adds an object file to the given JITDylib. Error addObjectFile(JITDylib &JD, std::unique_ptr<MemoryBuffer> Obj); /// Adds an object file to the given JITDylib. @@ -178,7 +169,7 @@ public: ObjectLayer &getObjLinkingLayer() { return *ObjLinkingLayer; } /// Returns a reference to the object transform layer. - ObjectTransformLayer &getObjTransformLayer() { return ObjTransformLayer; } + ObjectTransformLayer &getObjTransformLayer() { return *ObjTransformLayer; } /// Returns a reference to the IR transform layer. IRTransformLayer &getIRTransformLayer() { return *TransformLayer; } @@ -195,7 +186,7 @@ public: } protected: - static std::unique_ptr<ObjectLayer> + static Expected<std::unique_ptr<ObjectLayer>> createObjectLinkingLayer(LLJITBuilderState &S, ExecutionSession &ES); static Expected<std::unique_ptr<IRCompileLayer::IRCompiler>> @@ -218,7 +209,7 @@ protected: std::unique_ptr<ThreadPool> CompileThreads; std::unique_ptr<ObjectLayer> ObjLinkingLayer; - ObjectTransformLayer ObjTransformLayer; + std::unique_ptr<ObjectTransformLayer> ObjTransformLayer; std::unique_ptr<IRCompileLayer> CompileLayer; std::unique_ptr<IRTransformLayer> TransformLayer; std::unique_ptr<IRTransformLayer> InitHelperTransformLayer; @@ -237,6 +228,9 @@ public: CODLayer->setPartitionFunction(std::move(Partition)); } + /// Returns a reference to the on-demand layer. + CompileOnDemandLayer &getCompileOnDemandLayer() { return *CODLayer; } + /// Add a module to be lazily compiled to JITDylib JD. Error addLazyIRModule(JITDylib &JD, ThreadSafeModule M); @@ -256,8 +250,9 @@ private: class LLJITBuilderState { public: - using ObjectLinkingLayerCreator = std::function<std::unique_ptr<ObjectLayer>( - ExecutionSession &, const Triple &TT)>; + using ObjectLinkingLayerCreator = + std::function<Expected<std::unique_ptr<ObjectLayer>>(ExecutionSession &, + const Triple &)>; using CompileFunctionCreator = std::function<Expected<std::unique_ptr<IRCompileLayer::IRCompiler>>( @@ -272,6 +267,7 @@ public: CompileFunctionCreator CreateCompileFunction; PlatformSetupFunction SetUpPlatform; unsigned NumCompileThreads = 0; + TargetProcessControl *TPC = nullptr; /// Called prior to JIT class construcion to fix up defaults. Error prepareForConstruction(); @@ -354,6 +350,17 @@ public: return impl(); } + /// Set a TargetProcessControl object. + /// + /// If the platform uses ObjectLinkingLayer by default and no + /// ObjectLinkingLayerCreator has been set then the TargetProcessControl + /// object will be used to supply the memory manager for the + /// ObjectLinkingLayer. + SetterImpl &setTargetProcessControl(TargetProcessControl &TPC) { + impl().TPC = &TPC; + return impl(); + } + /// Create an instance of the JIT. Expected<std::unique_ptr<JITType>> create() { if (auto Err = impl().prepareForConstruction()) diff --git a/llvm/include/llvm/ExecutionEngine/Orc/LambdaResolver.h b/llvm/include/llvm/ExecutionEngine/Orc/LambdaResolver.h deleted file mode 100644 index b31914f12a0d..000000000000 --- a/llvm/include/llvm/ExecutionEngine/Orc/LambdaResolver.h +++ /dev/null @@ -1,84 +0,0 @@ -//===- LambdaResolverMM - Redirect symbol lookup via a functor --*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// Defines a RuntimeDyld::SymbolResolver subclass that uses a user-supplied -// functor for symbol resolution. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_EXECUTIONENGINE_ORC_LAMBDARESOLVER_H -#define LLVM_EXECUTIONENGINE_ORC_LAMBDARESOLVER_H - -#include "llvm/ADT/STLExtras.h" -#include "llvm/ExecutionEngine/JITSymbol.h" -#include "llvm/ExecutionEngine/OrcV1Deprecation.h" -#include <memory> - -namespace llvm { -namespace orc { - -template <typename DylibLookupFtorT, typename ExternalLookupFtorT> -class LambdaResolver : public LegacyJITSymbolResolver { -public: - LLVM_ATTRIBUTE_DEPRECATED( - LambdaResolver(DylibLookupFtorT DylibLookupFtor, - ExternalLookupFtorT ExternalLookupFtor), - "ORCv1 utilities (including resolvers) are deprecated and will be " - "removed " - "in the next release. Please use ORCv2 (see docs/ORCv2.rst)"); - - LambdaResolver(ORCv1DeprecationAcknowledgement, - DylibLookupFtorT DylibLookupFtor, - ExternalLookupFtorT ExternalLookupFtor) - : DylibLookupFtor(DylibLookupFtor), - ExternalLookupFtor(ExternalLookupFtor) {} - - JITSymbol findSymbolInLogicalDylib(const std::string &Name) final { - return DylibLookupFtor(Name); - } - - JITSymbol findSymbol(const std::string &Name) final { - return ExternalLookupFtor(Name); - } - -private: - DylibLookupFtorT DylibLookupFtor; - ExternalLookupFtorT ExternalLookupFtor; -}; - -template <typename DylibLookupFtorT, typename ExternalLookupFtorT> -LambdaResolver<DylibLookupFtorT, ExternalLookupFtorT>::LambdaResolver( - DylibLookupFtorT DylibLookupFtor, ExternalLookupFtorT ExternalLookupFtor) - : DylibLookupFtor(DylibLookupFtor), ExternalLookupFtor(ExternalLookupFtor) { -} - -template <typename DylibLookupFtorT, - typename ExternalLookupFtorT> -std::shared_ptr<LambdaResolver<DylibLookupFtorT, ExternalLookupFtorT>> -createLambdaResolver(DylibLookupFtorT DylibLookupFtor, - ExternalLookupFtorT ExternalLookupFtor) { - using LR = LambdaResolver<DylibLookupFtorT, ExternalLookupFtorT>; - return std::make_unique<LR>(std::move(DylibLookupFtor), - std::move(ExternalLookupFtor)); -} - -template <typename DylibLookupFtorT, typename ExternalLookupFtorT> -std::shared_ptr<LambdaResolver<DylibLookupFtorT, ExternalLookupFtorT>> -createLambdaResolver(ORCv1DeprecationAcknowledgement, - DylibLookupFtorT DylibLookupFtor, - ExternalLookupFtorT ExternalLookupFtor) { - using LR = LambdaResolver<DylibLookupFtorT, ExternalLookupFtorT>; - return std::make_unique<LR>(AcknowledgeORCv1Deprecation, - std::move(DylibLookupFtor), - std::move(ExternalLookupFtor)); -} - -} // end namespace orc -} // end namespace llvm - -#endif // LLVM_EXECUTIONENGINE_ORC_LAMBDARESOLVER_H diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Layer.h b/llvm/include/llvm/ExecutionEngine/Orc/Layer.h index e843d0f56245..f9cc15583b42 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/Layer.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/Layer.h @@ -34,15 +34,15 @@ public: /// SymbolFlags and SymbolToDefinition maps. IRMaterializationUnit(ExecutionSession &ES, const IRSymbolMapper::ManglingOptions &MO, - ThreadSafeModule TSM, VModuleKey K); + ThreadSafeModule TSM); /// Create an IRMaterializationLayer from a module, and pre-existing /// SymbolFlags and SymbolToDefinition maps. The maps must provide /// entries for each definition in M. 
/// This constructor is useful for delegating work from one /// IRMaterializationUnit to another. - IRMaterializationUnit(ThreadSafeModule TSM, VModuleKey K, - SymbolFlagsMap SymbolFlags, SymbolStringPtr InitSymbol, + IRMaterializationUnit(ThreadSafeModule TSM, SymbolFlagsMap SymbolFlags, + SymbolStringPtr InitSymbol, SymbolNameToDefinitionMap SymbolToDefinition); /// Return the ModuleIdentifier as the name for this MaterializationUnit. @@ -94,13 +94,19 @@ public: /// Returns the current value of the CloneToNewContextOnEmit flag. bool getCloneToNewContextOnEmit() const { return CloneToNewContextOnEmit; } + /// Add a MaterializatinoUnit representing the given IR to the JITDylib + /// targeted by the given tracker. + virtual Error add(ResourceTrackerSP RT, ThreadSafeModule TSM); + /// Adds a MaterializationUnit representing the given IR to the given - /// JITDylib. - virtual Error add(JITDylib &JD, ThreadSafeModule TSM, - VModuleKey K = VModuleKey()); + /// JITDylib. If RT is not specif + Error add(JITDylib &JD, ThreadSafeModule TSM) { + return add(JD.getDefaultResourceTracker(), std::move(TSM)); + } /// Emit should materialize the given IR. - virtual void emit(MaterializationResponsibility R, ThreadSafeModule TSM) = 0; + virtual void emit(std::unique_ptr<MaterializationResponsibility> R, + ThreadSafeModule TSM) = 0; private: bool CloneToNewContextOnEmit = false; @@ -114,14 +120,12 @@ class BasicIRLayerMaterializationUnit : public IRMaterializationUnit { public: BasicIRLayerMaterializationUnit(IRLayer &L, const IRSymbolMapper::ManglingOptions &MO, - ThreadSafeModule TSM, VModuleKey K); + ThreadSafeModule TSM); private: - - void materialize(MaterializationResponsibility R) override; + void materialize(std::unique_ptr<MaterializationResponsibility> R) override; IRLayer &L; - VModuleKey K; }; /// Interface for Layers that accept object files. @@ -135,11 +139,14 @@ public: /// Adds a MaterializationUnit representing the given IR to the given /// JITDylib. - virtual Error add(JITDylib &JD, std::unique_ptr<MemoryBuffer> O, - VModuleKey K = VModuleKey()); + virtual Error add(ResourceTrackerSP RT, std::unique_ptr<MemoryBuffer> O); + + Error add(JITDylib &JD, std::unique_ptr<MemoryBuffer> O) { + return add(JD.getDefaultResourceTracker(), std::move(O)); + } /// Emit should materialize the given IR. 
- virtual void emit(MaterializationResponsibility R, + virtual void emit(std::unique_ptr<MaterializationResponsibility> R, std::unique_ptr<MemoryBuffer> O) = 0; private: @@ -151,9 +158,9 @@ private: class BasicObjectLayerMaterializationUnit : public MaterializationUnit { public: static Expected<std::unique_ptr<BasicObjectLayerMaterializationUnit>> - Create(ObjectLayer &L, VModuleKey K, std::unique_ptr<MemoryBuffer> O); + Create(ObjectLayer &L, std::unique_ptr<MemoryBuffer> O); - BasicObjectLayerMaterializationUnit(ObjectLayer &L, VModuleKey K, + BasicObjectLayerMaterializationUnit(ObjectLayer &L, std::unique_ptr<MemoryBuffer> O, SymbolFlagsMap SymbolFlags, SymbolStringPtr InitSymbol); @@ -162,8 +169,7 @@ public: StringRef getName() const override; private: - - void materialize(MaterializationResponsibility R) override; + void materialize(std::unique_ptr<MaterializationResponsibility> R) override; void discard(const JITDylib &JD, const SymbolStringPtr &Name) override; ObjectLayer &L; diff --git a/llvm/include/llvm/ExecutionEngine/Orc/LazyEmittingLayer.h b/llvm/include/llvm/ExecutionEngine/Orc/LazyEmittingLayer.h deleted file mode 100644 index 84f5e0350c2e..000000000000 --- a/llvm/include/llvm/ExecutionEngine/Orc/LazyEmittingLayer.h +++ /dev/null @@ -1,267 +0,0 @@ -//===- LazyEmittingLayer.h - Lazily emit IR to lower JIT layers -*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// Contains the definition for a lazy-emitting layer for the JIT. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_EXECUTIONENGINE_ORC_LAZYEMITTINGLAYER_H -#define LLVM_EXECUTIONENGINE_ORC_LAZYEMITTINGLAYER_H - -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/StringMap.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/ExecutionEngine/JITSymbol.h" -#include "llvm/ExecutionEngine/Orc/Core.h" -#include "llvm/IR/GlobalValue.h" -#include "llvm/IR/Mangler.h" -#include "llvm/IR/Module.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" -#include <algorithm> -#include <cassert> -#include <list> -#include <memory> -#include <string> - -namespace llvm { -namespace orc { - -/// Lazy-emitting IR layer. -/// -/// This layer accepts LLVM IR Modules (via addModule) but does not -/// immediately emit them the layer below. Instead, emission to the base layer -/// is deferred until the first time the client requests the address (via -/// JITSymbol::getAddress) for a symbol contained in this layer. 
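The Layer.h changes above replace the VModuleKey-based add/emit entry points with ResourceTracker and unique_ptr<MaterializationResponsibility> based ones. A short sketch, not part of the patch, of adding a module through an IRLayer and later removing it; the function name is illustrative, and the layer, JITDylib and module are assumed to exist already:

Error addAndRemove(orc::IRLayer &Layer, orc::JITDylib &JD,
                   orc::ThreadSafeModule TSM) {
  auto RT = JD.createResourceTracker();
  if (auto Err = Layer.add(RT, std::move(TSM)))
    return Err;
  // ... look up symbols and run code here ...
  // Removing the tracker releases everything that was added through it.
  return RT->remove();
}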
-template <typename BaseLayerT> class LazyEmittingLayer { -private: - class EmissionDeferredModule { - public: - EmissionDeferredModule(VModuleKey K, std::unique_ptr<Module> M) - : K(std::move(K)), M(std::move(M)) {} - - JITSymbol find(StringRef Name, bool ExportedSymbolsOnly, BaseLayerT &B) { - switch (EmitState) { - case NotEmitted: - if (auto GV = searchGVs(Name, ExportedSymbolsOnly)) { - JITSymbolFlags Flags = JITSymbolFlags::fromGlobalValue(*GV); - auto GetAddress = [this, ExportedSymbolsOnly, Name = Name.str(), - &B]() -> Expected<JITTargetAddress> { - if (this->EmitState == Emitting) - return 0; - else if (this->EmitState == NotEmitted) { - this->EmitState = Emitting; - if (auto Err = this->emitToBaseLayer(B)) - return std::move(Err); - this->EmitState = Emitted; - } - if (auto Sym = B.findSymbolIn(K, Name, ExportedSymbolsOnly)) - return Sym.getAddress(); - else if (auto Err = Sym.takeError()) - return std::move(Err); - else - llvm_unreachable("Successful symbol lookup should return " - "definition address here"); - }; - return JITSymbol(std::move(GetAddress), Flags); - } else - return nullptr; - case Emitting: - // Calling "emit" can trigger a recursive call to 'find' (e.g. to check - // for pre-existing definitions of common-symbol), but any symbol in - // this module would already have been found internally (in the - // RuntimeDyld that did the lookup), so just return a nullptr here. - return nullptr; - case Emitted: - return B.findSymbolIn(K, std::string(Name), ExportedSymbolsOnly); - } - llvm_unreachable("Invalid emit-state."); - } - - Error removeModuleFromBaseLayer(BaseLayerT& BaseLayer) { - return EmitState != NotEmitted ? BaseLayer.removeModule(K) - : Error::success(); - } - - void emitAndFinalize(BaseLayerT &BaseLayer) { - assert(EmitState != Emitting && - "Cannot emitAndFinalize while already emitting"); - if (EmitState == NotEmitted) { - EmitState = Emitting; - emitToBaseLayer(BaseLayer); - EmitState = Emitted; - } - BaseLayer.emitAndFinalize(K); - } - - private: - - const GlobalValue* searchGVs(StringRef Name, - bool ExportedSymbolsOnly) const { - // FIXME: We could clean all this up if we had a way to reliably demangle - // names: We could just demangle name and search, rather than - // mangling everything else. - - // If we have already built the mangled name set then just search it. - if (MangledSymbols) { - auto VI = MangledSymbols->find(Name); - if (VI == MangledSymbols->end()) - return nullptr; - auto GV = VI->second; - if (!ExportedSymbolsOnly || GV->hasDefaultVisibility()) - return GV; - return nullptr; - } - - // If we haven't built the mangled name set yet, try to build it. As an - // optimization this will leave MangledNames set to nullptr if we find - // Name in the process of building the set. - return buildMangledSymbols(Name, ExportedSymbolsOnly); - } - - Error emitToBaseLayer(BaseLayerT &BaseLayer) { - // We don't need the mangled names set any more: Once we've emitted this - // to the base layer we'll just look for symbols there. - MangledSymbols.reset(); - return BaseLayer.addModule(std::move(K), std::move(M)); - } - - // If the mangled name of the given GlobalValue matches the given search - // name (and its visibility conforms to the ExportedSymbolsOnly flag) then - // return the symbol. Otherwise, add the mangled name to the Names map and - // return nullptr. 
- const GlobalValue* addGlobalValue(StringMap<const GlobalValue*> &Names, - const GlobalValue &GV, - const Mangler &Mang, StringRef SearchName, - bool ExportedSymbolsOnly) const { - // Modules don't "provide" decls or common symbols. - if (GV.isDeclaration() || GV.hasCommonLinkage()) - return nullptr; - - // Mangle the GV name. - std::string MangledName; - { - raw_string_ostream MangledNameStream(MangledName); - Mang.getNameWithPrefix(MangledNameStream, &GV, false); - } - - // Check whether this is the name we were searching for, and if it is then - // bail out early. - if (MangledName == SearchName) - if (!ExportedSymbolsOnly || GV.hasDefaultVisibility()) - return &GV; - - // Otherwise add this to the map for later. - Names[MangledName] = &GV; - return nullptr; - } - - // Build the MangledSymbols map. Bails out early (with MangledSymbols left set - // to nullptr) if the given SearchName is found while building the map. - const GlobalValue* buildMangledSymbols(StringRef SearchName, - bool ExportedSymbolsOnly) const { - assert(!MangledSymbols && "Mangled symbols map already exists?"); - - auto Symbols = std::make_unique<StringMap<const GlobalValue*>>(); - - Mangler Mang; - - for (const auto &GO : M->global_objects()) - if (auto GV = addGlobalValue(*Symbols, GO, Mang, SearchName, - ExportedSymbolsOnly)) - return GV; - - MangledSymbols = std::move(Symbols); - return nullptr; - } - - enum { NotEmitted, Emitting, Emitted } EmitState = NotEmitted; - VModuleKey K; - std::unique_ptr<Module> M; - mutable std::unique_ptr<StringMap<const GlobalValue*>> MangledSymbols; - }; - - BaseLayerT &BaseLayer; - std::map<VModuleKey, std::unique_ptr<EmissionDeferredModule>> ModuleMap; - -public: - - /// Construct a lazy emitting layer. - LLVM_ATTRIBUTE_DEPRECATED( - LazyEmittingLayer(BaseLayerT &BaseLayer), - "ORCv1 layers (including LazyEmittingLayer) are deprecated. Please use " - "ORCv2, where lazy emission is the default"); - - /// Construct a lazy emitting layer. - LazyEmittingLayer(ORCv1DeprecationAcknowledgement, BaseLayerT &BaseLayer) - : BaseLayer(BaseLayer) {} - - /// Add the given module to the lazy emitting layer. - Error addModule(VModuleKey K, std::unique_ptr<Module> M) { - assert(!ModuleMap.count(K) && "VModuleKey K already in use"); - ModuleMap[K] = - std::make_unique<EmissionDeferredModule>(std::move(K), std::move(M)); - return Error::success(); - } - - /// Remove the module represented by the given handle. - /// - /// This method will free the memory associated with the given module, both - /// in this layer, and the base layer. - Error removeModule(VModuleKey K) { - auto I = ModuleMap.find(K); - assert(I != ModuleMap.end() && "VModuleKey K not valid here"); - auto EDM = std::move(I.second); - ModuleMap.erase(I); - return EDM->removeModuleFromBaseLayer(BaseLayer); - } - - /// Search for the given named symbol. - /// @param Name The name of the symbol to search for. - /// @param ExportedSymbolsOnly If true, search only for exported symbols. - /// @return A handle for the given named symbol, if it exists. - JITSymbol findSymbol(const std::string &Name, bool ExportedSymbolsOnly) { - // Look for the symbol among existing definitions. - if (auto Symbol = BaseLayer.findSymbol(Name, ExportedSymbolsOnly)) - return Symbol; - - // If not found then search the deferred modules. If any of these contain a - // definition of 'Name' then they will return a JITSymbol that will emit - // the corresponding module when the symbol address is requested. 
- for (auto &KV : ModuleMap) - if (auto Symbol = KV.second->find(Name, ExportedSymbolsOnly, BaseLayer)) - return Symbol; - - // If no definition found anywhere return a null symbol. - return nullptr; - } - - /// Get the address of the given symbol in the context of the of - /// compiled modules represented by the key K. - JITSymbol findSymbolIn(VModuleKey K, const std::string &Name, - bool ExportedSymbolsOnly) { - assert(ModuleMap.count(K) && "VModuleKey K not valid here"); - return ModuleMap[K]->find(Name, ExportedSymbolsOnly, BaseLayer); - } - - /// Immediately emit and finalize the module represented by the given - /// key. - Error emitAndFinalize(VModuleKey K) { - assert(ModuleMap.count(K) && "VModuleKey K not valid here"); - return ModuleMap[K]->emitAndFinalize(BaseLayer); - } -}; - -template <typename BaseLayerT> -LazyEmittingLayer<BaseLayerT>::LazyEmittingLayer(BaseLayerT &BaseLayer) - : BaseLayer(BaseLayer) {} - -} // end namespace orc -} // end namespace llvm - -#endif // LLVM_EXECUTIONENGINE_ORC_LAZYEMITTINGLAYER_H diff --git a/llvm/include/llvm/ExecutionEngine/Orc/LazyReexports.h b/llvm/include/llvm/ExecutionEngine/Orc/LazyReexports.h index 0d3ccecdf121..e6a9d8945285 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/LazyReexports.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/LazyReexports.h @@ -40,6 +40,9 @@ public: using NotifyResolvedFunction = unique_function<Error(JITTargetAddress ResolvedAddr)>; + LazyCallThroughManager(ExecutionSession &ES, + JITTargetAddress ErrorHandlerAddr, TrampolinePool *TP); + // Return a free call-through trampoline and bind it to look up and call // through to the given symbol. Expected<JITTargetAddress> @@ -56,9 +59,6 @@ protected: using NotifyLandingResolvedFunction = TrampolinePool::NotifyLandingResolvedFunction; - LazyCallThroughManager(ExecutionSession &ES, - JITTargetAddress ErrorHandlerAddr, TrampolinePool *TP); - struct ReexportsEntry { JITDylib *SourceJD; SymbolStringPtr SymbolName; @@ -144,12 +144,12 @@ public: IndirectStubsManager &ISManager, JITDylib &SourceJD, SymbolAliasMap CallableAliases, - ImplSymbolMap *SrcJDLoc, VModuleKey K); + ImplSymbolMap *SrcJDLoc); StringRef getName() const override; private: - void materialize(MaterializationResponsibility R) override; + void materialize(std::unique_ptr<MaterializationResponsibility> R) override; void discard(const JITDylib &JD, const SymbolStringPtr &Name) override; static SymbolFlagsMap extractFlags(const SymbolAliasMap &Aliases); @@ -166,11 +166,10 @@ private: inline std::unique_ptr<LazyReexportsMaterializationUnit> lazyReexports(LazyCallThroughManager &LCTManager, IndirectStubsManager &ISManager, JITDylib &SourceJD, - SymbolAliasMap CallableAliases, ImplSymbolMap *SrcJDLoc = nullptr, - VModuleKey K = VModuleKey()) { + SymbolAliasMap CallableAliases, + ImplSymbolMap *SrcJDLoc = nullptr) { return std::make_unique<LazyReexportsMaterializationUnit>( - LCTManager, ISManager, SourceJD, std::move(CallableAliases), SrcJDLoc, - std::move(K)); + LCTManager, ISManager, SourceJD, std::move(CallableAliases), SrcJDLoc); } } // End namespace orc diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Legacy.h b/llvm/include/llvm/ExecutionEngine/Orc/Legacy.h deleted file mode 100644 index b20202a49ef6..000000000000 --- a/llvm/include/llvm/ExecutionEngine/Orc/Legacy.h +++ /dev/null @@ -1,211 +0,0 @@ -//===--- Legacy.h -- Adapters for ExecutionEngine API interop ---*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
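With the LazyReexports.h change above, lazyReexports no longer takes a VModuleKey; per-module tracking is handled by the JITDylib's resource trackers instead. A hedged sketch of the updated call, assuming the call-through manager, stubs manager, JITDylibs and alias map are created elsewhere and that the function name is illustrative:

Error defineLazyStubs(orc::JITDylib &TargetJD, orc::JITDylib &SourceJD,
                      orc::LazyCallThroughManager &LCTM,
                      orc::IndirectStubsManager &ISM,
                      orc::SymbolAliasMap Aliases) {
  // Same call as before, minus the trailing VModuleKey argument.
  return TargetJD.define(
      orc::lazyReexports(LCTM, ISM, SourceJD, std::move(Aliases)));
}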
-// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// Contains core ORC APIs. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_EXECUTIONENGINE_ORC_LEGACY_H -#define LLVM_EXECUTIONENGINE_ORC_LEGACY_H - -#include "llvm/ExecutionEngine/JITSymbol.h" -#include "llvm/ExecutionEngine/Orc/Core.h" - -namespace llvm { -namespace orc { - -/// SymbolResolver is a composable interface for looking up symbol flags -/// and addresses using the AsynchronousSymbolQuery type. It will -/// eventually replace the LegacyJITSymbolResolver interface as the -/// stardard ORC symbol resolver type. -/// -/// FIXME: SymbolResolvers should go away and be replaced with VSOs with -/// defenition generators. -class SymbolResolver { -public: - virtual ~SymbolResolver() = default; - - /// Returns the subset of the given symbols that the caller is responsible for - /// materializing. - virtual SymbolNameSet getResponsibilitySet(const SymbolNameSet &Symbols) = 0; - - /// For each symbol in Symbols that can be found, assigns that symbols - /// value in Query. Returns the set of symbols that could not be found. - virtual SymbolNameSet lookup(std::shared_ptr<AsynchronousSymbolQuery> Query, - SymbolNameSet Symbols) = 0; - -private: - virtual void anchor(); -}; - -/// Implements SymbolResolver with a pair of supplied function objects -/// for convenience. See createSymbolResolver. -template <typename GetResponsibilitySetFn, typename LookupFn> -class LambdaSymbolResolver final : public SymbolResolver { -public: - template <typename GetResponsibilitySetFnRef, typename LookupFnRef> - LambdaSymbolResolver(GetResponsibilitySetFnRef &&GetResponsibilitySet, - LookupFnRef &&Lookup) - : GetResponsibilitySet( - std::forward<GetResponsibilitySetFnRef>(GetResponsibilitySet)), - Lookup(std::forward<LookupFnRef>(Lookup)) {} - - SymbolNameSet getResponsibilitySet(const SymbolNameSet &Symbols) final { - return GetResponsibilitySet(Symbols); - } - - SymbolNameSet lookup(std::shared_ptr<AsynchronousSymbolQuery> Query, - SymbolNameSet Symbols) final { - return Lookup(std::move(Query), std::move(Symbols)); - } - -private: - GetResponsibilitySetFn GetResponsibilitySet; - LookupFn Lookup; -}; - -/// Creates a SymbolResolver implementation from the pair of supplied -/// function objects. -template <typename GetResponsibilitySetFn, typename LookupFn> -std::unique_ptr<LambdaSymbolResolver< - std::remove_cv_t<std::remove_reference_t<GetResponsibilitySetFn>>, - std::remove_cv_t<std::remove_reference_t<LookupFn>>>> -createSymbolResolver(GetResponsibilitySetFn &&GetResponsibilitySet, - LookupFn &&Lookup) { - using LambdaSymbolResolverImpl = LambdaSymbolResolver< - std::remove_cv_t<std::remove_reference_t<GetResponsibilitySetFn>>, - std::remove_cv_t<std::remove_reference_t<LookupFn>>>; - return std::make_unique<LambdaSymbolResolverImpl>( - std::forward<GetResponsibilitySetFn>(GetResponsibilitySet), - std::forward<LookupFn>(Lookup)); -} - -/// Legacy adapter. Remove once we kill off the old ORC layers. 
-class JITSymbolResolverAdapter : public JITSymbolResolver { -public: - JITSymbolResolverAdapter(ExecutionSession &ES, SymbolResolver &R, - MaterializationResponsibility *MR); - Expected<LookupSet> getResponsibilitySet(const LookupSet &Symbols) override; - void lookup(const LookupSet &Symbols, OnResolvedFunction OnResolved) override; - -private: - ExecutionSession &ES; - std::set<SymbolStringPtr> ResolvedStrings; - SymbolResolver &R; - MaterializationResponsibility *MR; -}; - -/// Use the given legacy-style FindSymbol function (i.e. a function that takes -/// a const std::string& or StringRef and returns a JITSymbol) to get the -/// subset of symbols that the caller is responsible for materializing. If any -/// JITSymbol returned by FindSymbol is in an error state the function returns -/// immediately with that error. -/// -/// Useful for implementing getResponsibilitySet bodies that query legacy -/// resolvers. -template <typename FindSymbolFn> -Expected<SymbolNameSet> -getResponsibilitySetWithLegacyFn(const SymbolNameSet &Symbols, - FindSymbolFn FindSymbol) { - SymbolNameSet Result; - - for (auto &S : Symbols) { - if (JITSymbol Sym = FindSymbol(*S)) { - if (!Sym.getFlags().isStrong()) - Result.insert(S); - } else if (auto Err = Sym.takeError()) - return std::move(Err); - } - - return Result; -} - -/// Use the given legacy-style FindSymbol function (i.e. a function that -/// takes a const std::string& or StringRef and returns a JITSymbol) to -/// find the address and flags for each symbol in Symbols and store the -/// result in Query. If any JITSymbol returned by FindSymbol is in an -/// error then Query.notifyFailed(...) is called with that error and the -/// function returns immediately. On success, returns the set of symbols -/// not found. -/// -/// Useful for implementing lookup bodies that query legacy resolvers. 
-template <typename FindSymbolFn> -SymbolNameSet -lookupWithLegacyFn(ExecutionSession &ES, AsynchronousSymbolQuery &Query, - const SymbolNameSet &Symbols, FindSymbolFn FindSymbol) { - SymbolNameSet SymbolsNotFound; - bool NewSymbolsResolved = false; - - for (auto &S : Symbols) { - if (JITSymbol Sym = FindSymbol(*S)) { - if (auto Addr = Sym.getAddress()) { - Query.notifySymbolMetRequiredState( - S, JITEvaluatedSymbol(*Addr, Sym.getFlags())); - NewSymbolsResolved = true; - } else { - ES.legacyFailQuery(Query, Addr.takeError()); - return SymbolNameSet(); - } - } else if (auto Err = Sym.takeError()) { - ES.legacyFailQuery(Query, std::move(Err)); - return SymbolNameSet(); - } else - SymbolsNotFound.insert(S); - } - - if (NewSymbolsResolved && Query.isComplete()) - Query.handleComplete(); - - return SymbolsNotFound; -} - -/// An ORC SymbolResolver implementation that uses a legacy -/// findSymbol-like function to perform lookup; -template <typename LegacyLookupFn> -class LegacyLookupFnResolver final : public SymbolResolver { -public: - using ErrorReporter = std::function<void(Error)>; - - LegacyLookupFnResolver(ExecutionSession &ES, LegacyLookupFn LegacyLookup, - ErrorReporter ReportError) - : ES(ES), LegacyLookup(std::move(LegacyLookup)), - ReportError(std::move(ReportError)) {} - - SymbolNameSet getResponsibilitySet(const SymbolNameSet &Symbols) final { - if (auto ResponsibilitySet = - getResponsibilitySetWithLegacyFn(Symbols, LegacyLookup)) - return std::move(*ResponsibilitySet); - else { - ReportError(ResponsibilitySet.takeError()); - return SymbolNameSet(); - } - } - - SymbolNameSet lookup(std::shared_ptr<AsynchronousSymbolQuery> Query, - SymbolNameSet Symbols) final { - return lookupWithLegacyFn(ES, *Query, Symbols, LegacyLookup); - } - -private: - ExecutionSession &ES; - LegacyLookupFn LegacyLookup; - ErrorReporter ReportError; -}; - -template <typename LegacyLookupFn> -std::shared_ptr<LegacyLookupFnResolver<LegacyLookupFn>> -createLegacyLookupResolver(ExecutionSession &ES, LegacyLookupFn LegacyLookup, - std::function<void(Error)> ErrorReporter) { - return std::make_shared<LegacyLookupFnResolver<LegacyLookupFn>>( - ES, std::move(LegacyLookup), std::move(ErrorReporter)); -} - -} // End namespace orc -} // End namespace llvm - -#endif // LLVM_EXECUTIONENGINE_ORC_LEGACY_H diff --git a/llvm/include/llvm/ExecutionEngine/Orc/MachOPlatform.h b/llvm/include/llvm/ExecutionEngine/Orc/MachOPlatform.h index 15fe079eccaf..90e1d4704f34 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/MachOPlatform.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/MachOPlatform.h @@ -98,8 +98,9 @@ public: ExecutionSession &getExecutionSession() const { return ES; } Error setupJITDylib(JITDylib &JD) override; - Error notifyAdding(JITDylib &JD, const MaterializationUnit &MU) override; - Error notifyRemoving(JITDylib &JD, VModuleKey K) override; + Error notifyAdding(ResourceTracker &RT, + const MaterializationUnit &MU) override; + Error notifyRemoving(ResourceTracker &RT) override; Expected<InitializerSequence> getInitializerSequence(JITDylib &JD); @@ -119,6 +120,19 @@ private: LocalDependenciesMap getSyntheticSymbolLocalDependencies( MaterializationResponsibility &MR) override; + // FIXME: We should be tentatively tracking scraped sections and discarding + // if the MR fails. 
+ Error notifyFailed(MaterializationResponsibility &MR) override { + return Error::success(); + } + + Error notifyRemovingResources(ResourceKey K) override { + return Error::success(); + } + + void notifyTransferringResources(ResourceKey DstKey, + ResourceKey SrcKey) override {} + private: using InitSymbolDepMap = DenseMap<MaterializationResponsibility *, JITLinkSymbolVector>; @@ -136,8 +150,6 @@ private: InitSymbolDepMap InitSymbolDeps; }; - static std::vector<JITDylib *> getDFSLinkOrder(JITDylib &JD); - void registerInitInfo(JITDylib &JD, JITTargetAddress ObjCImageInfoAddr, MachOJITDylibInitializers::SectionExtent ModInits, MachOJITDylibInitializers::SectionExtent ObjCSelRefs, diff --git a/llvm/include/llvm/ExecutionEngine/Orc/NullResolver.h b/llvm/include/llvm/ExecutionEngine/Orc/NullResolver.h deleted file mode 100644 index ffa37a13d064..000000000000 --- a/llvm/include/llvm/ExecutionEngine/Orc/NullResolver.h +++ /dev/null @@ -1,43 +0,0 @@ -//===------ NullResolver.h - Reject symbol lookup requests ------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// Defines a RuntimeDyld::SymbolResolver subclass that rejects all symbol -// resolution requests, for clients that have no cross-object fixups. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_EXECUTIONENGINE_ORC_NULLRESOLVER_H -#define LLVM_EXECUTIONENGINE_ORC_NULLRESOLVER_H - -#include "llvm/ExecutionEngine/Orc/Legacy.h" -#include "llvm/ExecutionEngine/RuntimeDyld.h" - -namespace llvm { -namespace orc { - -class NullResolver : public SymbolResolver { -public: - SymbolNameSet getResponsibilitySet(const SymbolNameSet &Symbols) final; - - SymbolNameSet lookup(std::shared_ptr<AsynchronousSymbolQuery> Query, - SymbolNameSet Symbols) final; -}; - -/// SymbolResolver impliementation that rejects all resolution requests. -/// Useful for clients that have no cross-object fixups. -class NullLegacyResolver : public LegacyJITSymbolResolver { -public: - JITSymbol findSymbol(const std::string &Name) final; - - JITSymbol findSymbolInLogicalDylib(const std::string &Name) final; -}; - -} // End namespace orc. -} // End namespace llvm. - -#endif // LLVM_EXECUTIONENGINE_ORC_NULLRESOLVER_H diff --git a/llvm/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h b/llvm/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h index 2bfe3b001709..f2975e29fcd6 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h @@ -35,6 +35,7 @@ namespace llvm { namespace jitlink { class EHFrameRegistrar; +class LinkGraph; class Symbol; } // namespace jitlink @@ -51,7 +52,7 @@ class ObjectLinkingLayerJITLinkContext; /// Clients can use this class to add relocatable object files to an /// ExecutionSession, and it typically serves as the base layer (underneath /// a compiling layer like IRCompileLayer) for the rest of the JIT. 
-class ObjectLinkingLayer : public ObjectLayer { +class ObjectLinkingLayer : public ObjectLayer, private ResourceManager { friend class ObjectLinkingLayerJITLinkContext; public: @@ -72,10 +73,10 @@ public: virtual Error notifyEmitted(MaterializationResponsibility &MR) { return Error::success(); } - virtual Error notifyRemovingModule(VModuleKey K) { - return Error::success(); - } - virtual Error notifyRemovingAllModules() { return Error::success(); } + virtual Error notifyFailed(MaterializationResponsibility &MR) = 0; + virtual Error notifyRemovingResources(ResourceKey K) = 0; + virtual void notifyTransferringResources(ResourceKey DstKey, + ResourceKey SrcKey) = 0; /// Return any dependencies that synthetic symbols (e.g. init symbols) /// have on locally scoped jitlink::Symbols. This is used by the @@ -90,8 +91,14 @@ public: using ReturnObjectBufferFunction = std::function<void(std::unique_ptr<MemoryBuffer>)>; - /// Construct an ObjectLinkingLayer with the given NotifyLoaded, - /// and NotifyEmitted functors. + /// Construct an ObjectLinkingLayer. + ObjectLinkingLayer(ExecutionSession &ES, + jitlink::JITLinkMemoryManager &MemMgr); + + /// Construct an ObjectLinkingLayer. Takes ownership of the given + /// JITLinkMemoryManager. This method is a temporary hack to simplify + /// co-existence with RTDyldObjectLinkingLayer (which also owns its + /// allocators). ObjectLinkingLayer(ExecutionSession &ES, std::unique_ptr<jitlink::JITLinkMemoryManager> MemMgr); @@ -112,10 +119,14 @@ public: return *this; } - /// Emit the object. - void emit(MaterializationResponsibility R, + /// Emit an object file. + void emit(std::unique_ptr<MaterializationResponsibility> R, std::unique_ptr<MemoryBuffer> O) override; + /// Emit a LinkGraph. + void emit(std::unique_ptr<MaterializationResponsibility> R, + std::unique_ptr<jitlink::LinkGraph> G); + /// Instructs this ObjectLinkingLayer instance to override the symbol flags /// found in the AtomGraph with the flags supplied by the /// MaterializationResponsibility instance. 
This is a workaround to support @@ -155,27 +166,31 @@ private: void notifyLoaded(MaterializationResponsibility &MR); Error notifyEmitted(MaterializationResponsibility &MR, AllocPtr Alloc); - Error removeModule(VModuleKey K); - Error removeAllModules(); + Error handleRemoveResources(ResourceKey K) override; + void handleTransferResources(ResourceKey DstKey, ResourceKey SrcKey) override; mutable std::mutex LayerMutex; - std::unique_ptr<jitlink::JITLinkMemoryManager> MemMgr; + jitlink::JITLinkMemoryManager &MemMgr; + std::unique_ptr<jitlink::JITLinkMemoryManager> MemMgrOwnership; bool OverrideObjectFlags = false; bool AutoClaimObjectSymbols = false; ReturnObjectBufferFunction ReturnObjectBuffer; - DenseMap<VModuleKey, AllocPtr> TrackedAllocs; - std::vector<AllocPtr> UntrackedAllocs; + DenseMap<ResourceKey, std::vector<AllocPtr>> Allocs; std::vector<std::unique_ptr<Plugin>> Plugins; }; class EHFrameRegistrationPlugin : public ObjectLinkingLayer::Plugin { public: - EHFrameRegistrationPlugin(jitlink::EHFrameRegistrar &Registrar); - Error notifyEmitted(MaterializationResponsibility &MR) override; + EHFrameRegistrationPlugin( + ExecutionSession &ES, + std::unique_ptr<jitlink::EHFrameRegistrar> Registrar); void modifyPassConfig(MaterializationResponsibility &MR, const Triple &TT, jitlink::PassConfiguration &PassConfig) override; - Error notifyRemovingModule(VModuleKey K) override; - Error notifyRemovingAllModules() override; + Error notifyEmitted(MaterializationResponsibility &MR) override; + Error notifyFailed(MaterializationResponsibility &MR) override; + Error notifyRemovingResources(ResourceKey K) override; + void notifyTransferringResources(ResourceKey DstKey, + ResourceKey SrcKey) override; private: @@ -185,10 +200,10 @@ private: }; std::mutex EHFramePluginMutex; - jitlink::EHFrameRegistrar &Registrar; + ExecutionSession &ES; + std::unique_ptr<jitlink::EHFrameRegistrar> Registrar; DenseMap<MaterializationResponsibility *, EHFrameRange> InProcessLinks; - DenseMap<VModuleKey, EHFrameRange> TrackedEHFrameRanges; - std::vector<EHFrameRange> UntrackedEHFrameRanges; + DenseMap<ResourceKey, std::vector<EHFrameRange>> EHFrameRanges; }; } // end namespace orc diff --git a/llvm/include/llvm/ExecutionEngine/Orc/ObjectTransformLayer.h b/llvm/include/llvm/ExecutionEngine/Orc/ObjectTransformLayer.h index bf989cc8677c..d8395ab34e47 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/ObjectTransformLayer.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/ObjectTransformLayer.h @@ -31,7 +31,7 @@ public: ObjectTransformLayer(ExecutionSession &ES, ObjectLayer &BaseLayer, TransformFunction Transform = TransformFunction()); - void emit(MaterializationResponsibility R, + void emit(std::unique_ptr<MaterializationResponsibility> R, std::unique_ptr<MemoryBuffer> O) override; void setTransform(TransformFunction Transform) { @@ -43,88 +43,6 @@ private: TransformFunction Transform; }; -/// Object mutating layer. -/// -/// This layer accepts sets of ObjectFiles (via addObject). It -/// immediately applies the user supplied functor to each object, then adds -/// the set of transformed objects to the layer below. -template <typename BaseLayerT, typename TransformFtor> -class LegacyObjectTransformLayer { -public: - /// Construct an ObjectTransformLayer with the given BaseLayer - LLVM_ATTRIBUTE_DEPRECATED( - LegacyObjectTransformLayer(BaseLayerT &BaseLayer, - TransformFtor Transform = TransformFtor()), - "ORCv1 layers (layers with the 'Legacy' prefix) are deprecated. 
Please " - "use " - "the ORCv2 ObjectTransformLayer instead"); - - /// Legacy layer constructor with deprecation acknowledgement. - LegacyObjectTransformLayer(ORCv1DeprecationAcknowledgement, - BaseLayerT &BaseLayer, - TransformFtor Transform = TransformFtor()) - : BaseLayer(BaseLayer), Transform(std::move(Transform)) {} - - /// Apply the transform functor to each object in the object set, then - /// add the resulting set of objects to the base layer, along with the - /// memory manager and symbol resolver. - /// - /// @return A handle for the added objects. - template <typename ObjectPtr> Error addObject(VModuleKey K, ObjectPtr Obj) { - return BaseLayer.addObject(std::move(K), Transform(std::move(Obj))); - } - - /// Remove the object set associated with the VModuleKey K. - Error removeObject(VModuleKey K) { return BaseLayer.removeObject(K); } - - /// Search for the given named symbol. - /// @param Name The name of the symbol to search for. - /// @param ExportedSymbolsOnly If true, search only for exported symbols. - /// @return A handle for the given named symbol, if it exists. - JITSymbol findSymbol(const std::string &Name, bool ExportedSymbolsOnly) { - return BaseLayer.findSymbol(Name, ExportedSymbolsOnly); - } - - /// Get the address of the given symbol in the context of the set of - /// objects represented by the VModuleKey K. This call is forwarded to - /// the base layer's implementation. - /// @param K The VModuleKey associated with the object set to search in. - /// @param Name The name of the symbol to search for. - /// @param ExportedSymbolsOnly If true, search only for exported symbols. - /// @return A handle for the given named symbol, if it is found in the - /// given object set. - JITSymbol findSymbolIn(VModuleKey K, const std::string &Name, - bool ExportedSymbolsOnly) { - return BaseLayer.findSymbolIn(K, Name, ExportedSymbolsOnly); - } - - /// Immediately emit and finalize the object set represented by the - /// given VModuleKey K. - Error emitAndFinalize(VModuleKey K) { return BaseLayer.emitAndFinalize(K); } - - /// Map section addresses for the objects associated with the - /// VModuleKey K. - void mapSectionAddress(VModuleKey K, const void *LocalAddress, - JITTargetAddress TargetAddr) { - BaseLayer.mapSectionAddress(K, LocalAddress, TargetAddr); - } - - /// Access the transform functor directly. - TransformFtor &getTransform() { return Transform; } - - /// Access the mumate functor directly. - const TransformFtor &getTransform() const { return Transform; } - -private: - BaseLayerT &BaseLayer; - TransformFtor Transform; -}; - -template <typename BaseLayerT, typename TransformFtor> -LegacyObjectTransformLayer<BaseLayerT, TransformFtor>:: - LegacyObjectTransformLayer(BaseLayerT &BaseLayer, TransformFtor Transform) - : BaseLayer(BaseLayer), Transform(std::move(Transform)) {} - } // end namespace orc } // end namespace llvm diff --git a/llvm/include/llvm/ExecutionEngine/Orc/OrcRPCTargetProcessControl.h b/llvm/include/llvm/ExecutionEngine/Orc/OrcRPCTargetProcessControl.h new file mode 100644 index 000000000000..a8aa42799115 --- /dev/null +++ b/llvm/include/llvm/ExecutionEngine/Orc/OrcRPCTargetProcessControl.h @@ -0,0 +1,415 @@ +//===--- OrcRPCTargetProcessControl.h - Remote target control ---*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Utilities for interacting with target processes. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EXECUTIONENGINE_ORC_ORCRPCTARGETPROCESSCONTROL_H +#define LLVM_EXECUTIONENGINE_ORC_ORCRPCTARGETPROCESSCONTROL_H + +#include "llvm/ExecutionEngine/Orc/Shared/RPCUtils.h" +#include "llvm/ExecutionEngine/Orc/Shared/RawByteChannel.h" +#include "llvm/ExecutionEngine/Orc/TargetProcess/OrcRPCTPCServer.h" +#include "llvm/ExecutionEngine/Orc/TargetProcessControl.h" +#include "llvm/Support/MSVCErrorWorkarounds.h" + +namespace llvm { +namespace orc { + +/// JITLinkMemoryManager implementation for a process connected via an ORC RPC +/// endpoint. +template <typename OrcRPCTPCImplT> +class OrcRPCTPCJITLinkMemoryManager : public jitlink::JITLinkMemoryManager { +private: + struct HostAlloc { + std::unique_ptr<char[]> Mem; + uint64_t Size; + }; + + struct TargetAlloc { + JITTargetAddress Address = 0; + uint64_t AllocatedSize = 0; + }; + + using HostAllocMap = DenseMap<int, HostAlloc>; + using TargetAllocMap = DenseMap<int, TargetAlloc>; + +public: + class OrcRPCAllocation : public Allocation { + public: + OrcRPCAllocation(OrcRPCTPCJITLinkMemoryManager<OrcRPCTPCImplT> &Parent, + HostAllocMap HostAllocs, TargetAllocMap TargetAllocs) + : Parent(Parent), HostAllocs(std::move(HostAllocs)), + TargetAllocs(std::move(TargetAllocs)) { + assert(HostAllocs.size() == TargetAllocs.size() && + "HostAllocs size should match TargetAllocs"); + } + + ~OrcRPCAllocation() override { + assert(TargetAllocs.empty() && "failed to deallocate"); + } + + MutableArrayRef<char> getWorkingMemory(ProtectionFlags Seg) override { + auto I = HostAllocs.find(Seg); + assert(I != HostAllocs.end() && "No host allocation for segment"); + auto &HA = I->second; + return {HA.Mem.get(), static_cast<size_t>(HA.Size)}; + } + + JITTargetAddress getTargetMemory(ProtectionFlags Seg) override { + auto I = TargetAllocs.find(Seg); + assert(I != TargetAllocs.end() && "No target allocation for segment"); + return I->second.Address; + } + + void finalizeAsync(FinalizeContinuation OnFinalize) override { + + std::vector<tpctypes::BufferWrite> BufferWrites; + orcrpctpc::ReleaseOrFinalizeMemRequest FMR; + + for (auto &KV : HostAllocs) { + assert(TargetAllocs.count(KV.first) && + "No target allocation for buffer"); + auto &HA = KV.second; + auto &TA = TargetAllocs[KV.first]; + BufferWrites.push_back({TA.Address, StringRef(HA.Mem.get(), HA.Size)}); + FMR.push_back({orcrpctpc::toWireProtectionFlags( + static_cast<sys::Memory::ProtectionFlags>(KV.first)), + TA.Address, TA.AllocatedSize}); + } + + DEBUG_WITH_TYPE("orc", { + dbgs() << "finalizeAsync " << (void *)this << ":\n"; + auto FMRI = FMR.begin(); + for (auto &B : BufferWrites) { + auto Prot = FMRI->Prot; + ++FMRI; + dbgs() << " Writing " << formatv("{0:x16}", B.Buffer.size()) + << " bytes to " << ((Prot & orcrpctpc::WPF_Read) ? 'R' : '-') + << ((Prot & orcrpctpc::WPF_Write) ? 'W' : '-') + << ((Prot & orcrpctpc::WPF_Exec) ? 
'X' : '-') + << " segment: local " << (const void *)B.Buffer.data() + << " -> target " << formatv("{0:x16}", B.Address) << "\n"; + } + }); + if (auto Err = + Parent.Parent.getMemoryAccess().writeBuffers(BufferWrites)) { + OnFinalize(std::move(Err)); + return; + } + + DEBUG_WITH_TYPE("orc", dbgs() << " Applying permissions...\n"); + if (auto Err = + Parent.getEndpoint().template callAsync<orcrpctpc::FinalizeMem>( + [OF = std::move(OnFinalize)](Error Err2) { + // FIXME: Dispatch to work queue. + std::thread([OF = std::move(OF), + Err3 = std::move(Err2)]() mutable { + DEBUG_WITH_TYPE( + "orc", { dbgs() << " finalizeAsync complete\n"; }); + OF(std::move(Err3)); + }).detach(); + return Error::success(); + }, + FMR)) { + DEBUG_WITH_TYPE("orc", dbgs() << " failed.\n"); + Parent.getEndpoint().abandonPendingResponses(); + Parent.reportError(std::move(Err)); + } + DEBUG_WITH_TYPE("orc", { + dbgs() << "Leaving finalizeAsync (finalization may continue in " + "background)\n"; + }); + } + + Error deallocate() override { + orcrpctpc::ReleaseOrFinalizeMemRequest RMR; + for (auto &KV : TargetAllocs) + RMR.push_back({orcrpctpc::toWireProtectionFlags( + static_cast<sys::Memory::ProtectionFlags>(KV.first)), + KV.second.Address, KV.second.AllocatedSize}); + TargetAllocs.clear(); + + return Parent.getEndpoint().template callB<orcrpctpc::ReleaseMem>(RMR); + } + + private: + OrcRPCTPCJITLinkMemoryManager<OrcRPCTPCImplT> &Parent; + HostAllocMap HostAllocs; + TargetAllocMap TargetAllocs; + }; + + OrcRPCTPCJITLinkMemoryManager(OrcRPCTPCImplT &Parent) : Parent(Parent) {} + + Expected<std::unique_ptr<Allocation>> + allocate(const jitlink::JITLinkDylib *JD, + const SegmentsRequestMap &Request) override { + orcrpctpc::ReserveMemRequest RMR; + HostAllocMap HostAllocs; + + for (auto &KV : Request) { + assert(KV.second.getContentSize() <= std::numeric_limits<size_t>::max() && + "Content size is out-of-range for host"); + + RMR.push_back({orcrpctpc::toWireProtectionFlags( + static_cast<sys::Memory::ProtectionFlags>(KV.first)), + KV.second.getContentSize() + KV.second.getZeroFillSize(), + KV.second.getAlignment()}); + HostAllocs[KV.first] = { + std::make_unique<char[]>(KV.second.getContentSize()), + KV.second.getContentSize()}; + } + + DEBUG_WITH_TYPE("orc", { + dbgs() << "Orc remote memmgr got request:\n"; + for (auto &KV : Request) + dbgs() << " permissions: " + << ((KV.first & sys::Memory::MF_READ) ? 'R' : '-') + << ((KV.first & sys::Memory::MF_WRITE) ? 'W' : '-') + << ((KV.first & sys::Memory::MF_EXEC) ? 'X' : '-') + << ", content size: " + << formatv("{0:x16}", KV.second.getContentSize()) + << " + zero-fill-size: " + << formatv("{0:x16}", KV.second.getZeroFillSize()) + << ", align: " << KV.second.getAlignment() << "\n"; + }); + + // FIXME: LLVM RPC needs to be fixed to support alt + // serialization/deserialization on return types. For now just + // translate from std::map to DenseMap manually. 
+ auto TmpTargetAllocs = + Parent.getEndpoint().template callB<orcrpctpc::ReserveMem>(RMR); + if (!TmpTargetAllocs) + return TmpTargetAllocs.takeError(); + + if (TmpTargetAllocs->size() != RMR.size()) + return make_error<StringError>( + "Number of target allocations does not match request", + inconvertibleErrorCode()); + + TargetAllocMap TargetAllocs; + for (auto &E : *TmpTargetAllocs) + TargetAllocs[orcrpctpc::fromWireProtectionFlags(E.Prot)] = { + E.Address, E.AllocatedSize}; + + DEBUG_WITH_TYPE("orc", { + auto HAI = HostAllocs.begin(); + for (auto &KV : TargetAllocs) + dbgs() << " permissions: " + << ((KV.first & sys::Memory::MF_READ) ? 'R' : '-') + << ((KV.first & sys::Memory::MF_WRITE) ? 'W' : '-') + << ((KV.first & sys::Memory::MF_EXEC) ? 'X' : '-') + << " assigned local " << (void *)HAI->second.Mem.get() + << ", target " << formatv("{0:x16}", KV.second.Address) << "\n"; + }); + + return std::make_unique<OrcRPCAllocation>(*this, std::move(HostAllocs), + std::move(TargetAllocs)); + } + +private: + void reportError(Error Err) { Parent.reportError(std::move(Err)); } + + decltype(std::declval<OrcRPCTPCImplT>().getEndpoint()) getEndpoint() { + return Parent.getEndpoint(); + } + + OrcRPCTPCImplT &Parent; +}; + +/// TargetProcessControl::MemoryAccess implementation for a process connected +/// via an ORC RPC endpoint. +template <typename OrcRPCTPCImplT> +class OrcRPCTPCMemoryAccess : public TargetProcessControl::MemoryAccess { +public: + OrcRPCTPCMemoryAccess(OrcRPCTPCImplT &Parent) : Parent(Parent) {} + + void writeUInt8s(ArrayRef<tpctypes::UInt8Write> Ws, + WriteResultFn OnWriteComplete) override { + writeViaRPC<orcrpctpc::WriteUInt8s>(Ws, std::move(OnWriteComplete)); + } + + void writeUInt16s(ArrayRef<tpctypes::UInt16Write> Ws, + WriteResultFn OnWriteComplete) override { + writeViaRPC<orcrpctpc::WriteUInt16s>(Ws, std::move(OnWriteComplete)); + } + + void writeUInt32s(ArrayRef<tpctypes::UInt32Write> Ws, + WriteResultFn OnWriteComplete) override { + writeViaRPC<orcrpctpc::WriteUInt32s>(Ws, std::move(OnWriteComplete)); + } + + void writeUInt64s(ArrayRef<tpctypes::UInt64Write> Ws, + WriteResultFn OnWriteComplete) override { + writeViaRPC<orcrpctpc::WriteUInt64s>(Ws, std::move(OnWriteComplete)); + } + + void writeBuffers(ArrayRef<tpctypes::BufferWrite> Ws, + WriteResultFn OnWriteComplete) override { + writeViaRPC<orcrpctpc::WriteBuffers>(Ws, std::move(OnWriteComplete)); + } + +private: + template <typename WriteRPCFunction, typename WriteElementT> + void writeViaRPC(ArrayRef<WriteElementT> Ws, WriteResultFn OnWriteComplete) { + if (auto Err = Parent.getEndpoint().template callAsync<WriteRPCFunction>( + [OWC = std::move(OnWriteComplete)](Error Err2) mutable -> Error { + OWC(std::move(Err2)); + return Error::success(); + }, + Ws)) { + Parent.reportError(std::move(Err)); + Parent.getEndpoint().abandonPendingResponses(); + } + } + + OrcRPCTPCImplT &Parent; +}; + +// TargetProcessControl for a process connected via an ORC RPC Endpoint. 
+template <typename RPCEndpointT> +class OrcRPCTargetProcessControlBase : public TargetProcessControl { +public: + using ErrorReporter = unique_function<void(Error)>; + + using OnCloseConnectionFunction = unique_function<Error(Error)>; + + OrcRPCTargetProcessControlBase(std::shared_ptr<SymbolStringPool> SSP, + RPCEndpointT &EP, ErrorReporter ReportError) + : TargetProcessControl(std::move(SSP)), + ReportError(std::move(ReportError)), EP(EP) {} + + void reportError(Error Err) { ReportError(std::move(Err)); } + + RPCEndpointT &getEndpoint() { return EP; } + + Expected<tpctypes::DylibHandle> loadDylib(const char *DylibPath) override { + DEBUG_WITH_TYPE("orc", { + dbgs() << "Loading dylib \"" << (DylibPath ? DylibPath : "") << "\" "; + if (!DylibPath) + dbgs() << "(process symbols)"; + dbgs() << "\n"; + }); + if (!DylibPath) + DylibPath = ""; + auto H = EP.template callB<orcrpctpc::LoadDylib>(DylibPath); + DEBUG_WITH_TYPE("orc", { + if (H) + dbgs() << " got handle " << formatv("{0:x16}", *H) << "\n"; + else + dbgs() << " error, unable to load\n"; + }); + return H; + } + + Expected<std::vector<tpctypes::LookupResult>> + lookupSymbols(ArrayRef<LookupRequest> Request) override { + std::vector<orcrpctpc::RemoteLookupRequest> RR; + for (auto &E : Request) { + RR.push_back({}); + RR.back().first = E.Handle; + for (auto &KV : E.Symbols) + RR.back().second.push_back( + {(*KV.first).str(), + KV.second == SymbolLookupFlags::WeaklyReferencedSymbol}); + } + DEBUG_WITH_TYPE("orc", { + dbgs() << "Compound lookup:\n"; + for (auto &R : Request) { + dbgs() << " In " << formatv("{0:x16}", R.Handle) << ": {"; + bool First = true; + for (auto &KV : R.Symbols) { + dbgs() << (First ? "" : ",") << " " << *KV.first; + First = false; + } + dbgs() << " }\n"; + } + }); + return EP.template callB<orcrpctpc::LookupSymbols>(RR); + } + + Expected<int32_t> runAsMain(JITTargetAddress MainFnAddr, + ArrayRef<std::string> Args) override { + DEBUG_WITH_TYPE("orc", { + dbgs() << "Running as main: " << formatv("{0:x16}", MainFnAddr) + << ", args = ["; + for (unsigned I = 0; I != Args.size(); ++I) + dbgs() << (I ? 
"," : "") << " \"" << Args[I] << "\""; + dbgs() << "]\n"; + }); + auto Result = EP.template callB<orcrpctpc::RunMain>(MainFnAddr, Args); + DEBUG_WITH_TYPE("orc", { + dbgs() << " call to " << formatv("{0:x16}", MainFnAddr); + if (Result) + dbgs() << " returned result " << *Result << "\n"; + else + dbgs() << " failed\n"; + }); + return Result; + } + + Expected<tpctypes::WrapperFunctionResult> + runWrapper(JITTargetAddress WrapperFnAddr, + ArrayRef<uint8_t> ArgBuffer) override { + DEBUG_WITH_TYPE("orc", { + dbgs() << "Running as wrapper function " + << formatv("{0:x16}", WrapperFnAddr) << " with " + << formatv("{0:x16}", ArgBuffer.size()) << " argument buffer\n"; + }); + auto Result = + EP.template callB<orcrpctpc::RunWrapper>(WrapperFnAddr, ArgBuffer); + // dbgs() << "Returned from runWrapper...\n"; + return Result; + } + + Error closeConnection(OnCloseConnectionFunction OnCloseConnection) { + DEBUG_WITH_TYPE("orc", dbgs() << "Closing connection to remote\n"); + return EP.template callAsync<orcrpctpc::CloseConnection>( + std::move(OnCloseConnection)); + } + + Error closeConnectionAndWait() { + std::promise<MSVCPError> P; + auto F = P.get_future(); + if (auto Err = closeConnection([&](Error Err2) -> Error { + P.set_value(std::move(Err2)); + return Error::success(); + })) { + EP.abandonAllPendingResponses(); + return joinErrors(std::move(Err), F.get()); + } + return F.get(); + } + +protected: + /// Subclasses must call this during construction to initialize the + /// TargetTriple and PageSize members. + Error initializeORCRPCTPCBase() { + if (auto TripleOrErr = EP.template callB<orcrpctpc::GetTargetTriple>()) + TargetTriple = Triple(*TripleOrErr); + else + return TripleOrErr.takeError(); + + if (auto PageSizeOrErr = EP.template callB<orcrpctpc::GetPageSize>()) + PageSize = *PageSizeOrErr; + else + return PageSizeOrErr.takeError(); + + return Error::success(); + } + +private: + ErrorReporter ReportError; + RPCEndpointT &EP; +}; + +} // end namespace orc +} // end namespace llvm + +#endif // LLVM_EXECUTIONENGINE_ORC_ORCRPCTARGETPROCESSCONTROL_H diff --git a/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetClient.h b/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetClient.h index 86e8d5df3ad9..3d139740d677 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetClient.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetClient.h @@ -20,6 +20,7 @@ #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/ExecutionEngine/JITSymbol.h" +#include "llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h" #include "llvm/ExecutionEngine/Orc/IndirectionUtils.h" #include "llvm/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.h" #include "llvm/ExecutionEngine/RuntimeDyld.h" @@ -53,7 +54,7 @@ namespace remote { /// OrcRemoteTargetServer class) via an RPC system (see RPCUtils.h) to carry out /// its actions. class OrcRemoteTargetClient - : public rpc::SingleThreadedRPCEndpoint<rpc::RawByteChannel> { + : public shared::SingleThreadedRPCEndpoint<shared::RawByteChannel> { public: /// Remote-mapped RuntimeDyld-compatible memory manager. 
class RemoteRTDyldMemoryManager : public RuntimeDyld::MemoryManager { @@ -329,6 +330,221 @@ public: std::vector<EHFrame> RegisteredEHFrames; }; + class RPCMMAlloc : public jitlink::JITLinkMemoryManager::Allocation { + using AllocationMap = DenseMap<unsigned, sys::MemoryBlock>; + using FinalizeContinuation = + jitlink::JITLinkMemoryManager::Allocation::FinalizeContinuation; + using ProtectionFlags = sys::Memory::ProtectionFlags; + using SegmentsRequestMap = + DenseMap<unsigned, jitlink::JITLinkMemoryManager::SegmentRequest>; + + RPCMMAlloc(OrcRemoteTargetClient &Client, ResourceIdMgr::ResourceId Id) + : Client(Client), Id(Id) {} + + public: + static Expected<std::unique_ptr<RPCMMAlloc>> + Create(OrcRemoteTargetClient &Client, ResourceIdMgr::ResourceId Id, + const SegmentsRequestMap &Request) { + auto *MM = new RPCMMAlloc(Client, Id); + + if (Error Err = MM->allocateHostBlocks(Request)) + return std::move(Err); + + if (Error Err = MM->allocateTargetBlocks()) + return std::move(Err); + + return std::unique_ptr<RPCMMAlloc>(MM); + } + + MutableArrayRef<char> getWorkingMemory(ProtectionFlags Seg) override { + assert(HostSegBlocks.count(Seg) && "No allocation for segment"); + return {static_cast<char *>(HostSegBlocks[Seg].base()), + HostSegBlocks[Seg].allocatedSize()}; + } + + JITTargetAddress getTargetMemory(ProtectionFlags Seg) override { + assert(TargetSegBlocks.count(Seg) && "No allocation for segment"); + return pointerToJITTargetAddress(TargetSegBlocks[Seg].base()); + } + + void finalizeAsync(FinalizeContinuation OnFinalize) override { + // Host allocations (working memory) remain ReadWrite. + OnFinalize(copyAndProtect()); + } + + Error deallocate() override { + // TODO: Cannot release target allocation. RPCAPI has no function + // symmetric to reserveMem(). Add RPC call like freeMem()? + return errorCodeToError(sys::Memory::releaseMappedMemory(HostAllocation)); + } + + private: + OrcRemoteTargetClient &Client; + ResourceIdMgr::ResourceId Id; + AllocationMap HostSegBlocks; + AllocationMap TargetSegBlocks; + JITTargetAddress TargetSegmentAddr; + sys::MemoryBlock HostAllocation; + + Error allocateHostBlocks(const SegmentsRequestMap &Request) { + unsigned TargetPageSize = Client.getPageSize(); + + if (!isPowerOf2_64(static_cast<uint64_t>(TargetPageSize))) + return make_error<StringError>("Host page size is not a power of 2", + inconvertibleErrorCode()); + + auto TotalSize = calcTotalAllocSize(Request, TargetPageSize); + if (!TotalSize) + return TotalSize.takeError(); + + // Allocate one slab to cover all the segments. + const sys::Memory::ProtectionFlags ReadWrite = + static_cast<sys::Memory::ProtectionFlags>(sys::Memory::MF_READ | + sys::Memory::MF_WRITE); + std::error_code EC; + HostAllocation = + sys::Memory::allocateMappedMemory(*TotalSize, nullptr, ReadWrite, EC); + if (EC) + return errorCodeToError(EC); + + char *SlabAddr = static_cast<char *>(HostAllocation.base()); +#ifndef NDEBUG + char *SlabAddrEnd = SlabAddr + HostAllocation.allocatedSize(); +#endif + + // Allocate segment memory from the slab. + for (auto &KV : Request) { + const auto &Seg = KV.second; + + uint64_t SegmentSize = Seg.getContentSize() + Seg.getZeroFillSize(); + uint64_t AlignedSegmentSize = alignTo(SegmentSize, TargetPageSize); + + // Zero out zero-fill memory. + char *ZeroFillBegin = SlabAddr + Seg.getContentSize(); + memset(ZeroFillBegin, 0, Seg.getZeroFillSize()); + + // Record the block for this segment. 
+ HostSegBlocks[KV.first] = + sys::MemoryBlock(SlabAddr, AlignedSegmentSize); + + SlabAddr += AlignedSegmentSize; + assert(SlabAddr <= SlabAddrEnd && "Out of range"); + } + + return Error::success(); + } + + Error allocateTargetBlocks() { + // Reserve memory for all blocks on the target. We need as much space on + // the target as we allocated on the host. + TargetSegmentAddr = Client.reserveMem(Id, HostAllocation.allocatedSize(), + Client.getPageSize()); + if (!TargetSegmentAddr) + return make_error<StringError>("Failed to reserve memory on the target", + inconvertibleErrorCode()); + + // Map memory blocks into the allocation, that match the host allocation. + JITTargetAddress TargetAllocAddr = TargetSegmentAddr; + for (const auto &KV : HostSegBlocks) { + size_t TargetAllocSize = KV.second.allocatedSize(); + + TargetSegBlocks[KV.first] = + sys::MemoryBlock(jitTargetAddressToPointer<void *>(TargetAllocAddr), + TargetAllocSize); + + TargetAllocAddr += TargetAllocSize; + assert(TargetAllocAddr - TargetSegmentAddr <= + HostAllocation.allocatedSize() && + "Out of range on target"); + } + + return Error::success(); + } + + Error copyAndProtect() { + unsigned Permissions = 0u; + + // Copy segments one by one. + for (auto &KV : TargetSegBlocks) { + Permissions |= KV.first; + + const sys::MemoryBlock &TargetBlock = KV.second; + const sys::MemoryBlock &HostBlock = HostSegBlocks.lookup(KV.first); + + size_t TargetAllocSize = TargetBlock.allocatedSize(); + auto TargetAllocAddr = pointerToJITTargetAddress(TargetBlock.base()); + auto *HostAllocBegin = static_cast<const char *>(HostBlock.base()); + + bool CopyErr = + Client.writeMem(TargetAllocAddr, HostAllocBegin, TargetAllocSize); + if (CopyErr) + return createStringError(inconvertibleErrorCode(), + "Failed to copy %d segment to the target", + KV.first); + } + + // Set permission flags for all segments at once. 
+ bool ProtectErr = + Client.setProtections(Id, TargetSegmentAddr, Permissions); + if (ProtectErr) + return createStringError(inconvertibleErrorCode(), + "Failed to apply permissions for %d segment " + "on the target", + Permissions); + return Error::success(); + } + + static Expected<size_t> + calcTotalAllocSize(const SegmentsRequestMap &Request, + unsigned TargetPageSize) { + size_t TotalSize = 0; + for (const auto &KV : Request) { + const auto &Seg = KV.second; + + if (Seg.getAlignment() > TargetPageSize) + return make_error<StringError>("Cannot request alignment higher than " + "page alignment on target", + inconvertibleErrorCode()); + + TotalSize = alignTo(TotalSize, TargetPageSize); + TotalSize += Seg.getContentSize(); + TotalSize += Seg.getZeroFillSize(); + } + + return TotalSize; + } + }; + + class RemoteJITLinkMemoryManager : public jitlink::JITLinkMemoryManager { + public: + RemoteJITLinkMemoryManager(OrcRemoteTargetClient &Client, + ResourceIdMgr::ResourceId Id) + : Client(Client), Id(Id) {} + + RemoteJITLinkMemoryManager(const RemoteJITLinkMemoryManager &) = delete; + RemoteJITLinkMemoryManager(RemoteJITLinkMemoryManager &&) = default; + + RemoteJITLinkMemoryManager & + operator=(const RemoteJITLinkMemoryManager &) = delete; + RemoteJITLinkMemoryManager & + operator=(RemoteJITLinkMemoryManager &&) = delete; + + ~RemoteJITLinkMemoryManager() { + Client.destroyRemoteAllocator(Id); + LLVM_DEBUG(dbgs() << "Destroyed remote allocator " << Id << "\n"); + } + + Expected<std::unique_ptr<Allocation>> + allocate(const jitlink::JITLinkDylib *JD, + const SegmentsRequestMap &Request) override { + return RPCMMAlloc::Create(Client, Id, Request); + } + + private: + OrcRemoteTargetClient &Client; + ResourceIdMgr::ResourceId Id; + }; + /// Remote indirect stubs manager. class RemoteIndirectStubsManager : public IndirectStubsManager { public: @@ -453,20 +669,8 @@ public: public: RemoteTrampolinePool(OrcRemoteTargetClient &Client) : Client(Client) {} - Expected<JITTargetAddress> getTrampoline() override { - std::lock_guard<std::mutex> Lock(RTPMutex); - if (AvailableTrampolines.empty()) { - if (auto Err = grow()) - return std::move(Err); - } - assert(!AvailableTrampolines.empty() && "Failed to grow trampoline pool"); - auto TrampolineAddr = AvailableTrampolines.back(); - AvailableTrampolines.pop_back(); - return TrampolineAddr; - } - private: - Error grow() { + Error grow() override { JITTargetAddress BlockAddr = 0; uint32_t NumTrampolines = 0; if (auto TrampolineInfoOrErr = Client.emitTrampolineBlock()) @@ -476,14 +680,12 @@ public: uint32_t TrampolineSize = Client.getTrampolineSize(); for (unsigned I = 0; I < NumTrampolines; ++I) - this->AvailableTrampolines.push_back(BlockAddr + (I * TrampolineSize)); + AvailableTrampolines.push_back(BlockAddr + (I * TrampolineSize)); return Error::success(); } - std::mutex RTPMutex; OrcRemoteTargetClient &Client; - std::vector<JITTargetAddress> AvailableTrampolines; }; /// Remote compile callback manager. @@ -501,7 +703,7 @@ public: /// Channel is the ChannelT instance to communicate on. It is assumed that /// the channel is ready to be read from and written to. 
static Expected<std::unique_ptr<OrcRemoteTargetClient>> - Create(rpc::RawByteChannel &Channel, ExecutionSession &ES) { + Create(shared::RawByteChannel &Channel, ExecutionSession &ES) { Error Err = Error::success(); auto Client = std::unique_ptr<OrcRemoteTargetClient>( new OrcRemoteTargetClient(Channel, ES, Err)); @@ -518,6 +720,14 @@ public: return callB<exec::CallIntVoid>(Addr); } + /// Call the int(int) function at the given address in the target and return + /// its result. + Expected<int> callIntInt(JITTargetAddress Addr, int Arg) { + LLVM_DEBUG(dbgs() << "Calling int(*)(int) " << format("0x%016" PRIx64, Addr) + << "\n"); + return callB<exec::CallIntInt>(Addr, Arg); + } + /// Call the int(int, char*[]) function at the given address in the target and /// return its result. Expected<int> callMain(JITTargetAddress Addr, @@ -546,6 +756,18 @@ public: new RemoteRTDyldMemoryManager(*this, Id)); } + /// Create a JITLink-compatible memory manager which will allocate working + /// memory on the host and target memory on the remote target. + Expected<std::unique_ptr<RemoteJITLinkMemoryManager>> + createRemoteJITLinkMemoryManager() { + auto Id = AllocatorIds.getNext(); + if (auto Err = callB<mem::CreateRemoteAllocator>(Id)) + return std::move(Err); + LLVM_DEBUG(dbgs() << "Created remote allocator " << Id << "\n"); + return std::unique_ptr<RemoteJITLinkMemoryManager>( + new RemoteJITLinkMemoryManager(*this, Id)); + } + /// Create an RCIndirectStubsManager that will allocate stubs on the remote /// target. Expected<std::unique_ptr<RemoteIndirectStubsManager>> @@ -583,9 +805,10 @@ public: Error terminateSession() { return callB<utils::TerminateSession>(); } private: - OrcRemoteTargetClient(rpc::RawByteChannel &Channel, ExecutionSession &ES, + OrcRemoteTargetClient(shared::RawByteChannel &Channel, ExecutionSession &ES, Error &Err) - : rpc::SingleThreadedRPCEndpoint<rpc::RawByteChannel>(Channel, true), + : shared::SingleThreadedRPCEndpoint<shared::RawByteChannel>(Channel, + true), ES(ES) { ErrorAsOutParameter EAO(&Err); diff --git a/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.h b/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.h index 52a328165240..367bfb369191 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.h @@ -16,8 +16,8 @@ #define LLVM_EXECUTIONENGINE_ORC_ORCREMOTETARGETRPCAPI_H #include "llvm/ExecutionEngine/JITSymbol.h" -#include "llvm/ExecutionEngine/Orc/RPC/RPCUtils.h" -#include "llvm/ExecutionEngine/Orc/RPC/RawByteChannel.h" +#include "llvm/ExecutionEngine/Orc/Shared/RPCUtils.h" +#include "llvm/ExecutionEngine/Orc/Shared/RawByteChannel.h" namespace llvm { namespace orc { @@ -73,10 +73,9 @@ private: } // end namespace remote -namespace rpc { +namespace shared { -template <> -class RPCTypeName<JITSymbolFlags> { +template <> class SerializationTypeName<JITSymbolFlags> { public: static const char *getName() { return "JITSymbolFlags"; } }; @@ -100,7 +99,7 @@ public: } }; -template <> class RPCTypeName<remote::DirectBufferWriter> { +template <> class SerializationTypeName<remote::DirectBufferWriter> { public: static const char *getName() { return "DirectBufferWriter"; } }; @@ -133,7 +132,7 @@ public: } }; -} // end namespace rpc +} // end namespace shared namespace remote { @@ -167,20 +166,20 @@ private: namespace eh { /// Registers EH frames on the remote. 
- class RegisterEHFrames - : public rpc::Function<RegisterEHFrames, - void(JITTargetAddress Addr, uint32_t Size)> { - public: - static const char *getName() { return "RegisterEHFrames"; } - }; +class RegisterEHFrames + : public shared::RPCFunction<RegisterEHFrames, + void(JITTargetAddress Addr, uint32_t Size)> { +public: + static const char *getName() { return "RegisterEHFrames"; } +}; /// Deregisters EH frames on the remote. - class DeregisterEHFrames - : public rpc::Function<DeregisterEHFrames, - void(JITTargetAddress Addr, uint32_t Size)> { - public: - static const char *getName() { return "DeregisterEHFrames"; } - }; +class DeregisterEHFrames + : public shared::RPCFunction<DeregisterEHFrames, + void(JITTargetAddress Addr, uint32_t Size)> { +public: + static const char *getName() { return "DeregisterEHFrames"; } +}; } // end namespace eh @@ -189,28 +188,38 @@ namespace exec { /// Call an 'int32_t()'-type function on the remote, returns the called /// function's return value. - class CallIntVoid - : public rpc::Function<CallIntVoid, int32_t(JITTargetAddress Addr)> { - public: - static const char *getName() { return "CallIntVoid"; } - }; +class CallIntVoid + : public shared::RPCFunction<CallIntVoid, int32_t(JITTargetAddress Addr)> { +public: + static const char *getName() { return "CallIntVoid"; } +}; + + /// Call an 'int32_t(int32_t)'-type function on the remote, returns the called + /// function's return value. +class CallIntInt + : public shared::RPCFunction<CallIntInt, + int32_t(JITTargetAddress Addr, int)> { +public: + static const char *getName() { return "CallIntInt"; } +}; /// Call an 'int32_t(int32_t, char**)'-type function on the remote, returns the /// called function's return value. - class CallMain - : public rpc::Function<CallMain, int32_t(JITTargetAddress Addr, - std::vector<std::string> Args)> { - public: - static const char *getName() { return "CallMain"; } - }; +class CallMain + : public shared::RPCFunction<CallMain, + int32_t(JITTargetAddress Addr, + std::vector<std::string> Args)> { +public: + static const char *getName() { return "CallMain"; } +}; /// Calls a 'void()'-type function on the remote, returns when the called /// function completes. - class CallVoidVoid - : public rpc::Function<CallVoidVoid, void(JITTargetAddress FnAddr)> { - public: - static const char *getName() { return "CallVoidVoid"; } - }; +class CallVoidVoid + : public shared::RPCFunction<CallVoidVoid, void(JITTargetAddress FnAddr)> { +public: + static const char *getName() { return "CallVoidVoid"; } +}; } // end namespace exec @@ -218,60 +227,62 @@ namespace exec { namespace mem { /// Creates a memory allocator on the remote. - class CreateRemoteAllocator - : public rpc::Function<CreateRemoteAllocator, - void(ResourceIdMgr::ResourceId AllocatorID)> { - public: - static const char *getName() { return "CreateRemoteAllocator"; } - }; +class CreateRemoteAllocator + : public shared::RPCFunction<CreateRemoteAllocator, + void(ResourceIdMgr::ResourceId AllocatorID)> { +public: + static const char *getName() { return "CreateRemoteAllocator"; } +}; /// Destroys a remote allocator, freeing any memory allocated by it. 
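The function classes in the exec:: namespace only fix the wire name and signature of each call; the endpoints supply the behaviour. A sketch, not part of this diff, of how the newly added CallIntInt travels end to end, relying on the callIntInt wrapper and handleCallIntInt handler added elsewhere in this patch (Client and TargetFnAddr are assumed to exist):

  // Client side: callIntInt() forwards to callB<exec::CallIntInt>(Addr, Arg),
  // which serializes the function id, a sequence number and the arguments,
  // then blocks for the serialized int32_t result.
  llvm::ExitOnError ExitOnErr;
  int Result = ExitOnErr(Client.callIntInt(TargetFnAddr, 42));
  // Server side: the OrcRemoteTargetServer constructor registers
  // addHandler<exec::CallIntInt>(*this, &ThisT::handleCallIntInt); the
  // handler casts Addr back to int(*)(int) and invokes it in-process.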
- class DestroyRemoteAllocator - : public rpc::Function<DestroyRemoteAllocator, - void(ResourceIdMgr::ResourceId AllocatorID)> { - public: - static const char *getName() { return "DestroyRemoteAllocator"; } - }; +class DestroyRemoteAllocator + : public shared::RPCFunction<DestroyRemoteAllocator, + void(ResourceIdMgr::ResourceId AllocatorID)> { +public: + static const char *getName() { return "DestroyRemoteAllocator"; } +}; /// Read a remote memory block. - class ReadMem - : public rpc::Function<ReadMem, std::vector<uint8_t>(JITTargetAddress Src, - uint64_t Size)> { - public: - static const char *getName() { return "ReadMem"; } - }; +class ReadMem + : public shared::RPCFunction< + ReadMem, std::vector<uint8_t>(JITTargetAddress Src, uint64_t Size)> { +public: + static const char *getName() { return "ReadMem"; } +}; /// Reserve a block of memory on the remote via the given allocator. - class ReserveMem - : public rpc::Function<ReserveMem, - JITTargetAddress(ResourceIdMgr::ResourceId AllocID, - uint64_t Size, uint32_t Align)> { - public: - static const char *getName() { return "ReserveMem"; } - }; +class ReserveMem + : public shared::RPCFunction< + ReserveMem, JITTargetAddress(ResourceIdMgr::ResourceId AllocID, + uint64_t Size, uint32_t Align)> { +public: + static const char *getName() { return "ReserveMem"; } +}; /// Set the memory protection on a memory block. - class SetProtections - : public rpc::Function<SetProtections, - void(ResourceIdMgr::ResourceId AllocID, - JITTargetAddress Dst, uint32_t ProtFlags)> { - public: - static const char *getName() { return "SetProtections"; } - }; +class SetProtections + : public shared::RPCFunction< + SetProtections, void(ResourceIdMgr::ResourceId AllocID, + JITTargetAddress Dst, uint32_t ProtFlags)> { +public: + static const char *getName() { return "SetProtections"; } +}; /// Write to a remote memory block. - class WriteMem - : public rpc::Function<WriteMem, void(remote::DirectBufferWriter DB)> { - public: - static const char *getName() { return "WriteMem"; } - }; +class WriteMem + : public shared::RPCFunction<WriteMem, + void(remote::DirectBufferWriter DB)> { +public: + static const char *getName() { return "WriteMem"; } +}; /// Write to a remote pointer. - class WritePtr : public rpc::Function<WritePtr, void(JITTargetAddress Dst, - JITTargetAddress Val)> { - public: - static const char *getName() { return "WritePtr"; } - }; +class WritePtr + : public shared::RPCFunction<WritePtr, void(JITTargetAddress Dst, + JITTargetAddress Val)> { +public: + static const char *getName() { return "WritePtr"; } +}; } // end namespace mem @@ -279,45 +290,46 @@ namespace mem { namespace stubs { /// Creates an indirect stub owner on the remote. - class CreateIndirectStubsOwner - : public rpc::Function<CreateIndirectStubsOwner, - void(ResourceIdMgr::ResourceId StubOwnerID)> { - public: - static const char *getName() { return "CreateIndirectStubsOwner"; } - }; +class CreateIndirectStubsOwner + : public shared::RPCFunction<CreateIndirectStubsOwner, + void(ResourceIdMgr::ResourceId StubOwnerID)> { +public: + static const char *getName() { return "CreateIndirectStubsOwner"; } +}; /// RPC function for destroying an indirect stubs owner. 
- class DestroyIndirectStubsOwner - : public rpc::Function<DestroyIndirectStubsOwner, - void(ResourceIdMgr::ResourceId StubsOwnerID)> { - public: - static const char *getName() { return "DestroyIndirectStubsOwner"; } - }; +class DestroyIndirectStubsOwner + : public shared::RPCFunction<DestroyIndirectStubsOwner, + void(ResourceIdMgr::ResourceId StubsOwnerID)> { +public: + static const char *getName() { return "DestroyIndirectStubsOwner"; } +}; /// EmitIndirectStubs result is (StubsBase, PtrsBase, NumStubsEmitted). - class EmitIndirectStubs - : public rpc::Function< - EmitIndirectStubs, - std::tuple<JITTargetAddress, JITTargetAddress, uint32_t>( - ResourceIdMgr::ResourceId StubsOwnerID, - uint32_t NumStubsRequired)> { - public: - static const char *getName() { return "EmitIndirectStubs"; } - }; +class EmitIndirectStubs + : public shared::RPCFunction< + EmitIndirectStubs, + std::tuple<JITTargetAddress, JITTargetAddress, uint32_t>( + ResourceIdMgr::ResourceId StubsOwnerID, + uint32_t NumStubsRequired)> { +public: + static const char *getName() { return "EmitIndirectStubs"; } +}; /// RPC function to emit the resolver block and return its address. - class EmitResolverBlock : public rpc::Function<EmitResolverBlock, void()> { - public: - static const char *getName() { return "EmitResolverBlock"; } - }; +class EmitResolverBlock + : public shared::RPCFunction<EmitResolverBlock, void()> { +public: + static const char *getName() { return "EmitResolverBlock"; } +}; /// EmitTrampolineBlock result is (BlockAddr, NumTrampolines). - class EmitTrampolineBlock - : public rpc::Function<EmitTrampolineBlock, - std::tuple<JITTargetAddress, uint32_t>()> { - public: - static const char *getName() { return "EmitTrampolineBlock"; } - }; +class EmitTrampolineBlock + : public shared::RPCFunction<EmitTrampolineBlock, + std::tuple<JITTargetAddress, uint32_t>()> { +public: + static const char *getName() { return "EmitTrampolineBlock"; } +}; } // end namespace stubs @@ -326,44 +338,44 @@ namespace utils { /// GetRemoteInfo result is (Triple, PointerSize, PageSize, TrampolineSize, /// IndirectStubsSize). - class GetRemoteInfo - : public rpc::Function< - GetRemoteInfo, - std::tuple<std::string, uint32_t, uint32_t, uint32_t, uint32_t>()> { - public: - static const char *getName() { return "GetRemoteInfo"; } - }; +class GetRemoteInfo + : public shared::RPCFunction< + GetRemoteInfo, + std::tuple<std::string, uint32_t, uint32_t, uint32_t, uint32_t>()> { +public: + static const char *getName() { return "GetRemoteInfo"; } +}; /// Get the address of a remote symbol. - class GetSymbolAddress - : public rpc::Function<GetSymbolAddress, - JITTargetAddress(std::string SymbolName)> { - public: - static const char *getName() { return "GetSymbolAddress"; } - }; +class GetSymbolAddress + : public shared::RPCFunction<GetSymbolAddress, + JITTargetAddress(std::string SymbolName)> { +public: + static const char *getName() { return "GetSymbolAddress"; } +}; /// Request that the host execute a compile callback. - class RequestCompile - : public rpc::Function< - RequestCompile, JITTargetAddress(JITTargetAddress TrampolineAddr)> { - public: - static const char *getName() { return "RequestCompile"; } - }; +class RequestCompile + : public shared::RPCFunction< + RequestCompile, JITTargetAddress(JITTargetAddress TrampolineAddr)> { +public: + static const char *getName() { return "RequestCompile"; } +}; /// Notify the remote and terminate the session. 
- class TerminateSession : public rpc::Function<TerminateSession, void()> { - public: - static const char *getName() { return "TerminateSession"; } - }; +class TerminateSession : public shared::RPCFunction<TerminateSession, void()> { +public: + static const char *getName() { return "TerminateSession"; } +}; } // namespace utils class OrcRemoteTargetRPCAPI - : public rpc::SingleThreadedRPCEndpoint<rpc::RawByteChannel> { + : public shared::SingleThreadedRPCEndpoint<shared::RawByteChannel> { public: // FIXME: Remove constructors once MSVC supports synthesizing move-ops. - OrcRemoteTargetRPCAPI(rpc::RawByteChannel &C) - : rpc::SingleThreadedRPCEndpoint<rpc::RawByteChannel>(C, true) {} + OrcRemoteTargetRPCAPI(shared::RawByteChannel &C) + : shared::SingleThreadedRPCEndpoint<shared::RawByteChannel>(C, true) {} }; } // end namespace remote diff --git a/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetServer.h b/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetServer.h index 50c155d77db1..ce9bf064303d 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetServer.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetServer.h @@ -16,8 +16,8 @@ #include "llvm/ExecutionEngine/JITSymbol.h" #include "llvm/ExecutionEngine/Orc/IndirectionUtils.h" -#include "llvm/ExecutionEngine/Orc/OrcError.h" #include "llvm/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.h" +#include "llvm/ExecutionEngine/Orc/Shared/OrcError.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Error.h" #include "llvm/Support/Format.h" @@ -46,7 +46,7 @@ namespace remote { template <typename ChannelT, typename TargetT> class OrcRemoteTargetServer - : public rpc::SingleThreadedRPCEndpoint<rpc::RawByteChannel> { + : public shared::SingleThreadedRPCEndpoint<shared::RawByteChannel> { public: using SymbolLookupFtor = std::function<JITTargetAddress(const std::string &Name)>; @@ -57,12 +57,14 @@ public: OrcRemoteTargetServer(ChannelT &Channel, SymbolLookupFtor SymbolLookup, EHFrameRegistrationFtor EHFramesRegister, EHFrameRegistrationFtor EHFramesDeregister) - : rpc::SingleThreadedRPCEndpoint<rpc::RawByteChannel>(Channel, true), + : shared::SingleThreadedRPCEndpoint<shared::RawByteChannel>(Channel, + true), SymbolLookup(std::move(SymbolLookup)), EHFramesRegister(std::move(EHFramesRegister)), EHFramesDeregister(std::move(EHFramesDeregister)) { using ThisT = std::remove_reference_t<decltype(*this)>; addHandler<exec::CallIntVoid>(*this, &ThisT::handleCallIntVoid); + addHandler<exec::CallIntInt>(*this, &ThisT::handleCallIntInt); addHandler<exec::CallMain>(*this, &ThisT::handleCallMain); addHandler<exec::CallVoidVoid>(*this, &ThisT::handleCallVoidVoid); addHandler<mem::CreateRemoteAllocator>(*this, @@ -168,6 +170,19 @@ private: return Result; } + Expected<int32_t> handleCallIntInt(JITTargetAddress Addr, int Arg) { + using IntIntFnTy = int (*)(int); + + IntIntFnTy Fn = reinterpret_cast<IntIntFnTy>(static_cast<uintptr_t>(Addr)); + + LLVM_DEBUG(dbgs() << " Calling " << format("0x%016x", Addr) + << " with argument " << Arg << "\n"); + int Result = Fn(Arg); + LLVM_DEBUG(dbgs() << " Result = " << Result << "\n"); + + return Result; + } + Expected<int32_t> handleCallMain(JITTargetAddress Addr, std::vector<std::string> Args) { using MainFnTy = int (*)(int, const char *[]); diff --git a/llvm/include/llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h b/llvm/include/llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h index 9ada0871cf0c..7dfbf32b1ffa 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h +++ 
b/llvm/include/llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h @@ -20,7 +20,6 @@ #include "llvm/ExecutionEngine/JITSymbol.h" #include "llvm/ExecutionEngine/Orc/Core.h" #include "llvm/ExecutionEngine/Orc/Layer.h" -#include "llvm/ExecutionEngine/Orc/Legacy.h" #include "llvm/ExecutionEngine/RuntimeDyld.h" #include "llvm/Object/ObjectFile.h" #include "llvm/Support/Error.h" @@ -36,16 +35,16 @@ namespace llvm { namespace orc { -class RTDyldObjectLinkingLayer : public ObjectLayer { +class RTDyldObjectLinkingLayer : public ObjectLayer, private ResourceManager { public: /// Functor for receiving object-loaded notifications. - using NotifyLoadedFunction = - std::function<void(VModuleKey, const object::ObjectFile &Obj, - const RuntimeDyld::LoadedObjectInfo &)>; + using NotifyLoadedFunction = std::function<void( + MaterializationResponsibility &R, const object::ObjectFile &Obj, + const RuntimeDyld::LoadedObjectInfo &)>; /// Functor for receiving finalization notifications. - using NotifyEmittedFunction = - std::function<void(VModuleKey, std::unique_ptr<MemoryBuffer>)>; + using NotifyEmittedFunction = std::function<void( + MaterializationResponsibility &R, std::unique_ptr<MemoryBuffer>)>; using GetMemoryManagerFunction = std::function<std::unique_ptr<RuntimeDyld::MemoryManager>()>; @@ -58,7 +57,7 @@ public: ~RTDyldObjectLinkingLayer(); /// Emit the object. - void emit(MaterializationResponsibility R, + void emit(std::unique_ptr<MaterializationResponsibility> R, std::unique_ptr<MemoryBuffer> O) override; /// Set the NotifyLoaded callback. @@ -123,16 +122,23 @@ public: void unregisterJITEventListener(JITEventListener &L); private: - Error onObjLoad(VModuleKey K, MaterializationResponsibility &R, + using MemoryManagerUP = std::unique_ptr<RuntimeDyld::MemoryManager>; + + Error onObjLoad(MaterializationResponsibility &R, const object::ObjectFile &Obj, - RuntimeDyld::MemoryManager *MemMgr, - std::unique_ptr<RuntimeDyld::LoadedObjectInfo> LoadedObjInfo, + RuntimeDyld::MemoryManager &MemMgr, + RuntimeDyld::LoadedObjectInfo &LoadedObjInfo, std::map<StringRef, JITEvaluatedSymbol> Resolved, std::set<StringRef> &InternalSymbols); - void onObjEmit(VModuleKey K, MaterializationResponsibility &R, + void onObjEmit(MaterializationResponsibility &R, object::OwningBinary<object::ObjectFile> O, - RuntimeDyld::MemoryManager *MemMgr, Error Err); + std::unique_ptr<RuntimeDyld::MemoryManager> MemMgr, + std::unique_ptr<RuntimeDyld::LoadedObjectInfo> LoadedObjInfo, + Error Err); + + Error handleRemoveResources(ResourceKey K) override; + void handleTransferResources(ResourceKey DstKey, ResourceKey SrcKey) override; mutable std::mutex RTDyldLayerMutex; GetMemoryManagerFunction GetMemoryManager; @@ -141,361 +147,8 @@ private: bool ProcessAllSections = false; bool OverrideObjectFlags = false; bool AutoClaimObjectSymbols = false; - std::vector<std::unique_ptr<RuntimeDyld::MemoryManager>> MemMgrs; + DenseMap<ResourceKey, std::vector<MemoryManagerUP>> MemMgrs; std::vector<JITEventListener *> EventListeners; - DenseMap<RuntimeDyld::MemoryManager *, - std::unique_ptr<RuntimeDyld::LoadedObjectInfo>> - LoadedObjInfos; -}; - -class LegacyRTDyldObjectLinkingLayerBase { -public: - using ObjectPtr = std::unique_ptr<MemoryBuffer>; - -protected: - - /// Holds an object to be allocated/linked as a unit in the JIT. - /// - /// An instance of this class will be created for each object added - /// via JITObjectLayer::addObject. 
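With VModuleKey gone, both RTDyldObjectLinkingLayer notification callbacks shown above now receive the MaterializationResponsibility for the object being emitted. A sketch, not part of this diff, of registering a loaded-notification against the new signature; ObjLayer is an assumed RTDyldObjectLinkingLayer instance:

  ObjLayer.setNotifyLoaded(
      [](llvm::orc::MaterializationResponsibility &R,
         const llvm::object::ObjectFile &Obj,
         const llvm::RuntimeDyld::LoadedObjectInfo &Info) {
        // Per-object bookkeeping that used to be keyed on VModuleKey can be
        // keyed on R (or on the ResourceTracker associated with it) instead.
      });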
Deleting the instance (via - /// removeObject) frees its memory, removing all symbol definitions that - /// had been provided by this instance. Higher level layers are responsible - /// for taking any action required to handle the missing symbols. - class LinkedObject { - public: - LinkedObject() = default; - LinkedObject(const LinkedObject&) = delete; - void operator=(const LinkedObject&) = delete; - virtual ~LinkedObject() = default; - - virtual Error finalize() = 0; - - virtual JITSymbol::GetAddressFtor - getSymbolMaterializer(std::string Name) = 0; - - virtual void mapSectionAddress(const void *LocalAddress, - JITTargetAddress TargetAddr) const = 0; - - JITSymbol getSymbol(StringRef Name, bool ExportedSymbolsOnly) { - auto SymEntry = SymbolTable.find(Name); - if (SymEntry == SymbolTable.end()) - return nullptr; - if (!SymEntry->second.getFlags().isExported() && ExportedSymbolsOnly) - return nullptr; - if (!Finalized) - return JITSymbol(getSymbolMaterializer(std::string(Name)), - SymEntry->second.getFlags()); - return JITSymbol(SymEntry->second); - } - - protected: - StringMap<JITEvaluatedSymbol> SymbolTable; - bool Finalized = false; - }; -}; - -/// Bare bones object linking layer. -/// -/// This class is intended to be used as the base layer for a JIT. It allows -/// object files to be loaded into memory, linked, and the addresses of their -/// symbols queried. All objects added to this layer can see each other's -/// symbols. -class LegacyRTDyldObjectLinkingLayer : public LegacyRTDyldObjectLinkingLayerBase { -public: - - using LegacyRTDyldObjectLinkingLayerBase::ObjectPtr; - - /// Functor for receiving object-loaded notifications. - using NotifyLoadedFtor = - std::function<void(VModuleKey, const object::ObjectFile &Obj, - const RuntimeDyld::LoadedObjectInfo &)>; - - /// Functor for receiving finalization notifications. - using NotifyFinalizedFtor = - std::function<void(VModuleKey, const object::ObjectFile &Obj, - const RuntimeDyld::LoadedObjectInfo &)>; - - /// Functor for receiving deallocation notifications. - using NotifyFreedFtor = std::function<void(VModuleKey, const object::ObjectFile &Obj)>; - -private: - using OwnedObject = object::OwningBinary<object::ObjectFile>; - - template <typename MemoryManagerPtrT> - class ConcreteLinkedObject : public LinkedObject { - public: - ConcreteLinkedObject(LegacyRTDyldObjectLinkingLayer &Parent, VModuleKey K, - OwnedObject Obj, MemoryManagerPtrT MemMgr, - std::shared_ptr<SymbolResolver> Resolver, - bool ProcessAllSections) - : K(std::move(K)), - Parent(Parent), - MemMgr(std::move(MemMgr)), - PFC(std::make_unique<PreFinalizeContents>( - std::move(Obj), std::move(Resolver), - ProcessAllSections)) { - buildInitialSymbolTable(PFC->Obj); - } - - ~ConcreteLinkedObject() override { - if (this->Parent.NotifyFreed && ObjForNotify.getBinary()) - this->Parent.NotifyFreed(K, *ObjForNotify.getBinary()); - - MemMgr->deregisterEHFrames(); - } - - Error finalize() override { - assert(PFC && "mapSectionAddress called on finalized LinkedObject"); - - JITSymbolResolverAdapter ResolverAdapter(Parent.ES, *PFC->Resolver, - nullptr); - PFC->RTDyld = std::make_unique<RuntimeDyld>(*MemMgr, ResolverAdapter); - PFC->RTDyld->setProcessAllSections(PFC->ProcessAllSections); - - Finalized = true; - - std::unique_ptr<RuntimeDyld::LoadedObjectInfo> Info = - PFC->RTDyld->loadObject(*PFC->Obj.getBinary()); - - // Copy the symbol table out of the RuntimeDyld instance. 
- { - auto SymTab = PFC->RTDyld->getSymbolTable(); - for (auto &KV : SymTab) - SymbolTable[KV.first] = KV.second; - } - - if (Parent.NotifyLoaded) - Parent.NotifyLoaded(K, *PFC->Obj.getBinary(), *Info); - - PFC->RTDyld->finalizeWithMemoryManagerLocking(); - - if (PFC->RTDyld->hasError()) - return make_error<StringError>(PFC->RTDyld->getErrorString(), - inconvertibleErrorCode()); - - if (Parent.NotifyFinalized) - Parent.NotifyFinalized(K, *PFC->Obj.getBinary(), *Info); - - // Release resources. - if (this->Parent.NotifyFreed) - ObjForNotify = std::move(PFC->Obj); // needed for callback - PFC = nullptr; - return Error::success(); - } - - JITSymbol::GetAddressFtor getSymbolMaterializer(std::string Name) override { - return [this, Name]() -> Expected<JITTargetAddress> { - // The symbol may be materialized between the creation of this lambda - // and its execution, so we need to double check. - if (!this->Finalized) - if (auto Err = this->finalize()) - return std::move(Err); - return this->getSymbol(Name, false).getAddress(); - }; - } - - void mapSectionAddress(const void *LocalAddress, - JITTargetAddress TargetAddr) const override { - assert(PFC && "mapSectionAddress called on finalized LinkedObject"); - assert(PFC->RTDyld && "mapSectionAddress called on raw LinkedObject"); - PFC->RTDyld->mapSectionAddress(LocalAddress, TargetAddr); - } - - private: - void buildInitialSymbolTable(const OwnedObject &Obj) { - for (auto &Symbol : Obj.getBinary()->symbols()) { - if (Expected<uint32_t> SymbolFlagsOrErr = Symbol.getFlags()) { - if (*SymbolFlagsOrErr & object::SymbolRef::SF_Undefined) - continue; - } else { - // FIXME: Raise an error for bad symbols. - consumeError(SymbolFlagsOrErr.takeError()); - continue; - } - - Expected<StringRef> SymbolName = Symbol.getName(); - // FIXME: Raise an error for bad symbols. - if (!SymbolName) { - consumeError(SymbolName.takeError()); - continue; - } - // FIXME: Raise an error for bad symbols. - auto Flags = JITSymbolFlags::fromObjectSymbol(Symbol); - if (!Flags) { - consumeError(Flags.takeError()); - continue; - } - SymbolTable.insert( - std::make_pair(*SymbolName, JITEvaluatedSymbol(0, *Flags))); - } - } - - // Contains the information needed prior to finalization: the object files, - // memory manager, resolver, and flags needed for RuntimeDyld. 
- struct PreFinalizeContents { - PreFinalizeContents(OwnedObject Obj, - std::shared_ptr<SymbolResolver> Resolver, - bool ProcessAllSections) - : Obj(std::move(Obj)), - Resolver(std::move(Resolver)), - ProcessAllSections(ProcessAllSections) {} - - OwnedObject Obj; - std::shared_ptr<SymbolResolver> Resolver; - bool ProcessAllSections; - std::unique_ptr<RuntimeDyld> RTDyld; - }; - - VModuleKey K; - LegacyRTDyldObjectLinkingLayer &Parent; - MemoryManagerPtrT MemMgr; - OwnedObject ObjForNotify; - std::unique_ptr<PreFinalizeContents> PFC; - }; - - template <typename MemoryManagerPtrT> - std::unique_ptr<ConcreteLinkedObject<MemoryManagerPtrT>> - createLinkedObject(LegacyRTDyldObjectLinkingLayer &Parent, VModuleKey K, - OwnedObject Obj, MemoryManagerPtrT MemMgr, - std::shared_ptr<SymbolResolver> Resolver, - bool ProcessAllSections) { - using LOS = ConcreteLinkedObject<MemoryManagerPtrT>; - return std::make_unique<LOS>(Parent, std::move(K), std::move(Obj), - std::move(MemMgr), std::move(Resolver), - ProcessAllSections); - } - -public: - struct Resources { - std::shared_ptr<RuntimeDyld::MemoryManager> MemMgr; - std::shared_ptr<SymbolResolver> Resolver; - }; - - using ResourcesGetter = std::function<Resources(VModuleKey)>; - - /// Construct an ObjectLinkingLayer with the given NotifyLoaded, - /// and NotifyFinalized functors. - LLVM_ATTRIBUTE_DEPRECATED( - LegacyRTDyldObjectLinkingLayer( - ExecutionSession &ES, ResourcesGetter GetResources, - NotifyLoadedFtor NotifyLoaded = NotifyLoadedFtor(), - NotifyFinalizedFtor NotifyFinalized = NotifyFinalizedFtor(), - NotifyFreedFtor NotifyFreed = NotifyFreedFtor()), - "ORCv1 layers (layers with the 'Legacy' prefix) are deprecated. Please " - "use " - "ORCv2 (see docs/ORCv2.rst)"); - - // Legacy layer constructor with deprecation acknowledgement. - LegacyRTDyldObjectLinkingLayer( - ORCv1DeprecationAcknowledgement, ExecutionSession &ES, - ResourcesGetter GetResources, - NotifyLoadedFtor NotifyLoaded = NotifyLoadedFtor(), - NotifyFinalizedFtor NotifyFinalized = NotifyFinalizedFtor(), - NotifyFreedFtor NotifyFreed = NotifyFreedFtor()) - : ES(ES), GetResources(std::move(GetResources)), - NotifyLoaded(std::move(NotifyLoaded)), - NotifyFinalized(std::move(NotifyFinalized)), - NotifyFreed(std::move(NotifyFreed)), ProcessAllSections(false) {} - - /// Set the 'ProcessAllSections' flag. - /// - /// If set to true, all sections in each object file will be allocated using - /// the memory manager, rather than just the sections required for execution. - /// - /// This is kludgy, and may be removed in the future. - void setProcessAllSections(bool ProcessAllSections) { - this->ProcessAllSections = ProcessAllSections; - } - - /// Add an object to the JIT. - Error addObject(VModuleKey K, ObjectPtr ObjBuffer) { - - auto Obj = - object::ObjectFile::createObjectFile(ObjBuffer->getMemBufferRef()); - if (!Obj) - return Obj.takeError(); - - assert(!LinkedObjects.count(K) && "VModuleKey already in use"); - - auto R = GetResources(K); - - LinkedObjects[K] = createLinkedObject( - *this, K, OwnedObject(std::move(*Obj), std::move(ObjBuffer)), - std::move(R.MemMgr), std::move(R.Resolver), ProcessAllSections); - - return Error::success(); - } - - /// Remove the object associated with VModuleKey K. - /// - /// All memory allocated for the object will be freed, and the sections and - /// symbols it provided will no longer be available. No attempt is made to - /// re-emit the missing symbols, and any use of these symbols (directly or - /// indirectly) will result in undefined behavior. 
If dependence tracking is - /// required to detect or resolve such issues it should be added at a higher - /// layer. - Error removeObject(VModuleKey K) { - assert(LinkedObjects.count(K) && "VModuleKey not associated with object"); - // How do we invalidate the symbols in H? - LinkedObjects.erase(K); - return Error::success(); - } - - /// Search for the given named symbol. - /// @param Name The name of the symbol to search for. - /// @param ExportedSymbolsOnly If true, search only for exported symbols. - /// @return A handle for the given named symbol, if it exists. - JITSymbol findSymbol(StringRef Name, bool ExportedSymbolsOnly) { - for (auto &KV : LinkedObjects) - if (auto Sym = KV.second->getSymbol(Name, ExportedSymbolsOnly)) - return Sym; - else if (auto Err = Sym.takeError()) - return std::move(Err); - - return nullptr; - } - - /// Search for the given named symbol in the context of the loaded - /// object represented by the VModuleKey K. - /// @param K The VModuleKey for the object to search in. - /// @param Name The name of the symbol to search for. - /// @param ExportedSymbolsOnly If true, search only for exported symbols. - /// @return A handle for the given named symbol, if it is found in the - /// given object. - JITSymbol findSymbolIn(VModuleKey K, StringRef Name, - bool ExportedSymbolsOnly) { - assert(LinkedObjects.count(K) && "VModuleKey not associated with object"); - return LinkedObjects[K]->getSymbol(Name, ExportedSymbolsOnly); - } - - /// Map section addresses for the object associated with the - /// VModuleKey K. - void mapSectionAddress(VModuleKey K, const void *LocalAddress, - JITTargetAddress TargetAddr) { - assert(LinkedObjects.count(K) && "VModuleKey not associated with object"); - LinkedObjects[K]->mapSectionAddress(LocalAddress, TargetAddr); - } - - /// Immediately emit and finalize the object represented by the given - /// VModuleKey. - /// @param K VModuleKey for object to emit/finalize. - Error emitAndFinalize(VModuleKey K) { - assert(LinkedObjects.count(K) && "VModuleKey not associated with object"); - return LinkedObjects[K]->finalize(); - } - -private: - ExecutionSession &ES; - - ResourcesGetter GetResources; - NotifyLoadedFtor NotifyLoaded; - NotifyFinalizedFtor NotifyFinalized; - NotifyFreedFtor NotifyFreed; - - // NB! `LinkedObjects` needs to be destroyed before `NotifyFreed` because - // `~ConcreteLinkedObject` calls `NotifyFreed` - std::map<VModuleKey, std::unique_ptr<LinkedObject>> LinkedObjects; - bool ProcessAllSections = false; }; } // end namespace orc diff --git a/llvm/include/llvm/ExecutionEngine/Orc/RemoteObjectLayer.h b/llvm/include/llvm/ExecutionEngine/Orc/RemoteObjectLayer.h deleted file mode 100644 index d7304cfcf931..000000000000 --- a/llvm/include/llvm/ExecutionEngine/Orc/RemoteObjectLayer.h +++ /dev/null @@ -1,564 +0,0 @@ -//===------ RemoteObjectLayer.h - Forwards objs to a remote -----*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// Forwards objects to a remote object layer via RPC. 
-// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_EXECUTIONENGINE_ORC_REMOTEOBJECTLAYER_H -#define LLVM_EXECUTIONENGINE_ORC_REMOTEOBJECTLAYER_H - -#include "llvm/ExecutionEngine/Orc/Core.h" -#include "llvm/ExecutionEngine/Orc/LambdaResolver.h" -#include "llvm/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.h" -#include "llvm/Object/ObjectFile.h" -#include <map> - -namespace llvm { -namespace orc { - -/// RPC API needed by RemoteObjectClientLayer and RemoteObjectServerLayer. -class RemoteObjectLayerAPI { -public: - - using ObjHandleT = remote::ResourceIdMgr::ResourceId; - -protected: - - using RemoteSymbolId = remote::ResourceIdMgr::ResourceId; - using RemoteSymbol = std::pair<RemoteSymbolId, JITSymbolFlags>; - -public: - - using BadSymbolHandleError = remote::ResourceNotFound<RemoteSymbolId>; - using BadObjectHandleError = remote::ResourceNotFound<ObjHandleT>; - -protected: - - static const ObjHandleT InvalidObjectHandleId = 0; - static const RemoteSymbolId NullSymbolId = 0; - - class AddObject - : public rpc::Function<AddObject, Expected<ObjHandleT>(std::string)> { - public: - static const char *getName() { return "AddObject"; } - }; - - class RemoveObject - : public rpc::Function<RemoveObject, Error(ObjHandleT)> { - public: - static const char *getName() { return "RemoveObject"; } - }; - - class FindSymbol - : public rpc::Function<FindSymbol, Expected<RemoteSymbol>(std::string, - bool)> { - public: - static const char *getName() { return "FindSymbol"; } - }; - - class FindSymbolIn - : public rpc::Function<FindSymbolIn, - Expected<RemoteSymbol>(ObjHandleT, std::string, - bool)> { - public: - static const char *getName() { return "FindSymbolIn"; } - }; - - class EmitAndFinalize - : public rpc::Function<EmitAndFinalize, - Error(ObjHandleT)> { - public: - static const char *getName() { return "EmitAndFinalize"; } - }; - - class Lookup - : public rpc::Function<Lookup, - Expected<RemoteSymbol>(ObjHandleT, std::string)> { - public: - static const char *getName() { return "Lookup"; } - }; - - class LookupInLogicalDylib - : public rpc::Function<LookupInLogicalDylib, - Expected<RemoteSymbol>(ObjHandleT, std::string)> { - public: - static const char *getName() { return "LookupInLogicalDylib"; } - }; - - class ReleaseRemoteSymbol - : public rpc::Function<ReleaseRemoteSymbol, Error(RemoteSymbolId)> { - public: - static const char *getName() { return "ReleaseRemoteSymbol"; } - }; - - class MaterializeRemoteSymbol - : public rpc::Function<MaterializeRemoteSymbol, - Expected<JITTargetAddress>(RemoteSymbolId)> { - public: - static const char *getName() { return "MaterializeRemoteSymbol"; } - }; -}; - -/// Base class containing common utilities for RemoteObjectClientLayer and -/// RemoteObjectServerLayer. 
-template <typename RPCEndpoint> -class RemoteObjectLayer : public RemoteObjectLayerAPI { -public: - - RemoteObjectLayer(RPCEndpoint &Remote, - std::function<void(Error)> ReportError) - : Remote(Remote), ReportError(std::move(ReportError)), - SymbolIdMgr(NullSymbolId + 1) { - using ThisT = RemoteObjectLayer<RPCEndpoint>; - Remote.template addHandler<ReleaseRemoteSymbol>( - *this, &ThisT::handleReleaseRemoteSymbol); - Remote.template addHandler<MaterializeRemoteSymbol>( - *this, &ThisT::handleMaterializeRemoteSymbol); - } - -protected: - - /// This class is used as the symbol materializer for JITSymbols returned by - /// RemoteObjectLayerClient/RemoteObjectLayerServer -- the materializer knows - /// how to call back to the other RPC endpoint to get the address when - /// requested. - class RemoteSymbolMaterializer { - public: - - /// Construct a RemoteSymbolMaterializer for the given RemoteObjectLayer - /// with the given Id. - RemoteSymbolMaterializer(RemoteObjectLayer &C, - RemoteSymbolId Id) - : C(C), Id(Id) {} - - RemoteSymbolMaterializer(RemoteSymbolMaterializer &&Other) - : C(Other.C), Id(Other.Id) { - Other.Id = 0; - } - - RemoteSymbolMaterializer &operator=(RemoteSymbolMaterializer &&) = delete; - - /// Release the remote symbol. - ~RemoteSymbolMaterializer() { - if (Id) - C.releaseRemoteSymbol(Id); - } - - /// Materialize the symbol on the remote and get its address. - Expected<JITTargetAddress> materialize() { - auto Addr = C.materializeRemoteSymbol(Id); - Id = 0; - return Addr; - } - - private: - RemoteObjectLayer &C; - RemoteSymbolId Id; - }; - - /// Convenience function for getting a null remote symbol value. - RemoteSymbol nullRemoteSymbol() { - return RemoteSymbol(0, JITSymbolFlags()); - } - - /// Creates a StringError that contains a copy of Err's log message, then - /// sends that StringError to ReportError. - /// - /// This allows us to locally log error messages for errors that will actually - /// be delivered to the remote. - Error teeLog(Error Err) { - return handleErrors(std::move(Err), - [this](std::unique_ptr<ErrorInfoBase> EIB) { - ReportError(make_error<StringError>( - EIB->message(), - EIB->convertToErrorCode())); - return Error(std::move(EIB)); - }); - } - - Error badRemoteSymbolIdError(RemoteSymbolId Id) { - return make_error<BadSymbolHandleError>(Id, "Remote JIT Symbol"); - } - - Error badObjectHandleError(ObjHandleT H) { - return make_error<RemoteObjectLayerAPI::BadObjectHandleError>( - H, "Bad object handle"); - } - - /// Create a RemoteSymbol wrapping the given JITSymbol. - Expected<RemoteSymbol> jitSymbolToRemote(JITSymbol Sym) { - if (Sym) { - auto Id = SymbolIdMgr.getNext(); - auto Flags = Sym.getFlags(); - assert(!InUseSymbols.count(Id) && "Symbol id already in use"); - InUseSymbols.insert(std::make_pair(Id, std::move(Sym))); - return RemoteSymbol(Id, Flags); - } else if (auto Err = Sym.takeError()) - return teeLog(std::move(Err)); - // else... - return nullRemoteSymbol(); - } - - /// Convert an Expected<RemoteSymbol> to a JITSymbol. - JITSymbol remoteToJITSymbol(Expected<RemoteSymbol> RemoteSymOrErr) { - if (RemoteSymOrErr) { - auto &RemoteSym = *RemoteSymOrErr; - if (RemoteSym == nullRemoteSymbol()) - return nullptr; - // else... 
- RemoteSymbolMaterializer RSM(*this, RemoteSym.first); - auto Sym = JITSymbol( - [RSM = std::move(RSM)]() mutable { return RSM.materialize(); }, - RemoteSym.second); - return Sym; - } else - return RemoteSymOrErr.takeError(); - } - - RPCEndpoint &Remote; - std::function<void(Error)> ReportError; - -private: - - /// Notify the remote to release the given JITSymbol. - void releaseRemoteSymbol(RemoteSymbolId Id) { - if (auto Err = Remote.template callB<ReleaseRemoteSymbol>(Id)) - ReportError(std::move(Err)); - } - - /// Notify the remote to materialize the JITSymbol with the given Id and - /// return its address. - Expected<JITTargetAddress> materializeRemoteSymbol(RemoteSymbolId Id) { - return Remote.template callB<MaterializeRemoteSymbol>(Id); - } - - /// Release the JITSymbol with the given Id. - Error handleReleaseRemoteSymbol(RemoteSymbolId Id) { - auto SI = InUseSymbols.find(Id); - if (SI != InUseSymbols.end()) { - InUseSymbols.erase(SI); - return Error::success(); - } else - return teeLog(badRemoteSymbolIdError(Id)); - } - - /// Run the materializer for the JITSymbol with the given Id and return its - /// address. - Expected<JITTargetAddress> handleMaterializeRemoteSymbol(RemoteSymbolId Id) { - auto SI = InUseSymbols.find(Id); - if (SI != InUseSymbols.end()) { - auto AddrOrErr = SI->second.getAddress(); - InUseSymbols.erase(SI); - SymbolIdMgr.release(Id); - if (AddrOrErr) - return *AddrOrErr; - else - return teeLog(AddrOrErr.takeError()); - } else { - return teeLog(badRemoteSymbolIdError(Id)); - } - } - - remote::ResourceIdMgr SymbolIdMgr; - std::map<RemoteSymbolId, JITSymbol> InUseSymbols; -}; - -/// RemoteObjectClientLayer forwards the ORC Object Layer API over an RPC -/// connection. -/// -/// This class can be used as the base layer of a JIT stack on the client and -/// will forward operations to a corresponding RemoteObjectServerLayer on the -/// server (which can be composed on top of a "real" object layer like -/// RTDyldObjectLinkingLayer to actually carry out the operations). -/// -/// Sending relocatable objects to the server (rather than fully relocated -/// bits) allows JIT'd code to be cached on the server side and re-used in -/// subsequent JIT sessions. -template <typename RPCEndpoint> -class RemoteObjectClientLayer : public RemoteObjectLayer<RPCEndpoint> { -private: - - using AddObject = RemoteObjectLayerAPI::AddObject; - using RemoveObject = RemoteObjectLayerAPI::RemoveObject; - using FindSymbol = RemoteObjectLayerAPI::FindSymbol; - using FindSymbolIn = RemoteObjectLayerAPI::FindSymbolIn; - using EmitAndFinalize = RemoteObjectLayerAPI::EmitAndFinalize; - using Lookup = RemoteObjectLayerAPI::Lookup; - using LookupInLogicalDylib = RemoteObjectLayerAPI::LookupInLogicalDylib; - - using RemoteObjectLayer<RPCEndpoint>::teeLog; - using RemoteObjectLayer<RPCEndpoint>::badObjectHandleError; - using RemoteObjectLayer<RPCEndpoint>::remoteToJITSymbol; - -public: - - using ObjHandleT = RemoteObjectLayerAPI::ObjHandleT; - using RemoteSymbol = RemoteObjectLayerAPI::RemoteSymbol; - - using ObjectPtr = std::unique_ptr<MemoryBuffer>; - - /// Create a RemoteObjectClientLayer that communicates with a - /// RemoteObjectServerLayer instance via the given RPCEndpoint. - /// - /// The ReportError functor can be used locally log errors that are intended - /// to be sent sent - LLVM_ATTRIBUTE_DEPRECATED( - RemoteObjectClientLayer(RPCEndpoint &Remote, - std::function<void(Error)> ReportError), - "ORCv1 layers (including RemoteObjectClientLayer) are deprecated. 
Please " - "use " - "ORCv2 (see docs/ORCv2.rst)"); - - RemoteObjectClientLayer(ORCv1DeprecationAcknowledgement, RPCEndpoint &Remote, - std::function<void(Error)> ReportError) - : RemoteObjectLayer<RPCEndpoint>(Remote, std::move(ReportError)) { - using ThisT = RemoteObjectClientLayer<RPCEndpoint>; - Remote.template addHandler<Lookup>(*this, &ThisT::lookup); - Remote.template addHandler<LookupInLogicalDylib>( - *this, &ThisT::lookupInLogicalDylib); - } - - /// Add an object to the JIT. - /// - /// @return A handle that can be used to refer to the loaded object (for - /// symbol searching, finalization, freeing memory, etc.). - Expected<ObjHandleT> - addObject(ObjectPtr ObjBuffer, - std::shared_ptr<LegacyJITSymbolResolver> Resolver) { - if (auto HandleOrErr = - this->Remote.template callB<AddObject>(ObjBuffer->getBuffer())) { - auto &Handle = *HandleOrErr; - // FIXME: Return an error for this: - assert(!Resolvers.count(Handle) && "Handle already in use?"); - Resolvers[Handle] = std::move(Resolver); - return Handle; - } else - return HandleOrErr.takeError(); - } - - /// Remove the given object from the JIT. - Error removeObject(ObjHandleT H) { - return this->Remote.template callB<RemoveObject>(H); - } - - /// Search for the given named symbol. - JITSymbol findSymbol(StringRef Name, bool ExportedSymbolsOnly) { - return remoteToJITSymbol( - this->Remote.template callB<FindSymbol>(Name, - ExportedSymbolsOnly)); - } - - /// Search for the given named symbol within the given context. - JITSymbol findSymbolIn(ObjHandleT H, StringRef Name, bool ExportedSymbolsOnly) { - return remoteToJITSymbol( - this->Remote.template callB<FindSymbolIn>(H, Name, - ExportedSymbolsOnly)); - } - - /// Immediately emit and finalize the object with the given handle. - Error emitAndFinalize(ObjHandleT H) { - return this->Remote.template callB<EmitAndFinalize>(H); - } - -private: - - Expected<RemoteSymbol> lookup(ObjHandleT H, const std::string &Name) { - auto RI = Resolvers.find(H); - if (RI != Resolvers.end()) { - return this->jitSymbolToRemote(RI->second->findSymbol(Name)); - } else - return teeLog(badObjectHandleError(H)); - } - - Expected<RemoteSymbol> lookupInLogicalDylib(ObjHandleT H, - const std::string &Name) { - auto RI = Resolvers.find(H); - if (RI != Resolvers.end()) - return this->jitSymbolToRemote( - RI->second->findSymbolInLogicalDylib(Name)); - else - return teeLog(badObjectHandleError(H)); - } - - std::map<remote::ResourceIdMgr::ResourceId, - std::shared_ptr<LegacyJITSymbolResolver>> - Resolvers; -}; - -/// RemoteObjectServerLayer acts as a server and handling RPC calls for the -/// object layer API from the given RPC connection. -/// -/// This class can be composed on top of a 'real' object layer (e.g. -/// RTDyldObjectLinkingLayer) to do the actual work of relocating objects -/// and making them executable. 
-template <typename BaseLayerT, typename RPCEndpoint> -class RemoteObjectServerLayer : public RemoteObjectLayer<RPCEndpoint> { -private: - - using ObjHandleT = RemoteObjectLayerAPI::ObjHandleT; - using RemoteSymbol = RemoteObjectLayerAPI::RemoteSymbol; - - using AddObject = RemoteObjectLayerAPI::AddObject; - using RemoveObject = RemoteObjectLayerAPI::RemoveObject; - using FindSymbol = RemoteObjectLayerAPI::FindSymbol; - using FindSymbolIn = RemoteObjectLayerAPI::FindSymbolIn; - using EmitAndFinalize = RemoteObjectLayerAPI::EmitAndFinalize; - using Lookup = RemoteObjectLayerAPI::Lookup; - using LookupInLogicalDylib = RemoteObjectLayerAPI::LookupInLogicalDylib; - - using RemoteObjectLayer<RPCEndpoint>::teeLog; - using RemoteObjectLayer<RPCEndpoint>::badObjectHandleError; - using RemoteObjectLayer<RPCEndpoint>::remoteToJITSymbol; - -public: - - /// Create a RemoteObjectServerLayer with the given base layer (which must be - /// an object layer), RPC endpoint, and error reporter function. - LLVM_ATTRIBUTE_DEPRECATED( - RemoteObjectServerLayer(BaseLayerT &BaseLayer, RPCEndpoint &Remote, - std::function<void(Error)> ReportError), - "ORCv1 layers (including RemoteObjectServerLayer) are deprecated. Please " - "use " - "ORCv2 (see docs/ORCv2.rst)"); - - RemoteObjectServerLayer(ORCv1DeprecationAcknowledgement, - BaseLayerT &BaseLayer, RPCEndpoint &Remote, - std::function<void(Error)> ReportError) - : RemoteObjectLayer<RPCEndpoint>(Remote, std::move(ReportError)), - BaseLayer(BaseLayer), HandleIdMgr(1) { - using ThisT = RemoteObjectServerLayer<BaseLayerT, RPCEndpoint>; - - Remote.template addHandler<AddObject>(*this, &ThisT::addObject); - Remote.template addHandler<RemoveObject>(*this, &ThisT::removeObject); - Remote.template addHandler<FindSymbol>(*this, &ThisT::findSymbol); - Remote.template addHandler<FindSymbolIn>(*this, &ThisT::findSymbolIn); - Remote.template addHandler<EmitAndFinalize>(*this, &ThisT::emitAndFinalize); - } - -private: - - class StringMemoryBuffer : public MemoryBuffer { - public: - StringMemoryBuffer(std::string Buffer) - : Buffer(std::move(Buffer)) { - init(this->Buffer.data(), this->Buffer.data() + this->Buffer.size(), - false); - } - - BufferKind getBufferKind() const override { return MemoryBuffer_Malloc; } - private: - std::string Buffer; - }; - - JITSymbol lookup(ObjHandleT Id, const std::string &Name) { - return remoteToJITSymbol( - this->Remote.template callB<Lookup>(Id, Name)); - } - - JITSymbol lookupInLogicalDylib(ObjHandleT Id, const std::string &Name) { - return remoteToJITSymbol( - this->Remote.template callB<LookupInLogicalDylib>(Id, Name)); - } - - Expected<ObjHandleT> addObject(std::string ObjBuffer) { - auto Buffer = std::make_unique<StringMemoryBuffer>(std::move(ObjBuffer)); - auto Id = HandleIdMgr.getNext(); - assert(!BaseLayerHandles.count(Id) && "Id already in use?"); - - auto Resolver = createLambdaResolver( - AcknowledgeORCv1Deprecation, - [this, Id](const std::string &Name) { return lookup(Id, Name); }, - [this, Id](const std::string &Name) { - return lookupInLogicalDylib(Id, Name); - }); - - if (auto HandleOrErr = - BaseLayer.addObject(std::move(Buffer), std::move(Resolver))) { - BaseLayerHandles[Id] = std::move(*HandleOrErr); - return Id; - } else - return teeLog(HandleOrErr.takeError()); - } - - Error removeObject(ObjHandleT H) { - auto HI = BaseLayerHandles.find(H); - if (HI != BaseLayerHandles.end()) { - if (auto Err = BaseLayer.removeObject(HI->second)) - return teeLog(std::move(Err)); - return Error::success(); - } else - return 
teeLog(badObjectHandleError(H)); - } - - Expected<RemoteSymbol> findSymbol(const std::string &Name, - bool ExportedSymbolsOnly) { - if (auto Sym = BaseLayer.findSymbol(Name, ExportedSymbolsOnly)) - return this->jitSymbolToRemote(std::move(Sym)); - else if (auto Err = Sym.takeError()) - return teeLog(std::move(Err)); - return this->nullRemoteSymbol(); - } - - Expected<RemoteSymbol> findSymbolIn(ObjHandleT H, const std::string &Name, - bool ExportedSymbolsOnly) { - auto HI = BaseLayerHandles.find(H); - if (HI != BaseLayerHandles.end()) { - if (auto Sym = BaseLayer.findSymbolIn(HI->second, Name, ExportedSymbolsOnly)) - return this->jitSymbolToRemote(std::move(Sym)); - else if (auto Err = Sym.takeError()) - return teeLog(std::move(Err)); - return this->nullRemoteSymbol(); - } else - return teeLog(badObjectHandleError(H)); - } - - Error emitAndFinalize(ObjHandleT H) { - auto HI = BaseLayerHandles.find(H); - if (HI != BaseLayerHandles.end()) { - if (auto Err = BaseLayer.emitAndFinalize(HI->second)) - return teeLog(std::move(Err)); - return Error::success(); - } else - return teeLog(badObjectHandleError(H)); - } - - BaseLayerT &BaseLayer; - remote::ResourceIdMgr HandleIdMgr; - std::map<ObjHandleT, typename BaseLayerT::ObjHandleT> BaseLayerHandles; -}; - -template <typename RPCEndpoint> -RemoteObjectClientLayer<RPCEndpoint>::RemoteObjectClientLayer( - RPCEndpoint &Remote, std::function<void(Error)> ReportError) - : RemoteObjectLayer<RPCEndpoint>(Remote, std::move(ReportError)) { - using ThisT = RemoteObjectClientLayer<RPCEndpoint>; - Remote.template addHandler<Lookup>(*this, &ThisT::lookup); - Remote.template addHandler<LookupInLogicalDylib>( - *this, &ThisT::lookupInLogicalDylib); -} - -template <typename BaseLayerT, typename RPCEndpoint> -RemoteObjectServerLayer<BaseLayerT, RPCEndpoint>::RemoteObjectServerLayer( - BaseLayerT &BaseLayer, RPCEndpoint &Remote, - std::function<void(Error)> ReportError) - : RemoteObjectLayer<RPCEndpoint>(Remote, std::move(ReportError)), - BaseLayer(BaseLayer), HandleIdMgr(1) { - using ThisT = RemoteObjectServerLayer<BaseLayerT, RPCEndpoint>; - - Remote.template addHandler<AddObject>(*this, &ThisT::addObject); - Remote.template addHandler<RemoveObject>(*this, &ThisT::removeObject); - Remote.template addHandler<FindSymbol>(*this, &ThisT::findSymbol); - Remote.template addHandler<FindSymbolIn>(*this, &ThisT::findSymbolIn); - Remote.template addHandler<EmitAndFinalize>(*this, &ThisT::emitAndFinalize); -} - -} // end namespace orc -} // end namespace llvm - -#endif // LLVM_EXECUTIONENGINE_ORC_REMOTEOBJECTLAYER_H diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Shared/FDRawByteChannel.h b/llvm/include/llvm/ExecutionEngine/Orc/Shared/FDRawByteChannel.h new file mode 100644 index 000000000000..3f96fe3da49d --- /dev/null +++ b/llvm/include/llvm/ExecutionEngine/Orc/Shared/FDRawByteChannel.h @@ -0,0 +1,79 @@ +//===- FDRawByteChannel.h - File descriptor based byte-channel -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// File descriptor based RawByteChannel. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EXECUTIONENGINE_ORC_SHARED_FDRAWBYTECHANNEL_H +#define LLVM_EXECUTIONENGINE_ORC_SHARED_FDRAWBYTECHANNEL_H + +#include "llvm/ExecutionEngine/Orc/Shared/RawByteChannel.h" + +#if !defined(_MSC_VER) && !defined(__MINGW32__) +#include <unistd.h> +#else +#include <io.h> +#endif + +namespace llvm { +namespace orc { +namespace shared { + +/// Serialization channel that reads from and writes from file descriptors. +class FDRawByteChannel final : public RawByteChannel { +public: + FDRawByteChannel(int InFD, int OutFD) : InFD(InFD), OutFD(OutFD) {} + + llvm::Error readBytes(char *Dst, unsigned Size) override { + assert(Dst && "Attempt to read into null."); + ssize_t Completed = 0; + while (Completed < static_cast<ssize_t>(Size)) { + ssize_t Read = ::read(InFD, Dst + Completed, Size - Completed); + if (Read <= 0) { + auto ErrNo = errno; + if (ErrNo == EAGAIN || ErrNo == EINTR) + continue; + else + return llvm::errorCodeToError( + std::error_code(errno, std::generic_category())); + } + Completed += Read; + } + return llvm::Error::success(); + } + + llvm::Error appendBytes(const char *Src, unsigned Size) override { + assert(Src && "Attempt to append from null."); + ssize_t Completed = 0; + while (Completed < static_cast<ssize_t>(Size)) { + ssize_t Written = ::write(OutFD, Src + Completed, Size - Completed); + if (Written < 0) { + auto ErrNo = errno; + if (ErrNo == EAGAIN || ErrNo == EINTR) + continue; + else + return llvm::errorCodeToError( + std::error_code(errno, std::generic_category())); + } + Completed += Written; + } + return llvm::Error::success(); + } + + llvm::Error send() override { return llvm::Error::success(); } + +private: + int InFD, OutFD; +}; + +} // namespace shared +} // namespace orc +} // namespace llvm + +#endif // LLVM_EXECUTIONENGINE_ORC_SHARED_FDRAWBYTECHANNEL_H diff --git a/llvm/include/llvm/ExecutionEngine/Orc/OrcError.h b/llvm/include/llvm/ExecutionEngine/Orc/Shared/OrcError.h index 9b0d941f5459..9b0d941f5459 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/OrcError.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/Shared/OrcError.h diff --git a/llvm/include/llvm/ExecutionEngine/Orc/RPC/RPCUtils.h b/llvm/include/llvm/ExecutionEngine/Orc/Shared/RPCUtils.h index f348844f39ce..e0ac640ebcdd 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/RPC/RPCUtils.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/Shared/RPCUtils.h @@ -14,23 +14,23 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_EXECUTIONENGINE_ORC_RPCUTILS_H -#define LLVM_EXECUTIONENGINE_ORC_RPCUTILS_H +#ifndef LLVM_EXECUTIONENGINE_ORC_SHARED_RPCUTILS_H +#define LLVM_EXECUTIONENGINE_ORC_SHARED_RPCUTILS_H #include <map> #include <thread> #include <vector> #include "llvm/ADT/STLExtras.h" -#include "llvm/ExecutionEngine/Orc/OrcError.h" -#include "llvm/ExecutionEngine/Orc/RPC/RPCSerialization.h" +#include "llvm/ExecutionEngine/Orc/Shared/OrcError.h" +#include "llvm/ExecutionEngine/Orc/Shared/Serialization.h" #include "llvm/Support/MSVCErrorWorkarounds.h" #include <future> namespace llvm { namespace orc { -namespace rpc { +namespace shared { /// Base class of all fatal RPC errors (those that necessarily result in the /// termination of the RPC session). @@ -56,7 +56,7 @@ public: /// function id it cannot parse the call. 
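FDRawByteChannel above is deliberately minimal: readBytes and appendBytes loop over read/write on the descriptors, retrying on EAGAIN and EINTR, and send() is a no-op because writes go straight to the file descriptor. A sketch, not part of this diff, of wiring it under the remote-target client; InFD/OutFD are assumed, already-open descriptors for the transport (for example a socketpair) and ES is an existing ExecutionSession:

  llvm::orc::shared::FDRawByteChannel Channel(InFD, OutFD);
  llvm::ExitOnError ExitOnErr;
  auto Client = ExitOnErr(
      llvm::orc::remote::OrcRemoteTargetClient::Create(Channel, ES));
  // From here the client's callIntVoid/callIntInt/callMain wrappers issue
  // their exec:: RPC functions over this channel.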
template <typename FnIdT, typename SeqNoT> class BadFunctionCall - : public ErrorInfo<BadFunctionCall<FnIdT, SeqNoT>, RPCFatalError> { + : public ErrorInfo<BadFunctionCall<FnIdT, SeqNoT>, RPCFatalError> { public: static char ID; @@ -68,8 +68,10 @@ public: } void log(raw_ostream &OS) const override { - OS << "Call to invalid RPC function id '" << FnId << "' with " - "sequence number " << SeqNo; + OS << "Call to invalid RPC function id '" << FnId + << "' with " + "sequence number " + << SeqNo; } private: @@ -89,12 +91,12 @@ char BadFunctionCall<FnIdT, SeqNoT>::ID = 0; /// a result parser for this sequence number it can't do that. template <typename SeqNoT> class InvalidSequenceNumberForResponse - : public ErrorInfo<InvalidSequenceNumberForResponse<SeqNoT>, RPCFatalError> { + : public ErrorInfo<InvalidSequenceNumberForResponse<SeqNoT>, + RPCFatalError> { public: static char ID; - InvalidSequenceNumberForResponse(SeqNoT SeqNo) - : SeqNo(std::move(SeqNo)) {} + InvalidSequenceNumberForResponse(SeqNoT SeqNo) : SeqNo(std::move(SeqNo)) {} std::error_code convertToErrorCode() const override { return orcError(OrcErrorCode::UnexpectedRPCCall); @@ -103,6 +105,7 @@ public: void log(raw_ostream &OS) const override { OS << "Response has unknown sequence number " << SeqNo; } + private: SeqNoT SeqNo; }; @@ -131,17 +134,18 @@ public: std::error_code convertToErrorCode() const override; void log(raw_ostream &OS) const override; const std::string &getSignature() const { return Signature; } + private: std::string Signature; }; -template <typename DerivedFunc, typename FnT> class Function; +template <typename DerivedFunc, typename FnT> class RPCFunction; // RPC Function class. // DerivedFunc should be a user defined class with a static 'getName()' method // returning a const char* representing the function's name. template <typename DerivedFunc, typename RetT, typename... ArgTs> -class Function<DerivedFunc, RetT(ArgTs...)> { +class RPCFunction<DerivedFunc, RetT(ArgTs...)> { public: /// User defined function type. using Type = RetT(ArgTs...); @@ -154,8 +158,9 @@ public: static std::string Name = [] { std::string Name; raw_string_ostream(Name) - << RPCTypeName<RetT>::getName() << " " << DerivedFunc::getName() - << "(" << llvm::orc::rpc::RPCTypeNameSequence<ArgTs...>() << ")"; + << SerializationTypeName<RetT>::getName() << " " + << DerivedFunc::getName() << "(" + << SerializationTypeNameSequence<ArgTs...>() << ")"; return Name; }(); return Name.data(); @@ -199,10 +204,10 @@ private: namespace detail { /// Provides a typedef for a tuple containing the decayed argument types. -template <typename T> class FunctionArgsTuple; +template <typename T> class RPCFunctionArgsTuple; template <typename RetT, typename... ArgTs> -class FunctionArgsTuple<RetT(ArgTs...)> { +class RPCFunctionArgsTuple<RetT(ArgTs...)> { public: using Type = std::tuple<std::decay_t<std::remove_reference_t<ArgTs>>...>; }; @@ -287,34 +292,28 @@ class ResultTraits<Expected<RetT>> : public ResultTraits<RetT> {}; // Determines whether an RPC function's defined error return type supports // error return value. 
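The mechanical rename from rpc::Function to shared::RPCFunction (and from RPCTypeName to SerializationTypeName) leaves the declaration pattern untouched. A sketch of what a hypothetical new call would look like under the renamed API; GetPageSize is an invented name used purely for illustration and does not exist in this patch:

  class GetPageSize
      : public shared::RPCFunction<GetPageSize, uint32_t()> {
  public:
    static const char *getName() { return "GetPageSize"; }
  };
  // getPrototype() assembles the wire signature automatically from
  // SerializationTypeName<uint32_t>::getName(), getName() and the argument
  // type list, yielding something like "uint32_t GetPageSize()".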
-template <typename T> -class SupportsErrorReturn { +template <typename T> class SupportsErrorReturn { public: static const bool value = false; }; -template <> -class SupportsErrorReturn<Error> { +template <> class SupportsErrorReturn<Error> { public: static const bool value = true; }; -template <typename T> -class SupportsErrorReturn<Expected<T>> { +template <typename T> class SupportsErrorReturn<Expected<T>> { public: static const bool value = true; }; // RespondHelper packages return values based on whether or not the declared // RPC function return type supports error returns. -template <bool FuncSupportsErrorReturn> -class RespondHelper; +template <bool FuncSupportsErrorReturn> class RespondHelper; // RespondHelper specialization for functions that support error returns. -template <> -class RespondHelper<true> { +template <> class RespondHelper<true> { public: - // Send Expected<T>. template <typename WireRetT, typename HandlerRetT, typename ChannelT, typename FunctionIdT, typename SequenceNumberT> @@ -330,9 +329,8 @@ public: // Serialize the result. if (auto Err = - SerializationTraits<ChannelT, WireRetT, - Expected<HandlerRetT>>::serialize( - C, std::move(ResultOrErr))) + SerializationTraits<ChannelT, WireRetT, Expected<HandlerRetT>>:: + serialize(C, std::move(ResultOrErr))) return Err; // Close the response message. @@ -354,14 +352,11 @@ public: return Err2; return C.send(); } - }; // RespondHelper specialization for functions that do not support error returns. -template <> -class RespondHelper<false> { +template <> class RespondHelper<false> { public: - template <typename WireRetT, typename HandlerRetT, typename ChannelT, typename FunctionIdT, typename SequenceNumberT> static Error sendResult(ChannelT &C, const FunctionIdT &ResponseId, @@ -376,8 +371,8 @@ public: // Serialize the result. if (auto Err = - SerializationTraits<ChannelT, WireRetT, HandlerRetT>::serialize( - C, *ResultOrErr)) + SerializationTraits<ChannelT, WireRetT, HandlerRetT>::serialize( + C, *ResultOrErr)) return Err; // End the response message. @@ -398,18 +393,17 @@ public: return Err2; return C.send(); } - }; - // Send a response of the given wire return type (WireRetT) over the // channel, with the given sequence number. template <typename WireRetT, typename HandlerRetT, typename ChannelT, typename FunctionIdT, typename SequenceNumberT> -Error respond(ChannelT &C, const FunctionIdT &ResponseId, - SequenceNumberT SeqNo, Expected<HandlerRetT> ResultOrErr) { +Error respond(ChannelT &C, const FunctionIdT &ResponseId, SequenceNumberT SeqNo, + Expected<HandlerRetT> ResultOrErr) { return RespondHelper<SupportsErrorReturn<WireRetT>::value>:: - template sendResult<WireRetT>(C, ResponseId, SeqNo, std::move(ResultOrErr)); + template sendResult<WireRetT>(C, ResponseId, SeqNo, + std::move(ResultOrErr)); } // Send an empty response message on the given channel to indicate that @@ -418,8 +412,8 @@ template <typename WireRetT, typename ChannelT, typename FunctionIdT, typename SequenceNumberT> Error respond(ChannelT &C, const FunctionIdT &ResponseId, SequenceNumberT SeqNo, Error Err) { - return RespondHelper<SupportsErrorReturn<WireRetT>::value>:: - sendResult(C, ResponseId, SeqNo, std::move(Err)); + return RespondHelper<SupportsErrorReturn<WireRetT>::value>::sendResult( + C, ResponseId, SeqNo, std::move(Err)); } // Converts a given type to the equivalent error return type. @@ -453,7 +447,8 @@ public: template <typename FnT> class AsyncHandlerTraits; template <typename ResultT, typename... 
ArgTs> -class AsyncHandlerTraits<Error(std::function<Error(Expected<ResultT>)>, ArgTs...)> { +class AsyncHandlerTraits<Error(std::function<Error(Expected<ResultT>)>, + ArgTs...)> { public: using Type = Error(ArgTs...); using ResultType = Expected<ResultT>; @@ -490,9 +485,9 @@ class AsyncHandlerTraits<Error(ResponseHandlerT, ArgTs...)> // specialized for function types) and inherits from the appropriate // speciilization for the given non-function type's call operator. template <typename HandlerT> -class HandlerTraits : public HandlerTraits<decltype( - &std::remove_reference<HandlerT>::type::operator())> { -}; +class HandlerTraits + : public HandlerTraits< + decltype(&std::remove_reference<HandlerT>::type::operator())> {}; // Traits for handlers with a given function type. template <typename RetT, typename... ArgTs> @@ -524,7 +519,7 @@ public: template <typename HandlerT> static std::enable_if_t< std::is_void<typename HandlerTraits<HandlerT>::ReturnType>::value, Error> - run(HandlerT &Handler, ArgTs &&... Args) { + run(HandlerT &Handler, ArgTs &&...Args) { Handler(std::move(Args)...); return Error::success(); } @@ -577,8 +572,8 @@ private: // Handler traits for free functions. template <typename RetT, typename... ArgTs> -class HandlerTraits<RetT(*)(ArgTs...)> - : public HandlerTraits<RetT(ArgTs...)> {}; +class HandlerTraits<RetT (*)(ArgTs...)> : public HandlerTraits<RetT(ArgTs...)> { +}; // Handler traits for class methods (especially call operators for lambdas). template <typename Class, typename RetT, typename... ArgTs> @@ -714,9 +709,8 @@ public: typename HandlerTraits<HandlerT>::Type>::ArgType; HandlerArgType Result((typename HandlerArgType::value_type())); - if (auto Err = - SerializationTraits<ChannelT, Expected<FuncRetT>, - HandlerArgType>::deserialize(C, Result)) + if (auto Err = SerializationTraits<ChannelT, Expected<FuncRetT>, + HandlerArgType>::deserialize(C, Result)) return Err; if (auto Err = C.endReceiveMessage()) return Err; @@ -786,7 +780,7 @@ public: using MethodT = RetT (ClassT::*)(ArgTs...); MemberFnWrapper(ClassT &Instance, MethodT Method) : Instance(Instance), Method(Method) {} - RetT operator()(ArgTs &&... Args) { + RetT operator()(ArgTs &&...Args) { return (Instance.*Method)(std::move(Args)...); } @@ -804,10 +798,9 @@ public: template <typename ArgT, typename... ArgTs> class ReadArgs<ArgT, ArgTs...> : public ReadArgs<ArgTs...> { public: - ReadArgs(ArgT &Arg, ArgTs &... Args) - : ReadArgs<ArgTs...>(Args...), Arg(Arg) {} + ReadArgs(ArgT &Arg, ArgTs &...Args) : ReadArgs<ArgTs...>(Args...), Arg(Arg) {} - Error operator()(ArgT &ArgVal, ArgTs &... 
ArgVals) { + Error operator()(ArgT &ArgVal, ArgTs &...ArgVals) { this->Arg = std::move(ArgVal); return ReadArgs<ArgTs...>::operator()(ArgVals...); } @@ -872,8 +865,8 @@ public: template <template <class, class> class P, typename T1Sig, typename T2Sig> class RPCArgTypeCheck { public: - using T1Tuple = typename FunctionArgsTuple<T1Sig>::Type; - using T2Tuple = typename FunctionArgsTuple<T2Sig>::Type; + using T1Tuple = typename RPCFunctionArgsTuple<T1Sig>::Type; + using T2Tuple = typename RPCFunctionArgsTuple<T2Sig>::Type; static_assert(std::tuple_size<T1Tuple>::value >= std::tuple_size<T2Tuple>::value, @@ -937,18 +930,18 @@ template <typename ImplT, typename ChannelT, typename FunctionIdT, typename SequenceNumberT> class RPCEndpointBase { protected: - class OrcRPCInvalid : public Function<OrcRPCInvalid, void()> { + class OrcRPCInvalid : public RPCFunction<OrcRPCInvalid, void()> { public: static const char *getName() { return "__orc_rpc$invalid"; } }; - class OrcRPCResponse : public Function<OrcRPCResponse, void()> { + class OrcRPCResponse : public RPCFunction<OrcRPCResponse, void()> { public: static const char *getName() { return "__orc_rpc$response"; } }; class OrcRPCNegotiate - : public Function<OrcRPCNegotiate, FunctionIdT(std::string)> { + : public RPCFunction<OrcRPCNegotiate, FunctionIdT(std::string)> { public: static const char *getName() { return "__orc_rpc$negotiate"; } }; @@ -994,7 +987,6 @@ public: [this](const std::string &Name) { return handleNegotiate(Name); }); } - /// Negotiate a function id for Func with the other end of the channel. template <typename Func> Error negotiateFunction(bool Retry = false) { return getRemoteFunctionId<Func>(true, Retry).takeError(); @@ -1006,7 +998,7 @@ public: /// or an Error (if Func::ReturnType is void). The handler will be called /// with an error if the return value is abandoned due to a channel error. template <typename Func, typename HandlerT, typename... ArgTs> - Error appendCallAsync(HandlerT Handler, const ArgTs &... Args) { + Error appendCallAsync(HandlerT Handler, const ArgTs &...Args) { static_assert( detail::RPCArgTypeCheck<CanSerializeCheck, typename Func::Type, @@ -1036,8 +1028,8 @@ public: // Install the user handler. PendingResponses[SeqNo] = - detail::createResponseHandler<ChannelT, typename Func::ReturnType>( - std::move(Handler)); + detail::createResponseHandler<ChannelT, typename Func::ReturnType>( + std::move(Handler)); } // Open the function call message. @@ -1065,7 +1057,7 @@ public: Error sendAppendedCalls() { return C.send(); }; template <typename Func, typename HandlerT, typename... ArgTs> - Error callAsync(HandlerT Handler, const ArgTs &... Args) { + Error callAsync(HandlerT Handler, const ArgTs &...Args) { if (auto Err = appendCallAsync<Func>(std::move(Handler), Args...)) return Err; return C.send(); @@ -1104,7 +1096,7 @@ public: /// /* Handle Args */ ; /// template <typename... ArgTs> - static detail::ReadArgs<ArgTs...> readArgs(ArgTs &... Args) { + static detail::ReadArgs<ArgTs...> readArgs(ArgTs &...Args) { return detail::ReadArgs<ArgTs...>(Args...); } @@ -1128,8 +1120,7 @@ public: /// Remove the handler for the given function. /// A handler must currently be registered for this function. - template <typename Func> - void removeHandler() { + template <typename Func> void removeHandler() { auto IdItr = LocalFunctionIds.find(Func::getPrototype()); assert(IdItr != LocalFunctionIds.end() && "Function does not have a registered handler"); @@ -1140,12 +1131,9 @@ public: } /// Clear all handlers. 
- void clearHandlers() { - Handlers.clear(); - } + void clearHandlers() { Handlers.clear(); } protected: - FunctionIdT getInvalidFunctionId() const { return FnIdAllocator.getInvalidId(); } @@ -1168,12 +1156,12 @@ protected: template <typename Func, typename HandlerT> void addAsyncHandlerImpl(HandlerT Handler) { - static_assert(detail::RPCArgTypeCheck< - CanDeserializeCheck, typename Func::Type, - typename detail::AsyncHandlerTraits< - typename detail::HandlerTraits<HandlerT>::Type - >::Type>::value, - ""); + static_assert( + detail::RPCArgTypeCheck< + CanDeserializeCheck, typename Func::Type, + typename detail::AsyncHandlerTraits< + typename detail::HandlerTraits<HandlerT>::Type>::Type>::value, + ""); FunctionIdT NewFnId = FnIdAllocator.template allocate<Func>(); LocalFunctionIds[Func::getPrototype()] = NewFnId; @@ -1197,8 +1185,8 @@ protected: // Unlock the pending results map to prevent recursive lock. Lock.unlock(); abandonPendingResponses(); - return make_error< - InvalidSequenceNumberForResponse<SequenceNumberT>>(SeqNo); + return make_error<InvalidSequenceNumberForResponse<SequenceNumberT>>( + SeqNo); } } @@ -1241,7 +1229,7 @@ protected: if (DoNegotiate) { auto &Impl = static_cast<ImplT &>(*this); if (auto RemoteIdOrErr = - Impl.template callB<OrcRPCNegotiate>(Func::getPrototype())) { + Impl.template callB<OrcRPCNegotiate>(Func::getPrototype())) { RemoteFunctionIds[Func::getPrototype()] = *RemoteIdOrErr; if (*RemoteIdOrErr == getInvalidFunctionId()) return make_error<CouldNotNegotiate>(Func::getPrototype()); @@ -1264,9 +1252,8 @@ protected: return [this, Handler](ChannelT &Channel, SequenceNumberT SeqNo) mutable -> Error { // Start by deserializing the arguments. - using ArgsTuple = - typename detail::FunctionArgsTuple< - typename detail::HandlerTraits<HandlerT>::Type>::Type; + using ArgsTuple = typename detail::RPCFunctionArgsTuple< + typename detail::HandlerTraits<HandlerT>::Type>::Type; auto Args = std::make_shared<ArgsTuple>(); if (auto Err = @@ -1298,9 +1285,9 @@ protected: SequenceNumberT SeqNo) mutable -> Error { // Start by deserializing the arguments. 
using AHTraits = detail::AsyncHandlerTraits< - typename detail::HandlerTraits<HandlerT>::Type>; + typename detail::HandlerTraits<HandlerT>::Type>; using ArgsTuple = - typename detail::FunctionArgsTuple<typename AHTraits::Type>::Type; + typename detail::RPCFunctionArgsTuple<typename AHTraits::Type>::Type; auto Args = std::make_shared<ArgsTuple>(); if (auto Err = @@ -1319,11 +1306,11 @@ protected: using HTraits = detail::HandlerTraits<HandlerT>; using FuncReturn = typename Func::ReturnType; - auto Responder = - [this, SeqNo](typename AHTraits::ResultType RetVal) -> Error { - return detail::respond<FuncReturn>(C, ResponseId, SeqNo, - std::move(RetVal)); - }; + auto Responder = [this, + SeqNo](typename AHTraits::ResultType RetVal) -> Error { + return detail::respond<FuncReturn>(C, ResponseId, SeqNo, + std::move(RetVal)); + }; return HTraits::unpackAndRunAsync(Handler, Responder, *Args); }; @@ -1356,17 +1343,16 @@ class MultiThreadedRPCEndpoint MultiThreadedRPCEndpoint<ChannelT, FunctionIdT, SequenceNumberT>, ChannelT, FunctionIdT, SequenceNumberT> { private: - using BaseClass = - detail::RPCEndpointBase< - MultiThreadedRPCEndpoint<ChannelT, FunctionIdT, SequenceNumberT>, - ChannelT, FunctionIdT, SequenceNumberT>; + using BaseClass = detail::RPCEndpointBase< + MultiThreadedRPCEndpoint<ChannelT, FunctionIdT, SequenceNumberT>, + ChannelT, FunctionIdT, SequenceNumberT>; public: MultiThreadedRPCEndpoint(ChannelT &C, bool LazyAutoNegotiation) : BaseClass(C, LazyAutoNegotiation) {} /// Add a handler for the given RPC function. - /// This installs the given handler functor for the given RPC Function, and + /// This installs the given handler functor for the given RPCFunction, and /// makes the RPC function available for negotiation/calling from the remote. template <typename Func, typename HandlerT> void addHandler(HandlerT Handler) { @@ -1377,7 +1363,7 @@ public: template <typename Func, typename ClassT, typename RetT, typename... ArgTs> void addHandler(ClassT &Object, RetT (ClassT::*Method)(ArgTs...)) { addHandler<Func>( - detail::MemberFnWrapper<ClassT, RetT, ArgTs...>(Object, Method)); + detail::MemberFnWrapper<ClassT, RetT, ArgTs...>(Object, Method)); } template <typename Func, typename HandlerT> @@ -1389,7 +1375,7 @@ public: template <typename Func, typename ClassT, typename RetT, typename... ArgTs> void addAsyncHandler(ClassT &Object, RetT (ClassT::*Method)(ArgTs...)) { addAsyncHandler<Func>( - detail::MemberFnWrapper<ClassT, RetT, ArgTs...>(Object, Method)); + detail::MemberFnWrapper<ClassT, RetT, ArgTs...>(Object, Method)); } /// Return type for non-blocking call primitives. @@ -1405,7 +1391,7 @@ public: /// result. In multi-threaded mode the appendCallNB method, which does not /// return the sequence numeber, should be preferred. template <typename Func, typename... ArgTs> - Expected<NonBlockingCallResult<Func>> appendCallNB(const ArgTs &... Args) { + Expected<NonBlockingCallResult<Func>> appendCallNB(const ArgTs &...Args) { using RTraits = detail::ResultTraits<typename Func::ReturnType>; using ErrorReturn = typename RTraits::ErrorReturnType; using ErrorReturnPromise = typename RTraits::ReturnPromiseType; @@ -1428,7 +1414,7 @@ public: /// The same as appendCallNBWithSeq, except that it calls C.send() to /// flush the channel after serializing the call. template <typename Func, typename... ArgTs> - Expected<NonBlockingCallResult<Func>> callNB(const ArgTs &... 
Args) { + Expected<NonBlockingCallResult<Func>> callNB(const ArgTs &...Args) { auto Result = appendCallNB<Func>(Args...); if (!Result) return Result; @@ -1449,7 +1435,7 @@ public: template <typename Func, typename... ArgTs, typename AltRetT = typename Func::ReturnType> typename detail::ResultTraits<AltRetT>::ErrorReturnType - callB(const ArgTs &... Args) { + callB(const ArgTs &...Args) { if (auto FutureResOrErr = callNB<Func>(Args...)) return FutureResOrErr->get(); else @@ -1472,10 +1458,9 @@ class SingleThreadedRPCEndpoint SingleThreadedRPCEndpoint<ChannelT, FunctionIdT, SequenceNumberT>, ChannelT, FunctionIdT, SequenceNumberT> { private: - using BaseClass = - detail::RPCEndpointBase< - SingleThreadedRPCEndpoint<ChannelT, FunctionIdT, SequenceNumberT>, - ChannelT, FunctionIdT, SequenceNumberT>; + using BaseClass = detail::RPCEndpointBase< + SingleThreadedRPCEndpoint<ChannelT, FunctionIdT, SequenceNumberT>, + ChannelT, FunctionIdT, SequenceNumberT>; public: SingleThreadedRPCEndpoint(ChannelT &C, bool LazyAutoNegotiation) @@ -1501,13 +1486,13 @@ public: template <typename Func, typename ClassT, typename RetT, typename... ArgTs> void addAsyncHandler(ClassT &Object, RetT (ClassT::*Method)(ArgTs...)) { addAsyncHandler<Func>( - detail::MemberFnWrapper<ClassT, RetT, ArgTs...>(Object, Method)); + detail::MemberFnWrapper<ClassT, RetT, ArgTs...>(Object, Method)); } template <typename Func, typename... ArgTs, typename AltRetT = typename Func::ReturnType> typename detail::ResultTraits<AltRetT>::ErrorReturnType - callB(const ArgTs &... Args) { + callB(const ArgTs &...Args) { bool ReceivedResponse = false; using ResultType = typename detail::ResultTraits<AltRetT>::ErrorReturnType; auto Result = detail::ResultTraits<AltRetT>::createBlankErrorReturnValue(); @@ -1547,13 +1532,12 @@ public: }; /// Asynchronous dispatch for a function on an RPC endpoint. -template <typename RPCClass, typename Func> -class RPCAsyncDispatch { +template <typename RPCClass, typename Func> class RPCAsyncDispatch { public: RPCAsyncDispatch(RPCClass &Endpoint) : Endpoint(Endpoint) {} template <typename HandlerT, typename... ArgTs> - Error operator()(HandlerT Handler, const ArgTs &... Args) const { + Error operator()(HandlerT Handler, const ArgTs &...Args) const { return Endpoint.template appendCallAsync<Func>(std::move(Handler), Args...); } @@ -1571,7 +1555,6 @@ RPCAsyncDispatch<RPCEndpointT, Func> rpcAsyncDispatch(RPCEndpointT &Endpoint) { /// waited on as a group. class ParallelCallGroup { public: - ParallelCallGroup() = default; ParallelCallGroup(const ParallelCallGroup &) = delete; ParallelCallGroup &operator=(const ParallelCallGroup &) = delete; @@ -1579,7 +1562,7 @@ public: /// Make as asynchronous call. template <typename AsyncDispatcher, typename HandlerT, typename... ArgTs> Error call(const AsyncDispatcher &AsyncDispatch, HandlerT Handler, - const ArgTs &... Args) { + const ArgTs &...Args) { // Increment the count of outstanding calls. This has to happen before // we invoke the call, as the handler may (depending on scheduling) // be run immediately on another thread, and we don't want the decrement @@ -1618,70 +1601,57 @@ private: uint32_t NumOutstandingCalls = 0; }; -/// Convenience class for grouping RPC Functions into APIs that can be +/// Convenience class for grouping RPCFunctions into APIs that can be /// negotiated as a block. /// -template <typename... Funcs> -class APICalls { +template <typename... Funcs> class APICalls { public: - /// Test whether this API contains Function F. 
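An illustrative sketch (not part of the diff) of wiring one side of a connection with the endpoint classes above. It assumes the hypothetical myrpc::AddInts function sketched earlier, an already-connected FDRawByteChannel, and the endpoint base's handleOne() entry point for servicing a single pending call; the peer would invoke callB<myrpc::AddInts>(1, 2) and get back Expected<int32_t>.

#include "llvm/ExecutionEngine/Orc/Shared/FDRawByteChannel.h"
#include "llvm/ExecutionEngine/Orc/Shared/RPCUtils.h"

using DemoEndpoint = llvm::orc::shared::SingleThreadedRPCEndpoint<
    llvm::orc::shared::FDRawByteChannel, uint32_t, uint32_t>;

llvm::Error serveOneCall(llvm::orc::shared::FDRawByteChannel &C) {
  DemoEndpoint EP(C, /*LazyAutoNegotiation=*/true);
  // Register a synchronous handler for AddInts.
  EP.addHandler<myrpc::AddInts>([](int32_t A, int32_t B) { return A + B; });
  // Service a single incoming message (negotiation or an AddInts call).
  return EP.handleOne();
}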
- template <typename F> - class Contains { + template <typename F> class Contains { public: static const bool value = false; }; /// Negotiate all functions in this API. - template <typename RPCEndpoint> - static Error negotiate(RPCEndpoint &R) { + template <typename RPCEndpoint> static Error negotiate(RPCEndpoint &R) { return Error::success(); } }; -template <typename Func, typename... Funcs> -class APICalls<Func, Funcs...> { +template <typename Func, typename... Funcs> class APICalls<Func, Funcs...> { public: - - template <typename F> - class Contains { + template <typename F> class Contains { public: static const bool value = std::is_same<F, Func>::value | APICalls<Funcs...>::template Contains<F>::value; }; - template <typename RPCEndpoint> - static Error negotiate(RPCEndpoint &R) { + template <typename RPCEndpoint> static Error negotiate(RPCEndpoint &R) { if (auto Err = R.template negotiateFunction<Func>()) return Err; return APICalls<Funcs...>::negotiate(R); } - }; template <typename... InnerFuncs, typename... Funcs> class APICalls<APICalls<InnerFuncs...>, Funcs...> { public: - - template <typename F> - class Contains { + template <typename F> class Contains { public: static const bool value = - APICalls<InnerFuncs...>::template Contains<F>::value | - APICalls<Funcs...>::template Contains<F>::value; + APICalls<InnerFuncs...>::template Contains<F>::value | + APICalls<Funcs...>::template Contains<F>::value; }; - template <typename RPCEndpoint> - static Error negotiate(RPCEndpoint &R) { + template <typename RPCEndpoint> static Error negotiate(RPCEndpoint &R) { if (auto Err = APICalls<InnerFuncs...>::negotiate(R)) return Err; return APICalls<Funcs...>::negotiate(R); } - }; -} // end namespace rpc +} // end namespace shared } // end namespace orc } // end namespace llvm -#endif +#endif // LLVM_EXECUTIONENGINE_ORC_SHARED_RPCUTILS_H diff --git a/llvm/include/llvm/ExecutionEngine/Orc/RPC/RawByteChannel.h b/llvm/include/llvm/ExecutionEngine/Orc/Shared/RawByteChannel.h index 35745993248c..2ee471939251 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/RPC/RawByteChannel.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/Shared/RawByteChannel.h @@ -1,4 +1,4 @@ -//===- llvm/ExecutionEngine/Orc/RPC/RawByteChannel.h ----------------*- C++ -*-===// +//===- RawByteChannel.h -----------------------------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -6,11 +6,11 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_EXECUTIONENGINE_ORC_RAWBYTECHANNEL_H -#define LLVM_EXECUTIONENGINE_ORC_RAWBYTECHANNEL_H +#ifndef LLVM_EXECUTIONENGINE_ORC_SHARED_RAWBYTECHANNEL_H +#define LLVM_EXECUTIONENGINE_ORC_SHARED_RAWBYTECHANNEL_H #include "llvm/ADT/StringRef.h" -#include "llvm/ExecutionEngine/Orc/RPC/RPCSerialization.h" +#include "llvm/ExecutionEngine/Orc/Shared/Serialization.h" #include "llvm/Support/Endian.h" #include "llvm/Support/Error.h" #include <cstdint> @@ -20,9 +20,9 @@ namespace llvm { namespace orc { -namespace rpc { +namespace shared { -/// Interface for byte-streams to be used with RPC. +/// Interface for byte-streams to be used with ORC Serialization. class RawByteChannel { public: virtual ~RawByteChannel() = default; @@ -115,8 +115,7 @@ class SerializationTraits< public: static Error serialize(ChannelT &C, bool V) { uint8_t Tmp = V ? 
1 : 0; - if (auto Err = - C.appendBytes(reinterpret_cast<const char *>(&Tmp), 1)) + if (auto Err = C.appendBytes(reinterpret_cast<const char *>(&Tmp), 1)) return Err; return Error::success(); } @@ -135,7 +134,7 @@ class SerializationTraits< ChannelT, std::string, StringRef, std::enable_if_t<std::is_base_of<RawByteChannel, ChannelT>::value>> { public: - /// RPC channel serialization for std::strings. + /// Serialization channel serialization for std::strings. static Error serialize(RawByteChannel &C, StringRef S) { if (auto Err = serializeSeq(C, static_cast<uint64_t>(S.size()))) return Err; @@ -161,13 +160,13 @@ class SerializationTraits< ChannelT, std::string, std::string, std::enable_if_t<std::is_base_of<RawByteChannel, ChannelT>::value>> { public: - /// RPC channel serialization for std::strings. + /// Serialization channel serialization for std::strings. static Error serialize(RawByteChannel &C, const std::string &S) { return SerializationTraits<ChannelT, std::string, StringRef>::serialize(C, S); } - /// RPC channel deserialization for std::strings. + /// Serialization channel deserialization for std::strings. static Error deserialize(RawByteChannel &C, std::string &S) { uint64_t Count = 0; if (auto Err = deserializeSeq(C, Count)) @@ -177,8 +176,8 @@ public: } }; -} // end namespace rpc +} // end namespace shared } // end namespace orc } // end namespace llvm -#endif // LLVM_EXECUTIONENGINE_ORC_RAWBYTECHANNEL_H +#endif // LLVM_EXECUTIONENGINE_ORC_SHARED_RAWBYTECHANNEL_H diff --git a/llvm/include/llvm/ExecutionEngine/Orc/RPC/RPCSerialization.h b/llvm/include/llvm/ExecutionEngine/Orc/Shared/Serialization.h index 2f37ab40c7f8..f2d07632bd5d 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/RPC/RPCSerialization.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/Shared/Serialization.h @@ -1,4 +1,4 @@ -//===- llvm/ExecutionEngine/Orc/RPC/RPCSerialization.h --------------*- C++ -*-===// +//===- Serialization.h ------------------------------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -6,10 +6,12 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_EXECUTIONENGINE_ORC_RPCSERIALIZATION_H -#define LLVM_EXECUTIONENGINE_ORC_RPCSERIALIZATION_H +#ifndef LLVM_EXECUTIONENGINE_ORC_SHARED_SERIALIZATION_H +#define LLVM_EXECUTIONENGINE_ORC_SHARED_SERIALIZATION_H -#include "llvm/ExecutionEngine/Orc/OrcError.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ExecutionEngine/Orc/Shared/OrcError.h" #include "llvm/Support/thread.h" #include <map> #include <mutex> @@ -20,118 +22,104 @@ namespace llvm { namespace orc { -namespace rpc { +namespace shared { -template <typename T> -class RPCTypeName; +template <typename T> class SerializationTypeName; /// TypeNameSequence is a utility for rendering sequences of types to a string /// by rendering each type, separated by ", ". -template <typename... ArgTs> class RPCTypeNameSequence {}; +template <typename... ArgTs> class SerializationTypeNameSequence {}; /// Render an empty TypeNameSequence to an ostream. template <typename OStream> -OStream &operator<<(OStream &OS, const RPCTypeNameSequence<> &V) { +OStream &operator<<(OStream &OS, const SerializationTypeNameSequence<> &V) { return OS; } /// Render a TypeNameSequence of a single type to an ostream. 
template <typename OStream, typename ArgT> -OStream &operator<<(OStream &OS, const RPCTypeNameSequence<ArgT> &V) { - OS << RPCTypeName<ArgT>::getName(); +OStream &operator<<(OStream &OS, const SerializationTypeNameSequence<ArgT> &V) { + OS << SerializationTypeName<ArgT>::getName(); return OS; } /// Render a TypeNameSequence of more than one type to an ostream. template <typename OStream, typename ArgT1, typename ArgT2, typename... ArgTs> -OStream& -operator<<(OStream &OS, const RPCTypeNameSequence<ArgT1, ArgT2, ArgTs...> &V) { - OS << RPCTypeName<ArgT1>::getName() << ", " - << RPCTypeNameSequence<ArgT2, ArgTs...>(); +OStream & +operator<<(OStream &OS, + const SerializationTypeNameSequence<ArgT1, ArgT2, ArgTs...> &V) { + OS << SerializationTypeName<ArgT1>::getName() << ", " + << SerializationTypeNameSequence<ArgT2, ArgTs...>(); return OS; } -template <> -class RPCTypeName<void> { +template <> class SerializationTypeName<void> { public: - static const char* getName() { return "void"; } + static const char *getName() { return "void"; } }; -template <> -class RPCTypeName<int8_t> { +template <> class SerializationTypeName<int8_t> { public: - static const char* getName() { return "int8_t"; } + static const char *getName() { return "int8_t"; } }; -template <> -class RPCTypeName<uint8_t> { +template <> class SerializationTypeName<uint8_t> { public: - static const char* getName() { return "uint8_t"; } + static const char *getName() { return "uint8_t"; } }; -template <> -class RPCTypeName<int16_t> { +template <> class SerializationTypeName<int16_t> { public: - static const char* getName() { return "int16_t"; } + static const char *getName() { return "int16_t"; } }; -template <> -class RPCTypeName<uint16_t> { +template <> class SerializationTypeName<uint16_t> { public: - static const char* getName() { return "uint16_t"; } + static const char *getName() { return "uint16_t"; } }; -template <> -class RPCTypeName<int32_t> { +template <> class SerializationTypeName<int32_t> { public: - static const char* getName() { return "int32_t"; } + static const char *getName() { return "int32_t"; } }; -template <> -class RPCTypeName<uint32_t> { +template <> class SerializationTypeName<uint32_t> { public: - static const char* getName() { return "uint32_t"; } + static const char *getName() { return "uint32_t"; } }; -template <> -class RPCTypeName<int64_t> { +template <> class SerializationTypeName<int64_t> { public: - static const char* getName() { return "int64_t"; } + static const char *getName() { return "int64_t"; } }; -template <> -class RPCTypeName<uint64_t> { +template <> class SerializationTypeName<uint64_t> { public: - static const char* getName() { return "uint64_t"; } + static const char *getName() { return "uint64_t"; } }; -template <> -class RPCTypeName<bool> { +template <> class SerializationTypeName<bool> { public: - static const char* getName() { return "bool"; } + static const char *getName() { return "bool"; } }; -template <> -class RPCTypeName<std::string> { +template <> class SerializationTypeName<std::string> { public: - static const char* getName() { return "std::string"; } + static const char *getName() { return "std::string"; } }; -template <> -class RPCTypeName<Error> { +template <> class SerializationTypeName<Error> { public: - static const char* getName() { return "Error"; } + static const char *getName() { return "Error"; } }; -template <typename T> -class RPCTypeName<Expected<T>> { +template <typename T> class SerializationTypeName<Expected<T>> { public: - static const char* getName() { + 
static const char *getName() { static std::string Name = [] { std::string Name; - raw_string_ostream(Name) << "Expected<" - << RPCTypeNameSequence<T>() - << ">"; + raw_string_ostream(Name) + << "Expected<" << SerializationTypeNameSequence<T>() << ">"; return Name; }(); return Name.data(); @@ -139,67 +127,78 @@ public: }; template <typename T1, typename T2> -class RPCTypeName<std::pair<T1, T2>> { +class SerializationTypeName<std::pair<T1, T2>> { +public: + static const char *getName() { + static std::string Name = [] { + std::string Name; + raw_string_ostream(Name) + << "std::pair<" << SerializationTypeNameSequence<T1, T2>() << ">"; + return Name; + }(); + return Name.data(); + } +}; + +template <typename... ArgTs> class SerializationTypeName<std::tuple<ArgTs...>> { public: - static const char* getName() { + static const char *getName() { static std::string Name = [] { std::string Name; - raw_string_ostream(Name) << "std::pair<" << RPCTypeNameSequence<T1, T2>() - << ">"; + raw_string_ostream(Name) + << "std::tuple<" << SerializationTypeNameSequence<ArgTs...>() << ">"; return Name; }(); return Name.data(); } }; -template <typename... ArgTs> -class RPCTypeName<std::tuple<ArgTs...>> { +template <typename T> class SerializationTypeName<Optional<T>> { public: - static const char* getName() { + static const char *getName() { static std::string Name = [] { std::string Name; - raw_string_ostream(Name) << "std::tuple<" - << RPCTypeNameSequence<ArgTs...>() << ">"; + raw_string_ostream(Name) + << "Optional<" << SerializationTypeName<T>::getName() << ">"; return Name; }(); return Name.data(); } }; -template <typename T> -class RPCTypeName<std::vector<T>> { +template <typename T> class SerializationTypeName<std::vector<T>> { public: - static const char*getName() { + static const char *getName() { static std::string Name = [] { std::string Name; - raw_string_ostream(Name) << "std::vector<" << RPCTypeName<T>::getName() - << ">"; + raw_string_ostream(Name) + << "std::vector<" << SerializationTypeName<T>::getName() << ">"; return Name; }(); return Name.data(); } }; -template <typename T> class RPCTypeName<std::set<T>> { +template <typename T> class SerializationTypeName<std::set<T>> { public: static const char *getName() { static std::string Name = [] { std::string Name; raw_string_ostream(Name) - << "std::set<" << RPCTypeName<T>::getName() << ">"; + << "std::set<" << SerializationTypeName<T>::getName() << ">"; return Name; }(); return Name.data(); } }; -template <typename K, typename V> class RPCTypeName<std::map<K, V>> { +template <typename K, typename V> class SerializationTypeName<std::map<K, V>> { public: static const char *getName() { static std::string Name = [] { std::string Name; raw_string_ostream(Name) - << "std::map<" << RPCTypeNameSequence<K, V>() << ">"; + << "std::map<" << SerializationTypeNameSequence<K, V>() << ">"; return Name; }(); return Name.data(); @@ -242,8 +241,7 @@ template <typename ChannelT, typename WireType, typename ConcreteType = WireType, typename = void> class SerializationTraits; -template <typename ChannelT> -class SequenceTraits { +template <typename ChannelT> class SequenceTraits { public: static Error emitSeparator(ChannelT &C) { return Error::success(); } static Error consumeSeparator(ChannelT &C) { return Error::success(); } @@ -258,11 +256,9 @@ public: /// is a SerializationTraits specialization /// SerializeTraits<ChannelT, ArgT, CArgT> with methods that can serialize the /// caller argument to over-the-wire value. -template <typename ChannelT, typename... 
ArgTs> -class SequenceSerialization; +template <typename ChannelT, typename... ArgTs> class SequenceSerialization; -template <typename ChannelT> -class SequenceSerialization<ChannelT> { +template <typename ChannelT> class SequenceSerialization<ChannelT> { public: static Error serialize(ChannelT &C) { return Error::success(); } static Error deserialize(ChannelT &C) { return Error::success(); } @@ -271,15 +267,12 @@ public: template <typename ChannelT, typename ArgT> class SequenceSerialization<ChannelT, ArgT> { public: - - template <typename CArgT> - static Error serialize(ChannelT &C, CArgT &&CArg) { + template <typename CArgT> static Error serialize(ChannelT &C, CArgT &&CArg) { return SerializationTraits<ChannelT, ArgT, std::decay_t<CArgT>>::serialize( C, std::forward<CArgT>(CArg)); } - template <typename CArgT> - static Error deserialize(ChannelT &C, CArgT &CArg) { + template <typename CArgT> static Error deserialize(ChannelT &C, CArgT &CArg) { return SerializationTraits<ChannelT, ArgT, CArgT>::deserialize(C, CArg); } }; @@ -287,25 +280,22 @@ public: template <typename ChannelT, typename ArgT, typename... ArgTs> class SequenceSerialization<ChannelT, ArgT, ArgTs...> { public: - template <typename CArgT, typename... CArgTs> - static Error serialize(ChannelT &C, CArgT &&CArg, - CArgTs &&... CArgs) { + static Error serialize(ChannelT &C, CArgT &&CArg, CArgTs &&...CArgs) { if (auto Err = SerializationTraits<ChannelT, ArgT, std::decay_t<CArgT>>::serialize( C, std::forward<CArgT>(CArg))) return Err; if (auto Err = SequenceTraits<ChannelT>::emitSeparator(C)) return Err; - return SequenceSerialization<ChannelT, ArgTs...>:: - serialize(C, std::forward<CArgTs>(CArgs)...); + return SequenceSerialization<ChannelT, ArgTs...>::serialize( + C, std::forward<CArgTs>(CArgs)...); } template <typename CArgT, typename... CArgTs> - static Error deserialize(ChannelT &C, CArgT &CArg, - CArgTs &... CArgs) { + static Error deserialize(ChannelT &C, CArgT &CArg, CArgTs &...CArgs) { if (auto Err = - SerializationTraits<ChannelT, ArgT, CArgT>::deserialize(C, CArg)) + SerializationTraits<ChannelT, ArgT, CArgT>::deserialize(C, CArg)) return Err; if (auto Err = SequenceTraits<ChannelT>::consumeSeparator(C)) return Err; @@ -314,25 +304,23 @@ public: }; template <typename ChannelT, typename... ArgTs> -Error serializeSeq(ChannelT &C, ArgTs &&... Args) { +Error serializeSeq(ChannelT &C, ArgTs &&...Args) { return SequenceSerialization<ChannelT, std::decay_t<ArgTs>...>::serialize( C, std::forward<ArgTs>(Args)...); } template <typename ChannelT, typename... ArgTs> -Error deserializeSeq(ChannelT &C, ArgTs &... Args) { +Error deserializeSeq(ChannelT &C, ArgTs &...Args) { return SequenceSerialization<ChannelT, ArgTs...>::deserialize(C, Args...); } -template <typename ChannelT> -class SerializationTraits<ChannelT, Error> { +template <typename ChannelT> class SerializationTraits<ChannelT, Error> { public: - using WrappedErrorSerializer = - std::function<Error(ChannelT &C, const ErrorInfoBase&)>; + std::function<Error(ChannelT &C, const ErrorInfoBase &)>; using WrappedErrorDeserializer = - std::function<Error(ChannelT &C, Error &Err)>; + std::function<Error(ChannelT &C, Error &Err)>; template <typename ErrorInfoT, typename SerializeFtor, typename DeserializeFtor> @@ -343,15 +331,14 @@ public: const std::string *KeyName = nullptr; { - // We're abusing the stability of std::map here: We take a reference to the - // key of the deserializers map to save us from duplicating the string in - // the serializer. 
This should be changed to use a stringpool if we switch - // to a map type that may move keys in memory. + // We're abusing the stability of std::map here: We take a reference to + // the key of the deserializers map to save us from duplicating the string + // in the serializer. This should be changed to use a stringpool if we + // switch to a map type that may move keys in memory. std::lock_guard<std::recursive_mutex> Lock(DeserializersMutex); - auto I = - Deserializers.insert(Deserializers.begin(), - std::make_pair(std::move(Name), - std::move(Deserialize))); + auto I = Deserializers.insert( + Deserializers.begin(), + std::make_pair(std::move(Name), std::move(Deserialize))); KeyName = &I->first; } @@ -376,13 +363,12 @@ public: if (!Err) return serializeSeq(C, std::string()); - return handleErrors(std::move(Err), - [&C](const ErrorInfoBase &EIB) { - auto SI = Serializers.find(EIB.dynamicClassID()); - if (SI == Serializers.end()) - return serializeAsStringError(C, EIB); - return (SI->second)(C, EIB); - }); + return handleErrors(std::move(Err), [&C](const ErrorInfoBase &EIB) { + auto SI = Serializers.find(EIB.dynamicClassID()); + if (SI == Serializers.end()) + return serializeAsStringError(C, EIB); + return (SI->second)(C, EIB); + }); } static Error deserialize(ChannelT &C, Error &Err) { @@ -404,7 +390,6 @@ public: } private: - static Error serializeAsStringError(ChannelT &C, const ErrorInfoBase &EIB) { std::string ErrMsg; { @@ -417,7 +402,7 @@ private: static std::recursive_mutex SerializersMutex; static std::recursive_mutex DeserializersMutex; - static std::map<const void*, WrappedErrorSerializer> Serializers; + static std::map<const void *, WrappedErrorSerializer> Serializers; static std::map<std::string, WrappedErrorDeserializer> Deserializers; }; @@ -428,14 +413,14 @@ template <typename ChannelT> std::recursive_mutex SerializationTraits<ChannelT, Error>::DeserializersMutex; template <typename ChannelT> -std::map<const void*, +std::map<const void *, typename SerializationTraits<ChannelT, Error>::WrappedErrorSerializer> -SerializationTraits<ChannelT, Error>::Serializers; + SerializationTraits<ChannelT, Error>::Serializers; template <typename ChannelT> -std::map<std::string, - typename SerializationTraits<ChannelT, Error>::WrappedErrorDeserializer> -SerializationTraits<ChannelT, Error>::Deserializers; +std::map<std::string, typename SerializationTraits< + ChannelT, Error>::WrappedErrorDeserializer> + SerializationTraits<ChannelT, Error>::Deserializers; /// Registers a serializer and deserializer for the given error type on the /// given channel type. @@ -444,32 +429,29 @@ template <typename ChannelT, typename ErrorInfoT, typename SerializeFtor, void registerErrorSerialization(std::string Name, SerializeFtor &&Serialize, DeserializeFtor &&Deserialize) { SerializationTraits<ChannelT, Error>::template registerErrorType<ErrorInfoT>( - std::move(Name), - std::forward<SerializeFtor>(Serialize), - std::forward<DeserializeFtor>(Deserialize)); + std::move(Name), std::forward<SerializeFtor>(Serialize), + std::forward<DeserializeFtor>(Deserialize)); } /// Registers serialization/deserialization for StringError. 
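An illustrative sketch (not part of the diff) of the serializeSeq / deserializeSeq entry points defined above. It uses the pipe-backed channel from the earlier sketch as a loopback, relies on the fixed-width integer and std::string serializations that RawByteChannel.h provides, and calls registerStringError once so StringError values could also cross the channel.

#include "llvm/ExecutionEngine/Orc/Shared/FDRawByteChannel.h"

llvm::Error roundTrip(llvm::orc::shared::FDRawByteChannel &C) {
  using namespace llvm::orc::shared;
  registerStringError<FDRawByteChannel>(); // one-time Error setup per channel type
  if (auto Err = serializeSeq(C, uint32_t(42), std::string("hello")))
    return Err;
  if (auto Err = C.send())
    return Err;
  uint32_t N = 0;
  std::string S;
  return deserializeSeq(C, N, S); // N == 42 and S == "hello" on success
}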
-template <typename ChannelT> -void registerStringError() { +template <typename ChannelT> void registerStringError() { static bool AlreadyRegistered = false; if (!AlreadyRegistered) { registerErrorSerialization<ChannelT, StringError>( - "StringError", - [](ChannelT &C, const StringError &SE) { - return serializeSeq(C, SE.getMessage()); - }, - [](ChannelT &C, Error &Err) -> Error { - ErrorAsOutParameter EAO(&Err); - std::string Msg; - if (auto E2 = deserializeSeq(C, Msg)) - return E2; - Err = - make_error<StringError>(std::move(Msg), - orcError( - OrcErrorCode::UnknownErrorCodeFromRemote)); - return Error::success(); - }); + "StringError", + [](ChannelT &C, const StringError &SE) { + return serializeSeq(C, SE.getMessage()); + }, + [](ChannelT &C, Error &Err) -> Error { + ErrorAsOutParameter EAO(&Err); + std::string Msg; + if (auto E2 = deserializeSeq(C, Msg)) + return E2; + Err = make_error<StringError>( + std::move(Msg), + orcError(OrcErrorCode::UnknownErrorCodeFromRemote)); + return Error::success(); + }); AlreadyRegistered = true; } } @@ -478,7 +460,6 @@ void registerStringError() { template <typename ChannelT, typename T1, typename T2> class SerializationTraits<ChannelT, Expected<T1>, Expected<T2>> { public: - static Error serialize(ChannelT &C, Expected<T2> &&ValOrErr) { if (ValOrErr) { if (auto Err = serializeSeq(C, true)) @@ -509,7 +490,6 @@ public: template <typename ChannelT, typename T1, typename T2> class SerializationTraits<ChannelT, Expected<T1>, T2> { public: - static Error serialize(ChannelT &C, T2 &&Val) { return serializeSeq(C, Expected<T2>(std::forward<T2>(Val))); } @@ -519,7 +499,6 @@ public: template <typename ChannelT, typename T> class SerializationTraits<ChannelT, Expected<T>, Error> { public: - static Error serialize(ChannelT &C, Error &&Err) { return serializeSeq(C, Expected<T>(std::move(Err))); } @@ -547,7 +526,6 @@ public: template <typename ChannelT, typename... ArgTs> class SerializationTraits<ChannelT, std::tuple<ArgTs...>> { public: - /// RPC channel serialization for std::tuple. static Error serialize(ChannelT &C, const std::tuple<ArgTs...> &V) { return serializeTupleHelper(C, V, std::index_sequence_for<ArgTs...>()); @@ -574,11 +552,35 @@ private: } }; +template <typename ChannelT, typename T> +class SerializationTraits<ChannelT, Optional<T>> { +public: + /// Serialize an Optional<T>. + static Error serialize(ChannelT &C, const Optional<T> &O) { + if (auto Err = serializeSeq(C, O != None)) + return Err; + if (O) + if (auto Err = serializeSeq(C, *O)) + return Err; + return Error::success(); + } + + /// Deserialize an Optional<T>. + static Error deserialize(ChannelT &C, Optional<T> &O) { + bool HasValue = false; + if (auto Err = deserializeSeq(C, HasValue)) + return Err; + if (HasValue) + if (auto Err = deserializeSeq(C, *O)) + return Err; + return Error::success(); + }; +}; + /// SerializationTraits default specialization for std::vector. template <typename ChannelT, typename T> class SerializationTraits<ChannelT, std::vector<T>> { public: - /// Serialize a std::vector<T> from std::vector<T>. static Error serialize(ChannelT &C, const std::vector<T> &V) { if (auto Err = serializeSeq(C, static_cast<uint64_t>(V.size()))) @@ -609,6 +611,22 @@ public: } }; +/// Enable vector serialization from an ArrayRef. 
+template <typename ChannelT, typename T> +class SerializationTraits<ChannelT, std::vector<T>, ArrayRef<T>> { +public: + static Error serialize(ChannelT &C, ArrayRef<T> V) { + if (auto Err = serializeSeq(C, static_cast<uint64_t>(V.size()))) + return Err; + + for (const auto &E : V) + if (auto Err = serializeSeq(C, E)) + return Err; + + return Error::success(); + } +}; + template <typename ChannelT, typename T, typename T2> class SerializationTraits<ChannelT, std::set<T>, std::set<T2>> { public: @@ -695,8 +713,57 @@ public: } }; -} // end namespace rpc +template <typename ChannelT, typename K, typename V, typename K2, typename V2> +class SerializationTraits<ChannelT, std::map<K, V>, DenseMap<K2, V2>> { +public: + /// Serialize a std::map<K, V> from DenseMap<K2, V2>. + static Error serialize(ChannelT &C, const DenseMap<K2, V2> &M) { + if (auto Err = serializeSeq(C, static_cast<uint64_t>(M.size()))) + return Err; + + for (auto &E : M) { + if (auto Err = + SerializationTraits<ChannelT, K, K2>::serialize(C, E.first)) + return Err; + + if (auto Err = + SerializationTraits<ChannelT, V, V2>::serialize(C, E.second)) + return Err; + } + + return Error::success(); + } + + /// Serialize a std::map<K, V> from DenseMap<K2, V2>. + static Error deserialize(ChannelT &C, DenseMap<K2, V2> &M) { + assert(M.empty() && "Expected default-constructed map to deserialize into"); + + uint64_t Count = 0; + if (auto Err = deserializeSeq(C, Count)) + return Err; + + while (Count-- != 0) { + std::pair<K2, V2> Val; + if (auto Err = + SerializationTraits<ChannelT, K, K2>::deserialize(C, Val.first)) + return Err; + + if (auto Err = + SerializationTraits<ChannelT, V, V2>::deserialize(C, Val.second)) + return Err; + + auto Added = M.insert(Val).second; + if (!Added) + return make_error<StringError>("Duplicate element in deserialized map", + orcError(OrcErrorCode::UnknownORCError)); + } + + return Error::success(); + } +}; + +} // namespace shared } // end namespace orc } // end namespace llvm -#endif // LLVM_EXECUTIONENGINE_ORC_RPCSERIALIZATION_H +#endif // LLVM_EXECUTIONENGINE_ORC_RPC_RPCSERIALIZATION_H diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Shared/TargetProcessControlTypes.h b/llvm/include/llvm/ExecutionEngine/Orc/Shared/TargetProcessControlTypes.h new file mode 100644 index 000000000000..d01b3ef21f80 --- /dev/null +++ b/llvm/include/llvm/ExecutionEngine/Orc/Shared/TargetProcessControlTypes.h @@ -0,0 +1,165 @@ +//===--- TargetProcessControlTypes.h -- Shared Core/TPC types ---*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// TargetProcessControl types that are used by both the Orc and +// OrcTargetProcess libraries. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EXECUTIONENGINE_ORC_SHARED_TARGETPROCESSCONTROLTYPES_H +#define LLVM_EXECUTIONENGINE_ORC_SHARED_TARGETPROCESSCONTROLTYPES_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ExecutionEngine/JITSymbol.h" + +#include <vector> + +namespace llvm { +namespace orc { +namespace tpctypes { + +template <typename T> struct UIntWrite { + UIntWrite() = default; + UIntWrite(JITTargetAddress Address, T Value) + : Address(Address), Value(Value) {} + + JITTargetAddress Address = 0; + T Value = 0; +}; + +/// Describes a write to a uint8_t. +using UInt8Write = UIntWrite<uint8_t>; + +/// Describes a write to a uint16_t. +using UInt16Write = UIntWrite<uint16_t>; + +/// Describes a write to a uint32_t. +using UInt32Write = UIntWrite<uint32_t>; + +/// Describes a write to a uint64_t. +using UInt64Write = UIntWrite<uint64_t>; + +/// Describes a write to a buffer. +/// For use with TargetProcessControl::MemoryAccess objects. +struct BufferWrite { + BufferWrite() = default; + BufferWrite(JITTargetAddress Address, StringRef Buffer) + : Address(Address), Buffer(Buffer) {} + + JITTargetAddress Address = 0; + StringRef Buffer; +}; + +/// A handle used to represent a loaded dylib in the target process. +using DylibHandle = JITTargetAddress; + +using LookupResult = std::vector<JITTargetAddress>; + +/// Either a uint8_t array or a uint8_t*. +union CWrapperFunctionResultData { + uint8_t Value[8]; + uint8_t *ValuePtr; +}; + +/// C ABI compatible wrapper function result. +/// +/// This can be safely returned from extern "C" functions, but should be used +/// to construct a WrapperFunctionResult for safety. +struct CWrapperFunctionResult { + uint64_t Size; + CWrapperFunctionResultData Data; + void (*Destroy)(CWrapperFunctionResultData Data, uint64_t Size); +}; + +/// C++ wrapper function result: Same as CWrapperFunctionResult but +/// auto-releases memory. +class WrapperFunctionResult { +public: + /// Create a default WrapperFunctionResult. + WrapperFunctionResult() { zeroInit(R); } + + /// Create a WrapperFunctionResult from a CWrapperFunctionResult. This + /// instance takes ownership of the result object and will automatically + /// call the Destroy member upon destruction. + WrapperFunctionResult(CWrapperFunctionResult R) : R(R) {} + + WrapperFunctionResult(const WrapperFunctionResult &) = delete; + WrapperFunctionResult &operator=(const WrapperFunctionResult &) = delete; + + WrapperFunctionResult(WrapperFunctionResult &&Other) { + zeroInit(R); + std::swap(R, Other.R); + } + + WrapperFunctionResult &operator=(WrapperFunctionResult &&Other) { + CWrapperFunctionResult Tmp; + zeroInit(Tmp); + std::swap(Tmp, Other.R); + std::swap(R, Tmp); + return *this; + } + + ~WrapperFunctionResult() { + if (R.Destroy) + R.Destroy(R.Data, R.Size); + } + + /// Relinquish ownership of and return the CWrapperFunctionResult. + CWrapperFunctionResult release() { + CWrapperFunctionResult Tmp; + zeroInit(Tmp); + std::swap(R, Tmp); + return Tmp; + } + + /// Get an ArrayRef covering the data in the result. + ArrayRef<uint8_t> getData() const { + if (R.Size <= 8) + return ArrayRef<uint8_t>(R.Data.Value, R.Size); + return ArrayRef<uint8_t>(R.Data.ValuePtr, R.Size); + } + + /// Create a WrapperFunctionResult from the given integer, provided its + /// size is no greater than 64 bits. 
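An illustrative sketch (not part of the diff) of constructing the small write-descriptor values defined above. The addresses are placeholders; a real TargetProcessControl::MemoryAccess implementation would be the consumer of these descriptors.

#include "llvm/ExecutionEngine/Orc/Shared/TargetProcessControlTypes.h"

void describeWrites() {
  using namespace llvm::orc::tpctypes;
  // Poke a 32-bit flag at a (placeholder) target address.
  UInt32Write FlagWrite(/*Address=*/0x1000, /*Value=*/1);
  // Copy an arbitrary byte buffer to another (placeholder) address.
  const char Blob[] = {0x01, 0x02, 0x03};
  BufferWrite BlobWrite(/*Address=*/0x2000,
                        llvm::StringRef(Blob, sizeof(Blob)));
  (void)FlagWrite;
  (void)BlobWrite;
}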
+ template <typename T, + typename _ = std::enable_if_t<std::is_integral<T>::value && + sizeof(T) <= sizeof(uint64_t)>> + static WrapperFunctionResult from(T Value) { + CWrapperFunctionResult R; + R.Size = sizeof(T); + memcpy(&R.Data.Value, Value, R.Size); + R.Destroy = nullptr; + return R; + } + + /// Create a WrapperFunctionResult from the given string. + static WrapperFunctionResult from(StringRef S); + + /// Always free Data.ValuePtr by calling free on it. + static void destroyWithFree(CWrapperFunctionResultData Data, uint64_t Size); + + /// Always free Data.ValuePtr by calling delete[] on it. + static void destroyWithDeleteArray(CWrapperFunctionResultData Data, + uint64_t Size); + +private: + static void zeroInit(CWrapperFunctionResult &R) { + R.Size = 0; + R.Data.ValuePtr = nullptr; + R.Destroy = nullptr; + } + + CWrapperFunctionResult R; +}; + +} // end namespace tpctypes +} // end namespace orc +} // end namespace llvm + +#endif // LLVM_EXECUTIONENGINE_ORC_SHARED_TARGETPROCESSCONTROLTYPES_H diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Speculation.h b/llvm/include/llvm/ExecutionEngine/Orc/Speculation.h index d8213d3b35e8..a138f60a7756 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/Speculation.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/Speculation.h @@ -18,14 +18,10 @@ #include "llvm/ExecutionEngine/Orc/Core.h" #include "llvm/ExecutionEngine/Orc/DebugUtils.h" #include "llvm/ExecutionEngine/Orc/IRCompileLayer.h" -#include "llvm/IR/PassManager.h" -#include "llvm/Passes/PassBuilder.h" #include "llvm/Support/Debug.h" - #include <mutex> #include <type_traits> #include <utility> -#include <vector> namespace llvm { namespace orc { @@ -185,7 +181,8 @@ public: : IRLayer(ES, BaseLayer.getManglingOptions()), NextLayer(BaseLayer), S(Spec), Mangle(Mangle), QueryAnalysis(Interpreter) {} - void emit(MaterializationResponsibility R, ThreadSafeModule TSM); + void emit(std::unique_ptr<MaterializationResponsibility> R, + ThreadSafeModule TSM) override; private: TargetAndLikelies diff --git a/llvm/include/llvm/ExecutionEngine/Orc/TPCDynamicLibrarySearchGenerator.h b/llvm/include/llvm/ExecutionEngine/Orc/TPCDynamicLibrarySearchGenerator.h new file mode 100644 index 000000000000..ed4f6080bb4e --- /dev/null +++ b/llvm/include/llvm/ExecutionEngine/Orc/TPCDynamicLibrarySearchGenerator.h @@ -0,0 +1,66 @@ +//===------------ TPCDynamicLibrarySearchGenerator.h ------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Support loading and searching of dynamic libraries in a target process via +// the TargetProcessControl class. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EXECUTIONENGINE_ORC_TPCDYNAMICLIBRARYSEARCHGENERATOR_H +#define LLVM_EXECUTIONENGINE_ORC_TPCDYNAMICLIBRARYSEARCHGENERATOR_H + +#include "llvm/ADT/FunctionExtras.h" +#include "llvm/ExecutionEngine/Orc/TargetProcessControl.h" + +namespace llvm { +namespace orc { + +class TPCDynamicLibrarySearchGenerator : public DefinitionGenerator { +public: + using SymbolPredicate = unique_function<bool(const SymbolStringPtr &)>; + + /// Create a DynamicLibrarySearchGenerator that searches for symbols in the + /// library with the given handle. 
+ /// + /// If the Allow predicate is given then only symbols matching the predicate + /// will be searched for. If the predicate is not given then all symbols will + /// be searched for. + TPCDynamicLibrarySearchGenerator(TargetProcessControl &TPC, + tpctypes::DylibHandle H, + SymbolPredicate Allow = SymbolPredicate()) + : TPC(TPC), H(H), Allow(std::move(Allow)) {} + + /// Permanently loads the library at the given path and, on success, returns + /// a DynamicLibrarySearchGenerator that will search it for symbol definitions + /// in the library. On failure returns the reason the library failed to load. + static Expected<std::unique_ptr<TPCDynamicLibrarySearchGenerator>> + Load(TargetProcessControl &TPC, const char *LibraryPath, + SymbolPredicate Allow = SymbolPredicate()); + + /// Creates a TPCDynamicLibrarySearchGenerator that searches for symbols in + /// the target process. + static Expected<std::unique_ptr<TPCDynamicLibrarySearchGenerator>> + GetForTargetProcess(TargetProcessControl &TPC, + SymbolPredicate Allow = SymbolPredicate()) { + return Load(TPC, nullptr, std::move(Allow)); + } + + Error tryToGenerate(LookupState &LS, LookupKind K, JITDylib &JD, + JITDylibLookupFlags JDLookupFlags, + const SymbolLookupSet &Symbols) override; + +private: + TargetProcessControl &TPC; + tpctypes::DylibHandle H; + SymbolPredicate Allow; +}; + +} // end namespace orc +} // end namespace llvm + +#endif // LLVM_EXECUTIONENGINE_ORC_TPCDYNAMICLIBRARYSEARCHGENERATOR_H diff --git a/llvm/include/llvm/ExecutionEngine/Orc/TPCEHFrameRegistrar.h b/llvm/include/llvm/ExecutionEngine/Orc/TPCEHFrameRegistrar.h new file mode 100644 index 000000000000..519f818907f9 --- /dev/null +++ b/llvm/include/llvm/ExecutionEngine/Orc/TPCEHFrameRegistrar.h @@ -0,0 +1,54 @@ +//===-- TPCEHFrameRegistrar.h - TPC based eh-frame registration -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// TargetProcessControl based eh-frame registration. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EXECUTIONENGINE_ORC_TPCEHFRAMEREGISTRAR_H +#define LLVM_EXECUTIONENGINE_ORC_TPCEHFRAMEREGISTRAR_H + +#include "llvm/ExecutionEngine/JITLink/EHFrameSupport.h" +#include "llvm/ExecutionEngine/Orc/TargetProcessControl.h" + +namespace llvm { +namespace orc { + +/// Register/Deregisters EH frames in a remote process via a +/// TargetProcessControl instance. +class TPCEHFrameRegistrar : public jitlink::EHFrameRegistrar { +public: + /// Create from a TargetProcessControl instance alone. This will use + /// the TPC's lookupSymbols method to find the registration/deregistration + /// funciton addresses by name. + static Expected<std::unique_ptr<TPCEHFrameRegistrar>> + Create(TargetProcessControl &TPC); + + /// Create a TPCEHFrameRegistrar with the given TargetProcessControl + /// object and registration/deregistration function addresses. 
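An illustrative sketch (not part of the diff) of attaching the TPCDynamicLibrarySearchGenerator above to a JITDylib so that symbols not defined in the JIT are looked up in the target process. MainJD and TPC are assumed to exist already; addGenerator comes from the existing JITDylib API.

#include "llvm/ExecutionEngine/Orc/Core.h"
#include "llvm/ExecutionEngine/Orc/TPCDynamicLibrarySearchGenerator.h"

llvm::Error addProcessSymbols(llvm::orc::JITDylib &MainJD,
                              llvm::orc::TargetProcessControl &TPC) {
  auto G =
      llvm::orc::TPCDynamicLibrarySearchGenerator::GetForTargetProcess(TPC);
  if (!G)
    return G.takeError();
  // Fall back to the target process's own symbols during lookups in MainJD.
  MainJD.addGenerator(std::move(*G));
  return llvm::Error::success();
}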
+ TPCEHFrameRegistrar(TargetProcessControl &TPC, + JITTargetAddress RegisterEHFrameWrapperFnAddr, + JITTargetAddress DeregisterEHFRameWrapperFnAddr) + : TPC(TPC), RegisterEHFrameWrapperFnAddr(RegisterEHFrameWrapperFnAddr), + DeregisterEHFrameWrapperFnAddr(DeregisterEHFRameWrapperFnAddr) {} + + Error registerEHFrames(JITTargetAddress EHFrameSectionAddr, + size_t EHFrameSectionSize) override; + Error deregisterEHFrames(JITTargetAddress EHFrameSectionAddr, + size_t EHFrameSectionSize) override; + +private: + TargetProcessControl &TPC; + JITTargetAddress RegisterEHFrameWrapperFnAddr; + JITTargetAddress DeregisterEHFrameWrapperFnAddr; +}; + +} // end namespace orc +} // end namespace llvm + +#endif // LLVM_EXECUTIONENGINE_ORC_TPCEHFRAMEREGISTRAR_H diff --git a/llvm/include/llvm/ExecutionEngine/Orc/TPCIndirectionUtils.h b/llvm/include/llvm/ExecutionEngine/Orc/TPCIndirectionUtils.h new file mode 100644 index 000000000000..e7abd7fb90df --- /dev/null +++ b/llvm/include/llvm/ExecutionEngine/Orc/TPCIndirectionUtils.h @@ -0,0 +1,222 @@ +//===--- TPCIndirectionUtils.h - TPC based indirection utils ----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Indirection utilities (stubs, trampolines, lazy call-throughs) that use the +// TargetProcessControl API to interact with the target process. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EXECUTIONENGINE_ORC_TPCINDIRECTIONUTILS_H +#define LLVM_EXECUTIONENGINE_ORC_TPCINDIRECTIONUTILS_H + +#include "llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h" +#include "llvm/ExecutionEngine/Orc/IndirectionUtils.h" +#include "llvm/ExecutionEngine/Orc/LazyReexports.h" + +#include <mutex> + +namespace llvm { +namespace orc { + +class TargetProcessControl; + +/// Provides TargetProcessControl based indirect stubs, trampoline pool and +/// lazy call through manager. +class TPCIndirectionUtils { + friend class TPCIndirectionUtilsAccess; + +public: + /// ABI support base class. Used to write resolver, stub, and trampoline + /// blocks. 
+ class ABISupport { + protected: + ABISupport(unsigned PointerSize, unsigned TrampolineSize, unsigned StubSize, + unsigned StubToPointerMaxDisplacement, unsigned ResolverCodeSize) + : PointerSize(PointerSize), TrampolineSize(TrampolineSize), + StubSize(StubSize), + StubToPointerMaxDisplacement(StubToPointerMaxDisplacement), + ResolverCodeSize(ResolverCodeSize) {} + + public: + virtual ~ABISupport(); + + unsigned getPointerSize() const { return PointerSize; } + unsigned getTrampolineSize() const { return TrampolineSize; } + unsigned getStubSize() const { return StubSize; } + unsigned getStubToPointerMaxDisplacement() const { + return StubToPointerMaxDisplacement; + } + unsigned getResolverCodeSize() const { return ResolverCodeSize; } + + virtual void writeResolverCode(char *ResolverWorkingMem, + JITTargetAddress ResolverTargetAddr, + JITTargetAddress ReentryFnAddr, + JITTargetAddress ReentryCtxAddr) const = 0; + + virtual void writeTrampolines(char *TrampolineBlockWorkingMem, + JITTargetAddress TrampolineBlockTragetAddr, + JITTargetAddress ResolverAddr, + unsigned NumTrampolines) const = 0; + + virtual void + writeIndirectStubsBlock(char *StubsBlockWorkingMem, + JITTargetAddress StubsBlockTargetAddress, + JITTargetAddress PointersBlockTargetAddress, + unsigned NumStubs) const = 0; + + private: + unsigned PointerSize = 0; + unsigned TrampolineSize = 0; + unsigned StubSize = 0; + unsigned StubToPointerMaxDisplacement = 0; + unsigned ResolverCodeSize = 0; + }; + + /// Create using the given ABI class. + template <typename ORCABI> + static std::unique_ptr<TPCIndirectionUtils> + CreateWithABI(TargetProcessControl &TPC); + + /// Create based on the TargetProcessControl triple. + static Expected<std::unique_ptr<TPCIndirectionUtils>> + Create(TargetProcessControl &TPC); + + /// Return a reference to the TargetProcessControl object. + TargetProcessControl &getTargetProcessControl() const { return TPC; } + + /// Return a reference to the ABISupport object for this instance. + ABISupport &getABISupport() const { return *ABI; } + + /// Release memory for resources held by this instance. This *must* be called + /// prior to destruction of the class. + Error cleanup(); + + /// Write resolver code to the target process and return its address. + /// This must be called before any call to createTrampolinePool or + /// createLazyCallThroughManager. + Expected<JITTargetAddress> + writeResolverBlock(JITTargetAddress ReentryFnAddr, + JITTargetAddress ReentryCtxAddr); + + /// Returns the address of the Resolver block. Returns zero if the + /// writeResolverBlock method has not previously been called. + JITTargetAddress getResolverBlockAddress() const { return ResolverBlockAddr; } + + /// Create an IndirectStubsManager for the target process. + std::unique_ptr<IndirectStubsManager> createIndirectStubsManager(); + + /// Create a TrampolinePool for the target process. + TrampolinePool &getTrampolinePool(); + + /// Create a LazyCallThroughManager. + /// This function should only be called once. + LazyCallThroughManager & + createLazyCallThroughManager(ExecutionSession &ES, + JITTargetAddress ErrorHandlerAddr); + + /// Create a LazyCallThroughManager for the target process. 
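A rough setup sketch (not part of the patch; ES, TPC and ErrorHandlerAddr are assumed to exist): create the utilities for the target's triple, create the lazy call-through manager, then configure re-entry with the in-process helper declared further down in this header. Per the class's own contract, cleanup() must still be called before the object is destroyed.

Expected<std::unique_ptr<TPCIndirectionUtils>>
setUpIndirection(ExecutionSession &ES, TargetProcessControl &TPC,
                 JITTargetAddress ErrorHandlerAddr) {
  auto TPCIU = TPCIndirectionUtils::Create(TPC);
  if (!TPCIU)
    return TPCIU.takeError();
  // The call-through manager must exist before re-entry is configured.
  (*TPCIU)->createLazyCallThroughManager(ES, ErrorHandlerAddr);
  if (auto Err = setUpInProcessLCTMReentryViaTPCIU(**TPCIU))
    return std::move(Err);
  return std::move(*TPCIU);
}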
+ LazyCallThroughManager &getLazyCallThroughManager() { + assert(LCTM && "createLazyCallThroughManager must be called first"); + return *LCTM; + } + +private: + using Allocation = jitlink::JITLinkMemoryManager::Allocation; + + struct IndirectStubInfo { + IndirectStubInfo() = default; + IndirectStubInfo(JITTargetAddress StubAddress, + JITTargetAddress PointerAddress) + : StubAddress(StubAddress), PointerAddress(PointerAddress) {} + JITTargetAddress StubAddress = 0; + JITTargetAddress PointerAddress = 0; + }; + + using IndirectStubInfoVector = std::vector<IndirectStubInfo>; + + /// Create a TPCIndirectionUtils instance. + TPCIndirectionUtils(TargetProcessControl &TPC, + std::unique_ptr<ABISupport> ABI); + + Expected<IndirectStubInfoVector> getIndirectStubs(unsigned NumStubs); + + std::mutex TPCUIMutex; + TargetProcessControl &TPC; + std::unique_ptr<ABISupport> ABI; + JITTargetAddress ResolverBlockAddr; + std::unique_ptr<jitlink::JITLinkMemoryManager::Allocation> ResolverBlock; + std::unique_ptr<TrampolinePool> TP; + std::unique_ptr<LazyCallThroughManager> LCTM; + + std::vector<IndirectStubInfo> AvailableIndirectStubs; + std::vector<std::unique_ptr<Allocation>> IndirectStubAllocs; +}; + +/// This will call writeResolver on the given TPCIndirectionUtils instance +/// to set up re-entry via a function that will directly return the trampoline +/// landing address. +/// +/// The TPCIndirectionUtils' LazyCallThroughManager must have been previously +/// created via TPCIndirectionUtils::createLazyCallThroughManager. +/// +/// The TPCIndirectionUtils' writeResolver method must not have been previously +/// called. +/// +/// This function is experimental and likely subject to revision. +Error setUpInProcessLCTMReentryViaTPCIU(TPCIndirectionUtils &TPCIU); + +namespace detail { + +template <typename ORCABI> +class ABISupportImpl : public TPCIndirectionUtils::ABISupport { +public: + ABISupportImpl() + : ABISupport(ORCABI::PointerSize, ORCABI::TrampolineSize, + ORCABI::StubSize, ORCABI::StubToPointerMaxDisplacement, + ORCABI::ResolverCodeSize) {} + + void writeResolverCode(char *ResolverWorkingMem, + JITTargetAddress ResolverTargetAddr, + JITTargetAddress ReentryFnAddr, + JITTargetAddress ReentryCtxAddr) const override { + ORCABI::writeResolverCode(ResolverWorkingMem, ResolverTargetAddr, + ReentryFnAddr, ReentryCtxAddr); + } + + void writeTrampolines(char *TrampolineBlockWorkingMem, + JITTargetAddress TrampolineBlockTargetAddr, + JITTargetAddress ResolverAddr, + unsigned NumTrampolines) const override { + ORCABI::writeTrampolines(TrampolineBlockWorkingMem, + TrampolineBlockTargetAddr, ResolverAddr, + NumTrampolines); + } + + void writeIndirectStubsBlock(char *StubsBlockWorkingMem, + JITTargetAddress StubsBlockTargetAddress, + JITTargetAddress PointersBlockTargetAddress, + unsigned NumStubs) const override { + ORCABI::writeIndirectStubsBlock(StubsBlockWorkingMem, + StubsBlockTargetAddress, + PointersBlockTargetAddress, NumStubs); + } +}; + +} // end namespace detail + +template <typename ORCABI> +std::unique_ptr<TPCIndirectionUtils> +TPCIndirectionUtils::CreateWithABI(TargetProcessControl &TPC) { + return std::unique_ptr<TPCIndirectionUtils>(new TPCIndirectionUtils( + TPC, std::make_unique<detail::ABISupportImpl<ORCABI>>())); +} + +} // end namespace orc +} // end namespace llvm + +#endif // LLVM_EXECUTIONENGINE_ORC_TPCINDIRECTIONUTILS_H diff --git a/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/OrcRPCTPCServer.h b/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/OrcRPCTPCServer.h new file mode 
100644 index 000000000000..253e06ba0ba1 --- /dev/null +++ b/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/OrcRPCTPCServer.h @@ -0,0 +1,620 @@ +//===-- OrcRPCTPCServer.h -- OrcRPCTargetProcessControl Server --*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// OrcRPCTargetProcessControl server class. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EXECUTIONENGINE_ORC_TARGETPROCESS_ORCRPCTPCSERVER_H +#define LLVM_EXECUTIONENGINE_ORC_TARGETPROCESS_ORCRPCTPCSERVER_H + +#include "llvm/ADT/BitmaskEnum.h" +#include "llvm/ExecutionEngine/Orc/Shared/RPCUtils.h" +#include "llvm/ExecutionEngine/Orc/Shared/RawByteChannel.h" +#include "llvm/ExecutionEngine/Orc/Shared/TargetProcessControlTypes.h" +#include "llvm/ExecutionEngine/Orc/TargetProcess/TargetExecutionUtils.h" +#include "llvm/ExecutionEngine/Orc/TargetProcessControl.h" +#include "llvm/Support/DynamicLibrary.h" +#include "llvm/Support/FormatVariadic.h" +#include "llvm/Support/Host.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/Memory.h" +#include "llvm/Support/Process.h" + +#include <atomic> + +namespace llvm { +namespace orc { + +namespace orcrpctpc { + +enum WireProtectionFlags : uint8_t { + WPF_None = 0, + WPF_Read = 1U << 0, + WPF_Write = 1U << 1, + WPF_Exec = 1U << 2, + LLVM_MARK_AS_BITMASK_ENUM(WPF_Exec) +}; + +/// Convert from sys::Memory::ProtectionFlags +inline WireProtectionFlags +toWireProtectionFlags(sys::Memory::ProtectionFlags PF) { + WireProtectionFlags WPF = WPF_None; + if (PF & sys::Memory::MF_READ) + WPF |= WPF_Read; + if (PF & sys::Memory::MF_WRITE) + WPF |= WPF_Write; + if (PF & sys::Memory::MF_EXEC) + WPF |= WPF_Exec; + return WPF; +} + +inline sys::Memory::ProtectionFlags +fromWireProtectionFlags(WireProtectionFlags WPF) { + int PF = 0; + if (WPF & WPF_Read) + PF |= sys::Memory::MF_READ; + if (WPF & WPF_Write) + PF |= sys::Memory::MF_WRITE; + if (WPF & WPF_Exec) + PF |= sys::Memory::MF_EXEC; + return static_cast<sys::Memory::ProtectionFlags>(PF); +} + +struct ReserveMemRequestElement { + WireProtectionFlags Prot = WPF_None; + uint64_t Size = 0; + uint64_t Alignment = 0; +}; + +using ReserveMemRequest = std::vector<ReserveMemRequestElement>; + +struct ReserveMemResultElement { + WireProtectionFlags Prot = WPF_None; + JITTargetAddress Address = 0; + uint64_t AllocatedSize = 0; +}; + +using ReserveMemResult = std::vector<ReserveMemResultElement>; + +struct ReleaseOrFinalizeMemRequestElement { + WireProtectionFlags Prot = WPF_None; + JITTargetAddress Address = 0; + uint64_t Size = 0; +}; + +using ReleaseOrFinalizeMemRequest = + std::vector<ReleaseOrFinalizeMemRequestElement>; + +} // end namespace orcrpctpc + +namespace shared { + +template <> class SerializationTypeName<tpctypes::UInt8Write> { +public: + static const char *getName() { return "UInt8Write"; } +}; + +template <> class SerializationTypeName<tpctypes::UInt16Write> { +public: + static const char *getName() { return "UInt16Write"; } +}; + +template <> class SerializationTypeName<tpctypes::UInt32Write> { +public: + static const char *getName() { return "UInt32Write"; } +}; + +template <> class SerializationTypeName<tpctypes::UInt64Write> { +public: + static const char *getName() { return "UInt64Write"; } +}; + +template <> class 
SerializationTypeName<tpctypes::BufferWrite> { +public: + static const char *getName() { return "BufferWrite"; } +}; + +template <> class SerializationTypeName<orcrpctpc::ReserveMemRequestElement> { +public: + static const char *getName() { return "ReserveMemRequestElement"; } +}; + +template <> class SerializationTypeName<orcrpctpc::ReserveMemResultElement> { +public: + static const char *getName() { return "ReserveMemResultElement"; } +}; + +template <> +class SerializationTypeName<orcrpctpc::ReleaseOrFinalizeMemRequestElement> { +public: + static const char *getName() { return "ReleaseOrFinalizeMemRequestElement"; } +}; + +template <> class SerializationTypeName<tpctypes::WrapperFunctionResult> { +public: + static const char *getName() { return "WrapperFunctionResult"; } +}; + +template <typename ChannelT, typename WriteT> +class SerializationTraits< + ChannelT, WriteT, WriteT, + std::enable_if_t<std::is_same<WriteT, tpctypes::UInt8Write>::value || + std::is_same<WriteT, tpctypes::UInt16Write>::value || + std::is_same<WriteT, tpctypes::UInt32Write>::value || + std::is_same<WriteT, tpctypes::UInt64Write>::value>> { +public: + static Error serialize(ChannelT &C, const WriteT &W) { + return serializeSeq(C, W.Address, W.Value); + } + static Error deserialize(ChannelT &C, WriteT &W) { + return deserializeSeq(C, W.Address, W.Value); + } +}; + +template <typename ChannelT> +class SerializationTraits< + ChannelT, tpctypes::BufferWrite, tpctypes::BufferWrite, + std::enable_if_t<std::is_base_of<RawByteChannel, ChannelT>::value>> { +public: + static Error serialize(ChannelT &C, const tpctypes::BufferWrite &W) { + uint64_t Size = W.Buffer.size(); + if (auto Err = serializeSeq(C, W.Address, Size)) + return Err; + + return C.appendBytes(W.Buffer.data(), Size); + } + static Error deserialize(ChannelT &C, tpctypes::BufferWrite &W) { + JITTargetAddress Address; + uint64_t Size; + + if (auto Err = deserializeSeq(C, Address, Size)) + return Err; + + char *Buffer = jitTargetAddressToPointer<char *>(Address); + + if (auto Err = C.readBytes(Buffer, Size)) + return Err; + + W = {Address, StringRef(Buffer, Size)}; + return Error::success(); + } +}; + +template <typename ChannelT> +class SerializationTraits<ChannelT, orcrpctpc::ReserveMemRequestElement> { +public: + static Error serialize(ChannelT &C, + const orcrpctpc::ReserveMemRequestElement &E) { + return serializeSeq(C, static_cast<uint8_t>(E.Prot), E.Size, E.Alignment); + } + + static Error deserialize(ChannelT &C, + orcrpctpc::ReserveMemRequestElement &E) { + return deserializeSeq(C, *reinterpret_cast<uint8_t *>(&E.Prot), E.Size, + E.Alignment); + } +}; + +template <typename ChannelT> +class SerializationTraits<ChannelT, orcrpctpc::ReserveMemResultElement> { +public: + static Error serialize(ChannelT &C, + const orcrpctpc::ReserveMemResultElement &E) { + return serializeSeq(C, static_cast<uint8_t>(E.Prot), E.Address, + E.AllocatedSize); + } + + static Error deserialize(ChannelT &C, orcrpctpc::ReserveMemResultElement &E) { + return deserializeSeq(C, *reinterpret_cast<uint8_t *>(&E.Prot), E.Address, + E.AllocatedSize); + } +}; + +template <typename ChannelT> +class SerializationTraits<ChannelT, + orcrpctpc::ReleaseOrFinalizeMemRequestElement> { +public: + static Error + serialize(ChannelT &C, + const orcrpctpc::ReleaseOrFinalizeMemRequestElement &E) { + return serializeSeq(C, static_cast<uint8_t>(E.Prot), E.Address, E.Size); + } + + static Error deserialize(ChannelT &C, + orcrpctpc::ReleaseOrFinalizeMemRequestElement &E) { + return deserializeSeq(C, 
*reinterpret_cast<uint8_t *>(&E.Prot), E.Address, + E.Size); + } +}; + +template <typename ChannelT> +class SerializationTraits< + ChannelT, tpctypes::WrapperFunctionResult, tpctypes::WrapperFunctionResult, + std::enable_if_t<std::is_base_of<RawByteChannel, ChannelT>::value>> { +public: + static Error serialize(ChannelT &C, + const tpctypes::WrapperFunctionResult &E) { + auto Data = E.getData(); + if (auto Err = serializeSeq(C, static_cast<uint64_t>(Data.size()))) + return Err; + if (Data.size() == 0) + return Error::success(); + return C.appendBytes(reinterpret_cast<const char *>(Data.data()), + Data.size()); + } + + static Error deserialize(ChannelT &C, tpctypes::WrapperFunctionResult &E) { + tpctypes::CWrapperFunctionResult R; + + R.Size = 0; + R.Data.ValuePtr = nullptr; + R.Destroy = nullptr; + + if (auto Err = deserializeSeq(C, R.Size)) + return Err; + if (R.Size == 0) + return Error::success(); + R.Data.ValuePtr = new uint8_t[R.Size]; + if (auto Err = + C.readBytes(reinterpret_cast<char *>(R.Data.ValuePtr), R.Size)) { + R.Destroy = tpctypes::WrapperFunctionResult::destroyWithDeleteArray; + return Err; + } + + E = tpctypes::WrapperFunctionResult(R); + return Error::success(); + } +}; + +} // end namespace shared + +namespace orcrpctpc { + +using RemoteSymbolLookupSet = std::vector<std::pair<std::string, bool>>; +using RemoteLookupRequest = + std::pair<tpctypes::DylibHandle, RemoteSymbolLookupSet>; + +class GetTargetTriple + : public shared::RPCFunction<GetTargetTriple, std::string()> { +public: + static const char *getName() { return "GetTargetTriple"; } +}; + +class GetPageSize : public shared::RPCFunction<GetPageSize, uint64_t()> { +public: + static const char *getName() { return "GetPageSize"; } +}; + +class ReserveMem + : public shared::RPCFunction<ReserveMem, Expected<ReserveMemResult>( + ReserveMemRequest)> { +public: + static const char *getName() { return "ReserveMem"; } +}; + +class FinalizeMem + : public shared::RPCFunction<FinalizeMem, + Error(ReleaseOrFinalizeMemRequest)> { +public: + static const char *getName() { return "FinalizeMem"; } +}; + +class ReleaseMem + : public shared::RPCFunction<ReleaseMem, + Error(ReleaseOrFinalizeMemRequest)> { +public: + static const char *getName() { return "ReleaseMem"; } +}; + +class WriteUInt8s + : public shared::RPCFunction<WriteUInt8s, + Error(std::vector<tpctypes::UInt8Write>)> { +public: + static const char *getName() { return "WriteUInt8s"; } +}; + +class WriteUInt16s + : public shared::RPCFunction<WriteUInt16s, + Error(std::vector<tpctypes::UInt16Write>)> { +public: + static const char *getName() { return "WriteUInt16s"; } +}; + +class WriteUInt32s + : public shared::RPCFunction<WriteUInt32s, + Error(std::vector<tpctypes::UInt32Write>)> { +public: + static const char *getName() { return "WriteUInt32s"; } +}; + +class WriteUInt64s + : public shared::RPCFunction<WriteUInt64s, + Error(std::vector<tpctypes::UInt64Write>)> { +public: + static const char *getName() { return "WriteUInt64s"; } +}; + +class WriteBuffers + : public shared::RPCFunction<WriteBuffers, + Error(std::vector<tpctypes::BufferWrite>)> { +public: + static const char *getName() { return "WriteBuffers"; } +}; + +class LoadDylib + : public shared::RPCFunction<LoadDylib, Expected<tpctypes::DylibHandle>( + std::string DylibPath)> { +public: + static const char *getName() { return "LoadDylib"; } +}; + +class LookupSymbols + : public shared::RPCFunction<LookupSymbols, + Expected<std::vector<tpctypes::LookupResult>>( + std::vector<RemoteLookupRequest>)> { +public: + static 
const char *getName() { return "LookupSymbols"; } +}; + +class RunMain + : public shared::RPCFunction<RunMain, + int32_t(JITTargetAddress MainAddr, + std::vector<std::string> Args)> { +public: + static const char *getName() { return "RunMain"; } +}; + +class RunWrapper + : public shared::RPCFunction<RunWrapper, + tpctypes::WrapperFunctionResult( + JITTargetAddress, std::vector<uint8_t>)> { +public: + static const char *getName() { return "RunWrapper"; } +}; + +class CloseConnection : public shared::RPCFunction<CloseConnection, void()> { +public: + static const char *getName() { return "CloseConnection"; } +}; + +} // end namespace orcrpctpc + +/// TargetProcessControl for a process connected via an ORC RPC Endpoint. +template <typename RPCEndpointT> class OrcRPCTPCServer { +public: + /// Create an OrcRPCTPCServer from the given endpoint. + OrcRPCTPCServer(RPCEndpointT &EP) : EP(EP) { + using ThisT = OrcRPCTPCServer<RPCEndpointT>; + + TripleStr = sys::getProcessTriple(); + PageSize = sys::Process::getPageSizeEstimate(); + + EP.template addHandler<orcrpctpc::GetTargetTriple>(*this, + &ThisT::getTargetTriple); + EP.template addHandler<orcrpctpc::GetPageSize>(*this, &ThisT::getPageSize); + + EP.template addHandler<orcrpctpc::ReserveMem>(*this, &ThisT::reserveMemory); + EP.template addHandler<orcrpctpc::FinalizeMem>(*this, + &ThisT::finalizeMemory); + EP.template addHandler<orcrpctpc::ReleaseMem>(*this, &ThisT::releaseMemory); + + EP.template addHandler<orcrpctpc::WriteUInt8s>( + handleWriteUInt<tpctypes::UInt8Write>); + EP.template addHandler<orcrpctpc::WriteUInt16s>( + handleWriteUInt<tpctypes::UInt16Write>); + EP.template addHandler<orcrpctpc::WriteUInt32s>( + handleWriteUInt<tpctypes::UInt32Write>); + EP.template addHandler<orcrpctpc::WriteUInt64s>( + handleWriteUInt<tpctypes::UInt64Write>); + EP.template addHandler<orcrpctpc::WriteBuffers>(handleWriteBuffer); + + EP.template addHandler<orcrpctpc::LoadDylib>(*this, &ThisT::loadDylib); + EP.template addHandler<orcrpctpc::LookupSymbols>(*this, + &ThisT::lookupSymbols); + + EP.template addHandler<orcrpctpc::RunMain>(*this, &ThisT::runMain); + EP.template addHandler<orcrpctpc::RunWrapper>(*this, &ThisT::runWrapper); + + EP.template addHandler<orcrpctpc::CloseConnection>(*this, + &ThisT::closeConnection); + } + + /// Set the ProgramName to be used as the first argv element when running + /// functions via runAsMain. + void setProgramName(Optional<std::string> ProgramName = None) { + this->ProgramName = std::move(ProgramName); + } + + /// Get the RPC endpoint for this server. + RPCEndpointT &getEndpoint() { return EP; } + + /// Run the server loop. + Error run() { + while (!Finished) { + if (auto Err = EP.handleOne()) + return Err; + } + return Error::success(); + } + +private: + std::string getTargetTriple() { return TripleStr; } + uint64_t getPageSize() { return PageSize; } + + template <typename WriteT> + static void handleWriteUInt(const std::vector<WriteT> &Ws) { + using ValueT = decltype(std::declval<WriteT>().Value); + for (auto &W : Ws) + *jitTargetAddressToPointer<ValueT *>(W.Address) = W.Value; + } + + std::string getProtStr(orcrpctpc::WireProtectionFlags WPF) { + std::string Result; + Result += (WPF & orcrpctpc::WPF_Read) ? 'R' : '-'; + Result += (WPF & orcrpctpc::WPF_Write) ? 'W' : '-'; + Result += (WPF & orcrpctpc::WPF_Exec) ? 
'X' : '-'; + return Result; + } + + static void handleWriteBuffer(const std::vector<tpctypes::BufferWrite> &Ws) { + for (auto &W : Ws) { + memcpy(jitTargetAddressToPointer<char *>(W.Address), W.Buffer.data(), + W.Buffer.size()); + } + } + + Expected<orcrpctpc::ReserveMemResult> + reserveMemory(const orcrpctpc::ReserveMemRequest &Request) { + orcrpctpc::ReserveMemResult Allocs; + auto PF = sys::Memory::MF_READ | sys::Memory::MF_WRITE; + + uint64_t TotalSize = 0; + + for (const auto &E : Request) { + uint64_t Size = alignTo(E.Size, PageSize); + uint16_t Align = E.Alignment; + + if ((Align > PageSize) || (PageSize % Align)) + return make_error<StringError>( + "Page alignment does not satisfy requested alignment", + inconvertibleErrorCode()); + + TotalSize += Size; + } + + // Allocate memory slab. + std::error_code EC; + auto MB = sys::Memory::allocateMappedMemory(TotalSize, nullptr, PF, EC); + if (EC) + return make_error<StringError>("Unable to allocate memory: " + + EC.message(), + inconvertibleErrorCode()); + + // Zero-fill the whole thing. + memset(MB.base(), 0, MB.allocatedSize()); + + // Carve up sections to return. + uint64_t SectionBase = 0; + for (const auto &E : Request) { + uint64_t SectionSize = alignTo(E.Size, PageSize); + Allocs.push_back({E.Prot, + pointerToJITTargetAddress(MB.base()) + SectionBase, + SectionSize}); + SectionBase += SectionSize; + } + + return Allocs; + } + + Error finalizeMemory(const orcrpctpc::ReleaseOrFinalizeMemRequest &FMR) { + for (const auto &E : FMR) { + sys::MemoryBlock MB(jitTargetAddressToPointer<void *>(E.Address), E.Size); + + auto PF = orcrpctpc::fromWireProtectionFlags(E.Prot); + if (auto EC = + sys::Memory::protectMappedMemory(MB, static_cast<unsigned>(PF))) + return make_error<StringError>("error protecting memory: " + + EC.message(), + inconvertibleErrorCode()); + } + return Error::success(); + } + + Error releaseMemory(const orcrpctpc::ReleaseOrFinalizeMemRequest &RMR) { + for (const auto &E : RMR) { + sys::MemoryBlock MB(jitTargetAddressToPointer<void *>(E.Address), E.Size); + + if (auto EC = sys::Memory::releaseMappedMemory(MB)) + return make_error<StringError>("error releasing memory: " + EC.message(), + inconvertibleErrorCode()); + } + return Error::success(); + } + + Expected<tpctypes::DylibHandle> loadDylib(const std::string &Path) { + std::string ErrMsg; + const char *DLPath = !Path.empty() ? 
Path.c_str() : nullptr; + auto DL = sys::DynamicLibrary::getPermanentLibrary(DLPath, &ErrMsg); + if (!DL.isValid()) + return make_error<StringError>(std::move(ErrMsg), + inconvertibleErrorCode()); + + tpctypes::DylibHandle H = Dylibs.size(); + Dylibs[H] = std::move(DL); + return H; + } + + Expected<std::vector<tpctypes::LookupResult>> + lookupSymbols(const std::vector<orcrpctpc::RemoteLookupRequest> &Request) { + std::vector<tpctypes::LookupResult> Result; + + for (const auto &E : Request) { + auto I = Dylibs.find(E.first); + if (I == Dylibs.end()) + return make_error<StringError>("Unrecognized handle", + inconvertibleErrorCode()); + auto &DL = I->second; + Result.push_back({}); + + for (const auto &KV : E.second) { + auto &SymString = KV.first; + bool WeakReference = KV.second; + + const char *Sym = SymString.c_str(); +#ifdef __APPLE__ + if (*Sym == '_') + ++Sym; +#endif + + void *Addr = DL.getAddressOfSymbol(Sym); + if (!Addr && !WeakReference) + return make_error<StringError>(Twine("Missing definition for ") + Sym, + inconvertibleErrorCode()); + + Result.back().push_back(pointerToJITTargetAddress(Addr)); + } + } + + return Result; + } + + int32_t runMain(JITTargetAddress MainFnAddr, + const std::vector<std::string> &Args) { + Optional<StringRef> ProgramNameOverride; + if (ProgramName) + ProgramNameOverride = *ProgramName; + + return runAsMain( + jitTargetAddressToFunction<int (*)(int, char *[])>(MainFnAddr), Args, + ProgramNameOverride); + } + + tpctypes::WrapperFunctionResult + runWrapper(JITTargetAddress WrapperFnAddr, + const std::vector<uint8_t> &ArgBuffer) { + using WrapperFnTy = tpctypes::CWrapperFunctionResult (*)( + const uint8_t *Data, uint64_t Size); + auto *WrapperFn = jitTargetAddressToFunction<WrapperFnTy>(WrapperFnAddr); + return WrapperFn(ArgBuffer.data(), ArgBuffer.size()); + } + + void closeConnection() { Finished = true; } + + std::string TripleStr; + uint64_t PageSize = 0; + Optional<std::string> ProgramName; + RPCEndpointT &EP; + std::atomic<bool> Finished{false}; + DenseMap<tpctypes::DylibHandle, sys::DynamicLibrary> Dylibs; +}; + +} // end namespace orc +} // end namespace llvm + +#endif // LLVM_EXECUTIONENGINE_ORC_TARGETPROCESS_ORCRPCTPCSERVER_H diff --git a/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/RegisterEHFrames.h b/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/RegisterEHFrames.h new file mode 100644 index 000000000000..811c50e3ce4d --- /dev/null +++ b/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/RegisterEHFrames.h @@ -0,0 +1,41 @@ +//===----- RegisterEHFrames.h -- Register EH frame sections -----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Support for dynamically registering and deregistering eh-frame sections +// in-process via libunwind. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EXECUTIONENGINE_ORC_TARGETPROCESS_REGISTEREHFRAMES_H +#define LLVM_EXECUTIONENGINE_ORC_TARGETPROCESS_REGISTEREHFRAMES_H + +#include "llvm/ExecutionEngine/Orc/Shared/TargetProcessControlTypes.h" +#include "llvm/Support/Error.h" +#include <vector> + +namespace llvm { +namespace orc { + +/// Register frames in the given eh-frame section with libunwind. 
+Error registerEHFrameSection(const void *EHFrameSectionAddr, + size_t EHFrameSectionSize); + +/// Unregister frames in the given eh-frame section with libunwind. +Error deregisterEHFrameSection(const void *EHFrameSectionAddr, + size_t EHFrameSectionSize); + +} // end namespace orc +} // end namespace llvm + +extern "C" llvm::orc::tpctypes::CWrapperFunctionResult +llvm_orc_registerEHFrameSectionWrapper(uint8_t *Data, uint64_t Size); + +extern "C" llvm::orc::tpctypes::CWrapperFunctionResult +llvm_orc_deregisterEHFrameSectionWrapper(uint8_t *Data, uint64_t Size); + +#endif // LLVM_EXECUTIONENGINE_ORC_TARGETPROCESS_REGISTEREHFRAMES_H diff --git a/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/TargetExecutionUtils.h b/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/TargetExecutionUtils.h new file mode 100644 index 000000000000..1d2f6d2be089 --- /dev/null +++ b/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/TargetExecutionUtils.h @@ -0,0 +1,38 @@ +//===-- TargetExecutionUtils.h - Utils for execution in target --*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Utilities for execution in the target process. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EXECUTIONENGINE_ORC_TARGETPROCESS_TARGETEXECUTIONUTILS_H +#define LLVM_EXECUTIONENGINE_ORC_TARGETPROCESS_TARGETEXECUTIONUTILS_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/StringRef.h" +#include <string> + +namespace llvm { +namespace orc { + +/// Run a main function, returning the result. +/// +/// If the optional ProgramName argument is given then it will be inserted +/// before the strings in Args as the first argument to the called function. +/// +/// It is legal to have an empty argument list and no program name, however +/// many main functions will expect a name argument at least, and will fail +/// if none is provided. +int runAsMain(int (*Main)(int, char *[]), ArrayRef<std::string> Args, + Optional<StringRef> ProgramName = None); + +} // end namespace orc +} // end namespace llvm + +#endif // LLVM_EXECUTIONENGINE_ORC_TARGETPROCESS_TARGETEXECUTIONUTILS_H diff --git a/llvm/include/llvm/ExecutionEngine/Orc/TargetProcessControl.h b/llvm/include/llvm/ExecutionEngine/Orc/TargetProcessControl.h new file mode 100644 index 000000000000..b60b1ca6e372 --- /dev/null +++ b/llvm/include/llvm/ExecutionEngine/Orc/TargetProcessControl.h @@ -0,0 +1,218 @@ +//===--- TargetProcessControl.h - Target process control APIs ---*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Utilities for interacting with target processes. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EXECUTIONENGINE_ORC_TARGETPROCESSCONTROL_H +#define LLVM_EXECUTIONENGINE_ORC_TARGETPROCESSCONTROL_H + +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Triple.h" +#include "llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h" +#include "llvm/ExecutionEngine/Orc/Core.h" +#include "llvm/ExecutionEngine/Orc/Shared/TargetProcessControlTypes.h" +#include "llvm/Support/DynamicLibrary.h" +#include "llvm/Support/MSVCErrorWorkarounds.h" + +#include <future> +#include <vector> + +namespace llvm { +namespace orc { + +/// TargetProcessControl supports interaction with a JIT target process. +class TargetProcessControl { +public: + /// APIs for manipulating memory in the target process. + class MemoryAccess { + public: + /// Callback function for asynchronous writes. + using WriteResultFn = unique_function<void(Error)>; + + virtual ~MemoryAccess(); + + virtual void writeUInt8s(ArrayRef<tpctypes::UInt8Write> Ws, + WriteResultFn OnWriteComplete) = 0; + + virtual void writeUInt16s(ArrayRef<tpctypes::UInt16Write> Ws, + WriteResultFn OnWriteComplete) = 0; + + virtual void writeUInt32s(ArrayRef<tpctypes::UInt32Write> Ws, + WriteResultFn OnWriteComplete) = 0; + + virtual void writeUInt64s(ArrayRef<tpctypes::UInt64Write> Ws, + WriteResultFn OnWriteComplete) = 0; + + virtual void writeBuffers(ArrayRef<tpctypes::BufferWrite> Ws, + WriteResultFn OnWriteComplete) = 0; + + Error writeUInt8s(ArrayRef<tpctypes::UInt8Write> Ws) { + std::promise<MSVCPError> ResultP; + auto ResultF = ResultP.get_future(); + writeUInt8s(Ws, [&](Error Err) { ResultP.set_value(std::move(Err)); }); + return ResultF.get(); + } + + Error writeUInt16s(ArrayRef<tpctypes::UInt16Write> Ws) { + std::promise<MSVCPError> ResultP; + auto ResultF = ResultP.get_future(); + writeUInt16s(Ws, [&](Error Err) { ResultP.set_value(std::move(Err)); }); + return ResultF.get(); + } + + Error writeUInt32s(ArrayRef<tpctypes::UInt32Write> Ws) { + std::promise<MSVCPError> ResultP; + auto ResultF = ResultP.get_future(); + writeUInt32s(Ws, [&](Error Err) { ResultP.set_value(std::move(Err)); }); + return ResultF.get(); + } + + Error writeUInt64s(ArrayRef<tpctypes::UInt64Write> Ws) { + std::promise<MSVCPError> ResultP; + auto ResultF = ResultP.get_future(); + writeUInt64s(Ws, [&](Error Err) { ResultP.set_value(std::move(Err)); }); + return ResultF.get(); + } + + Error writeBuffers(ArrayRef<tpctypes::BufferWrite> Ws) { + std::promise<MSVCPError> ResultP; + auto ResultF = ResultP.get_future(); + writeBuffers(Ws, [&](Error Err) { ResultP.set_value(std::move(Err)); }); + return ResultF.get(); + } + }; + + /// A pair of a dylib and a set of symbols to be looked up. + struct LookupRequest { + LookupRequest(tpctypes::DylibHandle Handle, const SymbolLookupSet &Symbols) + : Handle(Handle), Symbols(Symbols) {} + tpctypes::DylibHandle Handle; + const SymbolLookupSet &Symbols; + }; + + virtual ~TargetProcessControl(); + + /// Intern a symbol name in the SymbolStringPool. + SymbolStringPtr intern(StringRef SymName) { return SSP->intern(SymName); } + + /// Return a shared pointer to the SymbolStringPool for this instance. + std::shared_ptr<SymbolStringPool> getSymbolStringPool() const { return SSP; } + + /// Return the Triple for the target process. + const Triple &getTargetTriple() const { return TargetTriple; } + + /// Get the page size for the target process. 
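As a small sketch (assumed names, not from the patch): the Error-returning overloads above simply block on the asynchronous writes via a promise/future pair, so target memory can be written synchronously through the MemoryAccess accessor declared just below.

Error writeGuardValue(TargetProcessControl &TPC, JITTargetAddress Addr) {
  // Write a single 64-bit value into the target process and wait for the
  // write to complete before returning.
  tpctypes::UInt64Write W{Addr, 1};
  return TPC.getMemoryAccess().writeUInt64s(W);
}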
+ unsigned getPageSize() const { return PageSize; } + + /// Return a MemoryAccess object for the target process. + MemoryAccess &getMemoryAccess() const { return *MemAccess; } + + /// Return a JITLinkMemoryManager for the target process. + jitlink::JITLinkMemoryManager &getMemMgr() const { return *MemMgr; } + + /// Load the dynamic library at the given path and return a handle to it. + /// If DylibPath is null this function will return the global handle for + /// the target process. + virtual Expected<tpctypes::DylibHandle> loadDylib(const char *DylibPath) = 0; + + /// Search for symbols in the target process. + /// + /// The result of the lookup is a 2-dimensional array of target addresses + /// that correspond to the lookup order. If a required symbol is not + /// found then this method will return an error. If a weakly referenced + /// symbol is not found then it will be assigned a '0' value in the result. + virtual Expected<std::vector<tpctypes::LookupResult>> + lookupSymbols(ArrayRef<LookupRequest> Request) = 0; + + /// Run a function with a main-like signature. + virtual Expected<int32_t> runAsMain(JITTargetAddress MainFnAddr, + ArrayRef<std::string> Args) = 0; + + /// Run a wrapper function with signature: + /// + /// \code{.cpp} + /// CWrapperFunctionResult fn(uint8_t *Data, uint64_t Size); + /// \endcode + /// + virtual Expected<tpctypes::WrapperFunctionResult> + runWrapper(JITTargetAddress WrapperFnAddr, ArrayRef<uint8_t> ArgBuffer) = 0; + + /// Disconnect from the target process. + /// + /// This should be called after the JIT session is shut down. + virtual Error disconnect() = 0; + +protected: + TargetProcessControl(std::shared_ptr<SymbolStringPool> SSP) + : SSP(std::move(SSP)) {} + + std::shared_ptr<SymbolStringPool> SSP; + Triple TargetTriple; + unsigned PageSize = 0; + MemoryAccess *MemAccess = nullptr; + jitlink::JITLinkMemoryManager *MemMgr = nullptr; +}; + +/// A TargetProcessControl implementation targeting the current process. +class SelfTargetProcessControl : public TargetProcessControl, + private TargetProcessControl::MemoryAccess { +public: + SelfTargetProcessControl( + std::shared_ptr<SymbolStringPool> SSP, Triple TargetTriple, + unsigned PageSize, std::unique_ptr<jitlink::JITLinkMemoryManager> MemMgr); + + /// Create a SelfTargetProcessControl with the given memory manager. + /// If no memory manager is given, a jitlink::InProcessMemoryManager will + /// be used by default.
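A short end-to-end sketch (assumptions: MainAddr is the target address of an already-materialized main-like function; nothing here is prescribed by the patch): create an in-process control object and run the function through runAsMain.

Expected<int32_t> runJITedMain(JITTargetAddress MainAddr) {
  // Create a control object for the current process using a fresh
  // SymbolStringPool and the default in-process memory manager.
  auto TPC =
      SelfTargetProcessControl::Create(std::make_shared<SymbolStringPool>());
  if (!TPC)
    return TPC.takeError();
  std::vector<std::string> Args = {"demo-arg"};
  return (*TPC)->runAsMain(MainAddr, Args);
}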
+ static Expected<std::unique_ptr<SelfTargetProcessControl>> + Create(std::shared_ptr<SymbolStringPool> SSP, + std::unique_ptr<jitlink::JITLinkMemoryManager> MemMgr = nullptr); + + Expected<tpctypes::DylibHandle> loadDylib(const char *DylibPath) override; + + Expected<std::vector<tpctypes::LookupResult>> + lookupSymbols(ArrayRef<LookupRequest> Request) override; + + Expected<int32_t> runAsMain(JITTargetAddress MainFnAddr, + ArrayRef<std::string> Args) override; + + Expected<tpctypes::WrapperFunctionResult> + runWrapper(JITTargetAddress WrapperFnAddr, + ArrayRef<uint8_t> ArgBuffer) override; + + Error disconnect() override; + +private: + void writeUInt8s(ArrayRef<tpctypes::UInt8Write> Ws, + WriteResultFn OnWriteComplete) override; + + void writeUInt16s(ArrayRef<tpctypes::UInt16Write> Ws, + WriteResultFn OnWriteComplete) override; + + void writeUInt32s(ArrayRef<tpctypes::UInt32Write> Ws, + WriteResultFn OnWriteComplete) override; + + void writeUInt64s(ArrayRef<tpctypes::UInt64Write> Ws, + WriteResultFn OnWriteComplete) override; + + void writeBuffers(ArrayRef<tpctypes::BufferWrite> Ws, + WriteResultFn OnWriteComplete) override; + + std::unique_ptr<jitlink::JITLinkMemoryManager> OwnedMemMgr; + char GlobalManglingPrefix = 0; + std::vector<std::unique_ptr<sys::DynamicLibrary>> DynamicLibraries; +}; + +} // end namespace orc +} // end namespace llvm + +#endif // LLVM_EXECUTIONENGINE_ORC_TARGETPROCESSCONTROL_H diff --git a/llvm/include/llvm/ExecutionEngine/Orc/ThreadSafeModule.h b/llvm/include/llvm/ExecutionEngine/Orc/ThreadSafeModule.h index 58c96737e580..82f2b7464953 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/ThreadSafeModule.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/ThreadSafeModule.h @@ -162,7 +162,7 @@ using GVModifier = std::function<void(GlobalValue &)>; /// Clones the given module on to a new context. ThreadSafeModule -cloneToNewContext(ThreadSafeModule &TSMW, +cloneToNewContext(const ThreadSafeModule &TSMW, GVPredicate ShouldCloneDef = GVPredicate(), GVModifier UpdateClonedDefSource = GVModifier()); diff --git a/llvm/include/llvm/ExecutionEngine/RuntimeDyld.h b/llvm/include/llvm/ExecutionEngine/RuntimeDyld.h index 1b3ce1127e4a..9b83092e653f 100644 --- a/llvm/include/llvm/ExecutionEngine/RuntimeDyld.h +++ b/llvm/include/llvm/ExecutionEngine/RuntimeDyld.h @@ -271,11 +271,11 @@ private: object::OwningBinary<object::ObjectFile> O, RuntimeDyld::MemoryManager &MemMgr, JITSymbolResolver &Resolver, bool ProcessAllSections, - unique_function<Error(const object::ObjectFile &Obj, - std::unique_ptr<LoadedObjectInfo>, + unique_function<Error(const object::ObjectFile &Obj, LoadedObjectInfo &, std::map<StringRef, JITEvaluatedSymbol>)> OnLoaded, - unique_function<void(object::OwningBinary<object::ObjectFile> O, Error)> + unique_function<void(object::OwningBinary<object::ObjectFile> O, + std::unique_ptr<LoadedObjectInfo>, Error)> OnEmitted); // RuntimeDyldImpl is the actual class. 
RuntimeDyld is just the public @@ -298,10 +298,11 @@ void jitLinkForORC( RuntimeDyld::MemoryManager &MemMgr, JITSymbolResolver &Resolver, bool ProcessAllSections, unique_function<Error(const object::ObjectFile &Obj, - std::unique_ptr<RuntimeDyld::LoadedObjectInfo>, + RuntimeDyld::LoadedObjectInfo &, std::map<StringRef, JITEvaluatedSymbol>)> OnLoaded, - unique_function<void(object::OwningBinary<object::ObjectFile>, Error)> + unique_function<void(object::OwningBinary<object::ObjectFile>, + std::unique_ptr<RuntimeDyld::LoadedObjectInfo>, Error)> OnEmitted); } // end namespace llvm diff --git a/llvm/include/llvm/Support/FileCheck.h b/llvm/include/llvm/FileCheck/FileCheck.h index 2f0e641394d5..b44ab025694b 100644 --- a/llvm/include/llvm/Support/FileCheck.h +++ b/llvm/include/llvm/FileCheck/FileCheck.h @@ -1,4 +1,4 @@ -//==-- llvm/Support/FileCheck.h ---------------------------*- C++ -*-==// +//==-- llvm/FileCheck/FileCheck.h --------------------------------*- C++ -*-==// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -10,13 +10,14 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_SUPPORT_FILECHECK_H -#define LLVM_SUPPORT_FILECHECK_H +#ifndef LLVM_FILECHECK_FILECHECK_H +#define LLVM_FILECHECK_FILECHECK_H #include "llvm/ADT/StringRef.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Regex.h" #include "llvm/Support/SourceMgr.h" +#include <bitset> #include <string> #include <vector> @@ -30,6 +31,7 @@ struct FileCheckRequest { std::vector<StringRef> ImplicitCheckNot; std::vector<StringRef> GlobalDefines; bool AllowEmptyInput = false; + bool AllowUnusedPrefixes = false; bool MatchFullLines = false; bool IgnoreCase = false; bool IsDefaultCheckPrefix = false; @@ -39,10 +41,6 @@ bool VerboseVerbose = false; }; -//===----------------------------------------------------------------------===// -// Summary of a FileCheck diagnostic. -//===----------------------------------------------------------------------===// - namespace Check { enum FileCheckKind { @@ -67,12 +65,23 @@ enum FileCheckKind { CheckBadCount }; +enum FileCheckKindModifier { + /// Modifies directive to perform literal match. + ModifierLiteral = 0, + + // The number of modifiers. + Size +}; + class FileCheckType { FileCheckKind Kind; int Count; ///< optional Count for some checks + /// Modifiers for the check directive. + std::bitset<FileCheckKindModifier::Size> Modifiers; public: - FileCheckType(FileCheckKind Kind = CheckNone) : Kind(Kind), Count(1) {} + FileCheckType(FileCheckKind Kind = CheckNone) + : Kind(Kind), Count(1), Modifiers() {} FileCheckType(const FileCheckType &) = default; FileCheckType &operator=(const FileCheckType &) = default; @@ -81,11 +90,23 @@ public: int getCount() const { return Count; } FileCheckType &setCount(int C); + bool isLiteralMatch() const { + return Modifiers[FileCheckKindModifier::ModifierLiteral]; + } + FileCheckType &setLiteralMatch(bool Literal = true) { + Modifiers.set(FileCheckKindModifier::ModifierLiteral, Literal); + return *this; + } + // \returns a description of \p Prefix. std::string getDescription(StringRef Prefix) const; + + // \returns a description of \p Modifiers. + std::string getModifiersDescription() const; }; } // namespace Check +/// Summary of a FileCheck diagnostic. struct FileCheckDiag { /// What is the FileCheck directive for this diagnostic? 
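A small sketch of the new modifier API (illustrative only, assumed usage): the bitset lets a check type record that its pattern should be matched literally rather than as a regular expression, which is what a directive written with the {LITERAL} modifier requests.

Check::FileCheckType makeLiteralCheck() {
  // Start from a plain CHECK and mark it as a literal (non-regex) match.
  Check::FileCheckType Ty(Check::CheckPlain);
  Ty.setLiteralMatch();
  assert(Ty.isLiteralMatch() && "modifier bit should be set");
  return Ty;
}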
Check::FileCheckType CheckTy; @@ -131,8 +152,12 @@ struct FileCheckDiag { unsigned InputStartCol; unsigned InputEndLine; unsigned InputEndCol; + /// A note to replace the one normally indicated by MatchTy, or the empty + /// string if none. + std::string Note; FileCheckDiag(const SourceMgr &SM, const Check::FileCheckType &CheckTy, - SMLoc CheckLoc, MatchType MatchTy, SMRange InputRange); + SMLoc CheckLoc, MatchType MatchTy, SMRange InputRange, + StringRef Note = ""); }; class FileCheckPatternContext; diff --git a/llvm/include/llvm/Frontend/Directive/DirectiveBase.td b/llvm/include/llvm/Frontend/Directive/DirectiveBase.td index 26049ca60db3..e40f40f74c73 100644 --- a/llvm/include/llvm/Frontend/Directive/DirectiveBase.td +++ b/llvm/include/llvm/Frontend/Directive/DirectiveBase.td @@ -35,10 +35,10 @@ class DirectiveLanguage { // Make the enum values available in the namespace. This allows us to // write something like Enum_X if we have a `using namespace cppNamespace`. - bit makeEnumAvailableInNamespace = 0; + bit makeEnumAvailableInNamespace = false; // Generate include and macro to enable LLVM BitmaskEnum. - bit enableBitmaskEnumInNamespace = 0; + bit enableBitmaskEnumInNamespace = false; // Header file included in the implementation code generated. Usually the // output file of the declaration code generation. Can be left blank. @@ -46,6 +46,24 @@ class DirectiveLanguage { // EnumSet class name used for clauses to generate the allowed clauses map. string clauseEnumSetClass = ""; + + // Class holding the clauses in the flang parse-tree. + string flangClauseBaseClass = ""; +} + +// Information about values accepted by enum-like clauses. +class ClauseVal<string n, int v, bit uv> { + // Name of the clause value. + string name = n; + + // Integer value of the clause. + int value = v; + + // Can user specify this value? + bit isUserValue = uv; + + // Set clause value used by default when unknown. + bit isDefault = false; } // Information about a specific clause. @@ -57,18 +75,32 @@ class Clause<string c> { string alternativeName = ""; // Optional class holding value of the clause in clang AST. - string clangClass = ?; + string clangClass = ""; // Optional class holding value of the clause in flang AST. - string flangClass = ?; + string flangClass = ""; + + // If set to true, value is optional. Not optional by default. + bit isValueOptional = false; + + // Name of enum when there is a list of allowed clause values. + string enumClauseValue = ""; + + // List of allowed clause values. + list<ClauseVal> allowedClauseValues = []; + // If set to true, value class is part of a list. Single class by default. + bit isValueList = false; + + // Define a default value such as "*". + string defaultValue = ""; // Is clause implicit? If clause is set as implicit, the default kind will // be returned in get<LanguageName>ClauseKind instead of their own kind. - bit isImplicit = 0; + bit isImplicit = false; - // Set directive used by default when unknown. Function returning the kind + // Set clause used by default when unknown. Function returning the kind // of enumeration will use this clause as the default. - bit isDefault = 0; + bit isDefault = false; } // Hold information about clause validity by version. @@ -92,6 +124,10 @@ class Directive<string d> { // function. string alternativeName = ""; + // Clauses cannot appear twice in the three allowed lists below. Also, since + // required implies allowed, the same clause cannot appear in both the + // allowedClauses and requiredClauses lists. 
+ // List of allowed clauses for the directive. list<VersionedClause> allowedClauses = []; @@ -105,5 +141,5 @@ class Directive<string d> { list<VersionedClause> requiredClauses = []; // Set directive used by default when unknown. - bit isDefault = 0; + bit isDefault = false; } diff --git a/llvm/include/llvm/Frontend/OpenACC/ACC.td b/llvm/include/llvm/Frontend/OpenACC/ACC.td index e96b7e846662..6045a9ac2af0 100644 --- a/llvm/include/llvm/Frontend/OpenACC/ACC.td +++ b/llvm/include/llvm/Frontend/OpenACC/ACC.td @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// This is the definition file for OpenACC directives and clauses. +// This is the definition file for OpenACC 3.1 directives and clauses. // //===----------------------------------------------------------------------===// @@ -21,32 +21,34 @@ def OpenACC : DirectiveLanguage { let cppNamespace = "acc"; // final namespace will be llvm::acc let directivePrefix = "ACCD_"; let clausePrefix = "ACCC_"; - let makeEnumAvailableInNamespace = 1; - let enableBitmaskEnumInNamespace = 1; + let makeEnumAvailableInNamespace = true; + let enableBitmaskEnumInNamespace = true; let includeHeader = "llvm/Frontend/OpenACC/ACC.h.inc"; let clauseEnumSetClass = "AccClauseSet"; + let flangClauseBaseClass = "AccClause"; } //===----------------------------------------------------------------------===// // Definition of OpenACC clauses //===----------------------------------------------------------------------===// -// 2.9.6 -def ACCC_Auto : Clause<"auto"> {} - // 2.16.1 def ACCC_Async : Clause<"async"> { - let flangClass = "std::optional<ScalarIntExpr>"; + let flangClass = "ScalarIntExpr"; + let isValueOptional = true; } -// 2.7.11 +// 2.9.7 +def ACCC_Auto : Clause<"auto"> {} + +// 2.7.12 def ACCC_Attach : Clause<"attach"> { let flangClass = "AccObjectList"; } // 2.15.1 def ACCC_Bind : Clause<"bind"> { - let flangClass = "Name"; + let flangClass = "AccBindClause"; } // 2.12 @@ -58,41 +60,49 @@ def ACCC_Collapse : Clause<"collapse"> { let flangClass = "ScalarIntConstantExpr"; } -// 2.7.5 +// 2.7.6 def ACCC_Copy : Clause<"copy"> { let flangClass = "AccObjectList"; } -// 2.7.6 +// 2.7.7 def ACCC_Copyin : Clause<"copyin"> { let flangClass = "AccObjectListWithModifier"; } -// 2.7.7 +// 2.7.8 def ACCC_Copyout : Clause<"copyout"> { let flangClass = "AccObjectListWithModifier"; } -// 2.7.8 +// 2.7.9 def ACCC_Create : Clause<"create"> { let flangClass = "AccObjectListWithModifier"; } -// 2.5.14 +// 2.5.15 +def ACC_Default_none : ClauseVal<"none", 1, 1> { let isDefault = 1; } +def ACC_Default_present : ClauseVal<"present", 0, 1> {} + def ACCC_Default : Clause<"default"> { let flangClass = "AccDefaultClause"; + let enumClauseValue = "DefaultValue"; + let allowedClauseValues = [ + ACC_Default_present, + ACC_Default_none + ]; } -// 2.4.12 +// 2.14.3 def ACCC_DefaultAsync : Clause<"default_async"> { let flangClass = "ScalarIntExpr"; } -// 2.7.10 +// 2.7.11 def ACCC_Delete : Clause<"delete"> { let flangClass = "AccObjectList"; } -// 2.7.12 +// 2.7.13 def ACCC_Detach : Clause<"detach"> { let flangClass = "AccObjectList"; } @@ -102,38 +112,41 @@ def ACCC_Device : Clause<"device"> { let flangClass = "AccObjectList"; } -// 2.14.1 +// 2.14.1 - 2.14.2 def ACCC_DeviceNum : Clause<"device_num"> { - let flangClass = "ScalarIntConstantExpr"; + let flangClass = "ScalarIntExpr"; } -// 2.7.3 +// 2.7.4 def ACCC_DevicePtr : Clause<"deviceptr"> { let flangClass = "AccObjectList"; } -// 2.13 +// 2.13.1 def ACCC_DeviceResident : 
Clause<"device_resident"> { let flangClass = "AccObjectList"; } // 2.4 def ACCC_DeviceType : Clause<"device_type"> { - // (DeviceType, "*" - let flangClass = "std::optional<std::list<Name>>"; + let flangClass = "ScalarIntExpr"; + let defaultValue = "*"; + let isValueOptional = true; + let isValueList = true; } // 2.6.6 def ACCC_Finalize : Clause<"finalize"> {} -// 2.5.12 +// 2.5.13 def ACCC_FirstPrivate : Clause<"firstprivate"> { let flangClass = "AccObjectList"; } // 2.9.2 def ACCC_Gang : Clause<"gang"> { - let flangClass = "std::optional<AccGangArgument>"; + let flangClass = "AccGangArgument"; + let isValueOptional = true; } // 2.14.4 @@ -141,7 +154,7 @@ def ACCC_Host : Clause<"host"> { let flangClass = "AccObjectList"; } -// 2.5.4 +// 2.5.5 def ACCC_If : Clause <"if"> { let flangClass = "ScalarLogicalExpr"; } @@ -149,15 +162,15 @@ def ACCC_If : Clause <"if"> { // 2.14.4 def ACCC_IfPresent : Clause<"if_present"> {} -// 2.9.9 +// 2.9.6 def ACCC_Independent : Clause<"independent"> {} -// 2.13 +// 2.13.3 def ACCC_Link : Clause<"link"> { let flangClass = "AccObjectList"; } -// 2.7.9 +// 2.7.10 def ACCC_NoCreate : Clause<"no_create"> { let flangClass = "AccObjectList"; } @@ -165,29 +178,29 @@ def ACCC_NoCreate : Clause<"no_create"> { // 2.15.1 def ACCC_NoHost : Clause<"nohost"> {} -// 2.5.8 +// 2.5.9 def ACCC_NumGangs : Clause<"num_gangs"> { let flangClass = "ScalarIntExpr"; } -// 2.5.9 +// 2.5.10 def ACCC_NumWorkers : Clause<"num_workers"> { let flangClass = "ScalarIntExpr"; } -// 2.7.4 +// 2.7.5 def ACCC_Present : Clause<"present"> { let flangClass = "AccObjectList"; } -// 2.5.11 +// 2.5.12 def ACCC_Private : Clause<"private"> { let flangClass = "AccObjectList"; } -// 2.9.7 +// 2.9.8 def ACCC_Tile : Clause <"tile"> { - let flangClass = "AccSizeExprList"; + let flangClass = "AccTileExprList"; } // 2.8.1 @@ -198,14 +211,14 @@ def ACCC_UseDevice : Clause <"use_device"> { // 2.12 def ACCC_Read : Clause<"read"> {} -// 2.5.13 +// 2.5.14 def ACCC_Reduction : Clause<"reduction"> { let flangClass = "AccObjectListWithReduction"; } -// 2.5.5 +// 2.5.6 def ACCC_Self : Clause<"self"> { - let flangClass = "std::optional<ScalarLogicalExpr>"; + let flangClass = "AccSelfClause"; } // 2.9.5 @@ -213,29 +226,32 @@ def ACCC_Seq : Clause<"seq"> {} // 2.9.4 def ACCC_Vector : Clause<"vector"> { - let flangClass = "std::optional<ScalarIntExpr>"; + let flangClass = "ScalarIntExpr"; + let isValueOptional = true; } -// 2.5.10 +// 2.5.11 def ACCC_VectorLength : Clause<"vector_length"> { let flangClass = "ScalarIntExpr"; } // 2.16.2 def ACCC_Wait : Clause<"wait"> { - let flangClass = "std::optional<AccWaitArgument>"; + let flangClass = "AccWaitArgument"; + let isValueOptional = true; } // 2.9.3 def ACCC_Worker: Clause<"worker"> { - let flangClass = "std::optional<ScalarIntExpr>"; + let flangClass = "ScalarIntExpr"; + let isValueOptional = true; } // 2.12 def ACCC_Write : Clause<"write"> {} def ACCC_Unknown : Clause<"unknown"> { - let isDefault = 1; + let isDefault = true; } //===----------------------------------------------------------------------===// @@ -248,7 +264,8 @@ def ACC_Atomic : Directive<"atomic"> {} // 2.6.5 def ACC_Data : Directive<"data"> { let allowedOnceClauses = [ - VersionedClause<ACCC_If> + VersionedClause<ACCC_If>, + VersionedClause<ACCC_Default> ]; let requiredClauses = [ VersionedClause<ACCC_Attach>, @@ -277,7 +294,7 @@ def ACC_Declare : Directive<"declare"> { ]; } -// 2.5.2 +// 2.5.3 def ACC_Kernels : Directive<"kernels"> { let allowedClauses = [ VersionedClause<ACCC_Attach>, @@ -288,7 +305,8 @@ 
def ACC_Kernels : Directive<"kernels"> { VersionedClause<ACCC_DeviceType>, VersionedClause<ACCC_NoCreate>, VersionedClause<ACCC_Present>, - VersionedClause<ACCC_DevicePtr> + VersionedClause<ACCC_DevicePtr>, + VersionedClause<ACCC_Wait> ]; let allowedOnceClauses = [ VersionedClause<ACCC_Async>, @@ -297,8 +315,7 @@ def ACC_Kernels : Directive<"kernels"> { VersionedClause<ACCC_NumGangs>, VersionedClause<ACCC_NumWorkers>, VersionedClause<ACCC_Self>, - VersionedClause<ACCC_VectorLength>, - VersionedClause<ACCC_Wait> + VersionedClause<ACCC_VectorLength> ]; } @@ -330,8 +347,10 @@ def ACC_Parallel : Directive<"parallel"> { ]; } -// 2.5.3 +// 2.5.2 def ACC_Serial : Directive<"serial"> { + // Spec line 950-951: clause is as for the parallel construct except that the + // num_gangs, num_workers, and vector_length clauses are not permitted. let allowedClauses = [ VersionedClause<ACCC_Attach>, VersionedClause<ACCC_Copy>, @@ -340,10 +359,10 @@ def ACC_Serial : Directive<"serial"> { VersionedClause<ACCC_Create>, VersionedClause<ACCC_DevicePtr>, VersionedClause<ACCC_DeviceType>, - VersionedClause<ACCC_FirstPrivate>, VersionedClause<ACCC_NoCreate>, VersionedClause<ACCC_Present>, VersionedClause<ACCC_Private>, + VersionedClause<ACCC_FirstPrivate>, VersionedClause<ACCC_Wait> ]; let allowedOnceClauses = [ @@ -406,9 +425,15 @@ def ACC_Routine : Directive<"routine"> { // 2.14.3 def ACC_Set : Directive<"set"> { let allowedOnceClauses = [ + VersionedClause<ACCC_DefaultAsync>, + VersionedClause<ACCC_DeviceNum>, + VersionedClause<ACCC_DeviceType>, VersionedClause<ACCC_If> ]; let requiredClauses = [ + // The three following clauses are also in allowedOnceClauses list due to + // restriction 2255 - Two instances of the same clause may not appear on the + // same directive. VersionedClause<ACCC_DefaultAsync>, VersionedClause<ACCC_DeviceNum>, VersionedClause<ACCC_DeviceType> @@ -478,6 +503,8 @@ def ACC_ExitData : Directive<"exit data"> { VersionedClause<ACCC_Detach> ]; } + +// 2.8 def ACC_HostData : Directive<"host_data"> { let allowedClauses = [ VersionedClause<ACCC_If>, @@ -508,7 +535,6 @@ def ACC_KernelsLoop : Directive<"kernels loop"> { VersionedClause<ACCC_Default>, VersionedClause<ACCC_Gang>, VersionedClause<ACCC_If>, - VersionedClause<ACCC_Independent>, VersionedClause<ACCC_NumGangs>, VersionedClause<ACCC_NumWorkers>, VersionedClause<ACCC_Reduction>, @@ -600,5 +626,5 @@ def ACC_SerialLoop : Directive<"serial loop"> { } def ACC_Unknown : Directive<"unknown"> { - let isDefault = 1; -}
\ No newline at end of file + let isDefault = true; +} diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.td b/llvm/include/llvm/Frontend/OpenMP/OMP.td index a565bdf90b3f..10fa5a37b891 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMP.td +++ b/llvm/include/llvm/Frontend/OpenMP/OMP.td @@ -21,10 +21,11 @@ def OpenMP : DirectiveLanguage { let cppNamespace = "omp"; // final namespace will be llvm::omp let directivePrefix = "OMPD_"; let clausePrefix = "OMPC_"; - let makeEnumAvailableInNamespace = 1; - let enableBitmaskEnumInNamespace = 1; + let makeEnumAvailableInNamespace = true; + let enableBitmaskEnumInNamespace = true; let includeHeader = "llvm/Frontend/OpenMP/OMP.h.inc"; let clauseEnumSetClass = "OmpClauseSet"; + let flangClauseBaseClass = "OmpClause"; } //===----------------------------------------------------------------------===// @@ -33,39 +34,120 @@ def OpenMP : DirectiveLanguage { def OMPC_Allocator : Clause<"allocator"> { let clangClass = "OMPAllocatorClause"; + let flangClass = "ScalarIntExpr"; +} +def OMPC_If : Clause<"if"> { + let clangClass = "OMPIfClause"; + let flangClass = "OmpIfClause"; +} +def OMPC_Final : Clause<"final"> { + let clangClass = "OMPFinalClause"; + let flangClass = "ScalarLogicalExpr"; } -def OMPC_If : Clause<"if"> { let clangClass = "OMPIfClause"; } -def OMPC_Final : Clause<"final"> { let clangClass = "OMPFinalClause"; } def OMPC_NumThreads : Clause<"num_threads"> { let clangClass = "OMPNumThreadsClause"; + let flangClass = "ScalarIntExpr"; +} +def OMPC_SafeLen : Clause<"safelen"> { + let clangClass = "OMPSafelenClause"; + let flangClass = "ScalarIntConstantExpr"; +} +def OMPC_SimdLen : Clause<"simdlen"> { + let clangClass = "OMPSimdlenClause"; + let flangClass = "ScalarIntConstantExpr"; +} +def OMPC_Collapse : Clause<"collapse"> { + let clangClass = "OMPCollapseClause"; + let flangClass = "ScalarIntConstantExpr"; +} +def OMPC_Default : Clause<"default"> { + let clangClass = "OMPDefaultClause"; + let flangClass = "OmpDefaultClause"; +} +def OMPC_Private : Clause<"private"> { + let clangClass = "OMPPrivateClause"; + let flangClass = "OmpObjectList"; } -def OMPC_SafeLen : Clause<"safelen"> { let clangClass = "OMPSafelenClause"; } -def OMPC_SimdLen : Clause<"simdlen"> { let clangClass = "OMPSimdlenClause"; } -def OMPC_Collapse : Clause<"collapse"> { let clangClass = "OMPCollapseClause"; } -def OMPC_Default : Clause<"default"> { let clangClass = "OMPDefaultClause"; } -def OMPC_Private : Clause<"private"> { let clangClass = "OMPPrivateClause"; } def OMPC_FirstPrivate : Clause<"firstprivate"> { let clangClass = "OMPFirstprivateClause"; + let flangClass = "OmpObjectList"; } def OMPC_LastPrivate : Clause<"lastprivate"> { let clangClass = "OMPLastprivateClause"; + let flangClass = "OmpObjectList"; +} +def OMPC_Shared : Clause<"shared"> { + let clangClass = "OMPSharedClause"; + let flangClass = "OmpObjectList"; } -def OMPC_Shared : Clause<"shared"> { let clangClass = "OMPSharedClause"; } def OMPC_Reduction : Clause<"reduction"> { let clangClass = "OMPReductionClause"; + let flangClass = "OmpReductionClause"; +} +def OMPC_Linear : Clause<"linear"> { + let clangClass = "OMPLinearClause"; + let flangClass = "OmpLinearClause"; +} +def OMPC_Aligned : Clause<"aligned"> { + let clangClass = "OMPAlignedClause"; + let flangClass = "OmpAlignedClause"; +} +def OMPC_Copyin : Clause<"copyin"> { + let clangClass = "OMPCopyinClause"; + let flangClass = "OmpObjectList"; } -def OMPC_Linear : Clause<"linear"> { let clangClass = "OMPLinearClause"; } -def OMPC_Aligned : Clause<"aligned"> 
{ let clangClass = "OMPAlignedClause"; } -def OMPC_Copyin : Clause<"copyin"> { let clangClass = "OMPCopyinClause"; } def OMPC_CopyPrivate : Clause<"copyprivate"> { let clangClass = "OMPCopyprivateClause"; + let flangClass = "OmpObjectList"; } +def OMP_PROC_BIND_master : ClauseVal<"master",2,1> {} +def OMP_PROC_BIND_close : ClauseVal<"close",3,1> {} +def OMP_PROC_BIND_spread : ClauseVal<"spread",4,1> {} +def OMP_PROC_BIND_default : ClauseVal<"default",5,0> {} +def OMP_PROC_BIND_unknown : ClauseVal<"unknown",6,0> { let isDefault = true; } def OMPC_ProcBind : Clause<"proc_bind"> { let clangClass = "OMPProcBindClause"; + let flangClass = "OmpProcBindClause"; + let enumClauseValue = "ProcBindKind"; + let allowedClauseValues = [ + OMP_PROC_BIND_master, + OMP_PROC_BIND_close, + OMP_PROC_BIND_spread, + OMP_PROC_BIND_default, + OMP_PROC_BIND_unknown + ]; +} + +// static and auto are C++ keywords so need a capital to disambiguate. +def OMP_SCHEDULE_Static : ClauseVal<"Static", 2, 1> {} +def OMP_SCHEDULE_Dynamic : ClauseVal<"Dynamic", 3, 1> {} +def OMP_SCHEDULE_Guided : ClauseVal<"Guided", 4, 1> {} +def OMP_SCHEDULE_Auto : ClauseVal<"Auto", 5, 1> {} +def OMP_SCHEDULE_Runtime : ClauseVal<"Runtime", 6, 1> {} +def OMP_SCHEDULE_Default : ClauseVal<"Default", 7, 0> { let isDefault = 1; } + +def OMPC_Schedule : Clause<"schedule"> { + let clangClass = "OMPScheduleClause"; + let flangClass = "OmpScheduleClause"; + let enumClauseValue = "ScheduleKind"; + let allowedClauseValues = [ + OMP_SCHEDULE_Static, + OMP_SCHEDULE_Dynamic, + OMP_SCHEDULE_Guided, + OMP_SCHEDULE_Auto, + OMP_SCHEDULE_Runtime, + OMP_SCHEDULE_Default + ]; +} + +def OMPC_Ordered : Clause<"ordered"> { + let clangClass = "OMPOrderedClause"; + let flangClass = "ScalarIntConstantExpr"; + let isValueOptional = true; +} +def OMPC_NoWait : Clause<"nowait"> { + let clangClass = "OMPNowaitClause"; } -def OMPC_Schedule : Clause<"schedule"> { let clangClass = "OMPScheduleClause"; } -def OMPC_Ordered : Clause<"ordered"> { let clangClass = "OMPOrderedClause"; } -def OMPC_NoWait : Clause<"nowait"> { let clangClass = "OMPNowaitClause"; } def OMPC_Untied : Clause<"untied"> { let clangClass = "OMPUntiedClause"; } def OMPC_Mergeable : Clause<"mergeable"> { let clangClass = "OMPMergeableClause"; @@ -79,50 +161,77 @@ def OMPC_AcqRel : Clause<"acq_rel"> { let clangClass = "OMPAcqRelClause"; } def OMPC_Acquire : Clause<"acquire"> { let clangClass = "OMPAcquireClause"; } def OMPC_Release : Clause<"release"> { let clangClass = "OMPReleaseClause"; } def OMPC_Relaxed : Clause<"relaxed"> { let clangClass = "OMPRelaxedClause"; } -def OMPC_Depend : Clause<"depend"> { let clangClass = "OMPDependClause"; } -def OMPC_Device : Clause<"device"> { let clangClass = "OMPDeviceClause"; } +def OMPC_Depend : Clause<"depend"> { + let clangClass = "OMPDependClause"; + let flangClass = "OmpDependClause"; +} +def OMPC_Device : Clause<"device"> { + let clangClass = "OMPDeviceClause"; + let flangClass = "ScalarIntExpr"; +} def OMPC_Threads : Clause<"threads"> { let clangClass = "OMPThreadsClause"; } def OMPC_Simd : Clause<"simd"> { let clangClass = "OMPSIMDClause"; } -def OMPC_Map : Clause<"map"> { let clangClass = "OMPMapClause"; } +def OMPC_Map : Clause<"map"> { + let clangClass = "OMPMapClause"; + let flangClass = "OmpMapClause"; +} def OMPC_NumTeams : Clause<"num_teams"> { let clangClass = "OMPNumTeamsClause"; + let flangClass = "ScalarIntExpr"; } def OMPC_ThreadLimit : Clause<"thread_limit"> { let clangClass = "OMPThreadLimitClause"; + let flangClass = "ScalarIntExpr"; } def 
OMPC_Priority : Clause<"priority"> { let clangClass = "OMPPriorityClause"; + let flangClass = "ScalarIntExpr"; } def OMPC_GrainSize : Clause<"grainsize"> { let clangClass = "OMPGrainsizeClause"; + let flangClass = "ScalarIntExpr"; } def OMPC_NoGroup : Clause<"nogroup"> { let clangClass = "OMPNogroupClause"; } def OMPC_NumTasks : Clause<"num_tasks"> { let clangClass = "OMPNumTasksClause"; + let flangClass = "ScalarIntExpr"; } def OMPC_Hint : Clause<"hint"> { let clangClass = "OMPHintClause"; + let flangClass = "ConstantExpr"; } def OMPC_DistSchedule : Clause<"dist_schedule"> { let clangClass = "OMPDistScheduleClause"; + let flangClass = "ScalarIntExpr"; + let isValueOptional = true; } def OMPC_DefaultMap : Clause<"defaultmap"> { let clangClass = "OMPDefaultmapClause"; + let flangClass = "OmpDefaultmapClause"; } def OMPC_To : Clause<"to"> { let clangClass = "OMPToClause"; + let flangClass = "OmpObjectList"; +} +def OMPC_From : Clause<"from"> { + let clangClass = "OMPFromClause"; + let flangClass = "OmpObjectList"; } -def OMPC_From : Clause<"from"> { let clangClass = "OMPFromClause"; } def OMPC_UseDevicePtr : Clause<"use_device_ptr"> { let clangClass = "OMPUseDevicePtrClause"; + let flangClass = "Name"; + let isValueList = true; } def OMPC_IsDevicePtr : Clause<"is_device_ptr"> { let clangClass = "OMPIsDevicePtrClause"; + let flangClass = "Name"; + let isValueList = true; } def OMPC_TaskReduction : Clause<"task_reduction"> { let clangClass = "OMPTaskReductionClause"; + let flangClass = "OmpReductionClause"; } def OMPC_InReduction : Clause<"in_reduction"> { let clangClass = "OMPInReductionClause"; @@ -144,12 +253,19 @@ def OMPC_AtomicDefaultMemOrder : Clause<"atomic_default_mem_order"> { } def OMPC_Allocate : Clause<"allocate"> { let clangClass = "OMPAllocateClause"; + let flangClass = "OmpAllocateClause"; } def OMPC_NonTemporal : Clause<"nontemporal"> { let clangClass = "OMPNontemporalClause"; } + +def OMP_ORDER_concurrent : ClauseVal<"default",2,0> { let isDefault = 1; } def OMPC_Order : Clause<"order"> { let clangClass = "OMPOrderClause"; + let enumClauseValue = "OrderKind"; + let allowedClauseValues = [ + OMP_ORDER_concurrent + ]; } def OMPC_Destroy : Clause<"destroy"> { let clangClass = "OMPDestroyClause"; @@ -172,26 +288,31 @@ def OMPC_Affinity : Clause<"affinity"> { def OMPC_UseDeviceAddr : Clause<"use_device_addr"> { let clangClass = "OMPUseDeviceAddrClause"; } -def OMPC_Uniform : Clause<"uniform"> {} +def OMPC_Uniform : Clause<"uniform"> { + let flangClass = "Name"; + let isValueList = true; +} def OMPC_DeviceType : Clause<"device_type"> {} def OMPC_Match : Clause<"match"> {} def OMPC_Depobj : Clause<"depobj"> { let clangClass = "OMPDepobjClause"; - let isImplicit = 1; + let isImplicit = true; } def OMPC_Flush : Clause<"flush"> { let clangClass = "OMPFlushClause"; - let isImplicit = 1; + let isImplicit = true; } def OMPC_ThreadPrivate : Clause<"threadprivate"> { let alternativeName = "threadprivate or thread local"; - let isImplicit = 1; + let isImplicit = true; } def OMPC_Unknown : Clause<"unknown"> { - let isImplicit = 1; - let isDefault = 1; + let isImplicit = true; + let isDefault = true; +} +def OMPC_Link : Clause<"link"> { + let flangClass = "OmpObjectList"; } -def OMPC_Link : Clause<"link"> {} def OMPC_Inbranch : Clause<"inbranch"> {} def OMPC_Notinbranch : Clause<"notinbranch"> {} @@ -202,7 +323,6 @@ def OMPC_Notinbranch : Clause<"notinbranch"> {} def OMP_ThreadPrivate : Directive<"threadprivate"> {} def OMP_Parallel : Directive<"parallel"> { let allowedClauses = [ - 
VersionedClause<OMPC_Default>, VersionedClause<OMPC_Private>, VersionedClause<OMPC_FirstPrivate>, VersionedClause<OMPC_Shared>, @@ -211,6 +331,7 @@ def OMP_Parallel : Directive<"parallel"> { VersionedClause<OMPC_Allocate> ]; let allowedOnceClauses = [ + VersionedClause<OMPC_Default>, VersionedClause<OMPC_If>, VersionedClause<OMPC_NumThreads>, VersionedClause<OMPC_ProcBind>, @@ -218,7 +339,6 @@ def OMP_Parallel : Directive<"parallel"> { } def OMP_Task : Directive<"task"> { let allowedClauses = [ - VersionedClause<OMPC_Default>, VersionedClause<OMPC_Private>, VersionedClause<OMPC_FirstPrivate>, VersionedClause<OMPC_Shared>, @@ -231,6 +351,7 @@ def OMP_Task : Directive<"task"> { VersionedClause<OMPC_Affinity, 50> ]; let allowedOnceClauses = [ + VersionedClause<OMPC_Default>, VersionedClause<OMPC_If>, VersionedClause<OMPC_Final>, VersionedClause<OMPC_Priority> @@ -312,7 +433,11 @@ def OMP_Critical : Directive<"critical"> { } def OMP_TaskYield : Directive<"taskyield"> {} def OMP_Barrier : Directive<"barrier"> {} -def OMP_TaskWait : Directive<"taskwait"> {} +def OMP_TaskWait : Directive<"taskwait"> { + let allowedClauses = [ + VersionedClause<OMPC_Depend, 50> + ]; +} def OMP_TaskGroup : Directive<"taskgroup"> { let allowedClauses = [ VersionedClause<OMPC_TaskReduction>, @@ -320,7 +445,7 @@ def OMP_TaskGroup : Directive<"taskgroup"> { ]; } def OMP_Flush : Directive<"flush"> { - let allowedClauses = [ + let allowedOnceClauses = [ VersionedClause<OMPC_AcqRel, 50>, VersionedClause<OMPC_Acquire, 50>, VersionedClause<OMPC_Release, 50>, @@ -342,6 +467,8 @@ def OMP_Atomic : Directive<"atomic"> { VersionedClause<OMPC_Write>, VersionedClause<OMPC_Update>, VersionedClause<OMPC_Capture>, + ]; + let allowedOnceClauses = [ VersionedClause<OMPC_SeqCst>, VersionedClause<OMPC_AcqRel, 50>, VersionedClause<OMPC_Acquire, 50>, @@ -398,7 +525,6 @@ def OMP_Requires : Directive<"requires"> { } def OMP_TargetData : Directive<"target data"> { let allowedClauses = [ - VersionedClause<OMPC_Map>, VersionedClause<OMPC_UseDevicePtr>, VersionedClause<OMPC_UseDeviceAddr, 50> ]; @@ -412,19 +538,20 @@ def OMP_TargetData : Directive<"target data"> { } def OMP_TargetEnterData : Directive<"target enter data"> { let allowedClauses = [ - VersionedClause<OMPC_Depend>, - VersionedClause<OMPC_Map> + VersionedClause<OMPC_Depend> ]; let allowedOnceClauses = [ VersionedClause<OMPC_If>, VersionedClause<OMPC_Device>, VersionedClause<OMPC_NoWait> ]; + let requiredClauses = [ + VersionedClause<OMPC_Map> + ]; } def OMP_TargetExitData : Directive<"target exit data"> { let allowedClauses = [ - VersionedClause<OMPC_Depend>, - VersionedClause<OMPC_Map> + VersionedClause<OMPC_Depend> ]; let allowedOnceClauses = [ VersionedClause<OMPC_Device>, @@ -708,7 +835,6 @@ def OMP_TaskLoop : Directive<"taskloop"> { VersionedClause<OMPC_Private>, VersionedClause<OMPC_FirstPrivate>, VersionedClause<OMPC_LastPrivate>, - VersionedClause<OMPC_Default>, VersionedClause<OMPC_Untied>, VersionedClause<OMPC_Mergeable>, VersionedClause<OMPC_NoGroup>, @@ -717,6 +843,7 @@ def OMP_TaskLoop : Directive<"taskloop"> { VersionedClause<OMPC_Allocate> ]; let allowedOnceClauses = [ + VersionedClause<OMPC_Default>, VersionedClause<OMPC_If>, VersionedClause<OMPC_Collapse>, VersionedClause<OMPC_Final>, @@ -770,7 +897,12 @@ def OMP_Distribute : Directive<"distribute"> { VersionedClause<OMPC_DistSchedule> ]; } -def OMP_DeclareTarget : Directive<"declare target"> {} +def OMP_DeclareTarget : Directive<"declare target"> { + let allowedClauses = [ + VersionedClause<OMPC_To>, + 
VersionedClause<OMPC_Link>
+ ];
+}
def OMP_EndDeclareTarget : Directive<"end declare target"> {} def OMP_DistributeParallelFor : Directive<"distribute parallel for"> { let allowedClauses = [
@@ -1460,6 +1592,9 @@ def OMP_Scan : Directive<"scan"> { VersionedClause<OMPC_Exclusive, 50> ]; }
+def OMP_Assumes : Directive<"assumes"> {}
+def OMP_BeginAssumes : Directive<"begin assumes"> {}
+def OMP_EndAssumes : Directive<"end assumes"> {}
def OMP_BeginDeclareVariant : Directive<"begin declare variant"> {} def OMP_EndDeclareVariant : Directive<"end declare variant"> {} def OMP_ParallelWorkshare : Directive<"parallel workshare"> {
@@ -1481,9 +1616,24 @@ def OMP_ParallelWorkshare : Directive<"parallel workshare"> { def OMP_Workshare : Directive<"workshare"> {} def OMP_EndDo : Directive<"end do"> {} def OMP_EndDoSimd : Directive<"end do simd"> {}
-def OMP_EndSections : Directive<"end sections"> {}
-def OMP_EndSingle : Directive<"end single"> {}
-def OMP_EndWorkshare : Directive<"end workshare"> {}
+def OMP_EndSections : Directive<"end sections"> {
+ let allowedOnceClauses = [
+ VersionedClause<OMPC_NoWait>
+ ];
+}
+def OMP_EndSingle : Directive<"end single"> {
+ let allowedClauses = [
+ VersionedClause<OMPC_CopyPrivate>
+ ];
+ let allowedOnceClauses = [
+ VersionedClause<OMPC_NoWait>
+ ];
+}
+def OMP_EndWorkshare : Directive<"end workshare"> {
+ let allowedClauses = [
+ VersionedClause<OMPC_NoWait>
+ ];
+}
def OMP_Unknown : Directive<"unknown"> {
- let isDefault = 1;
+ let isDefault = true;
}
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h b/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h
index d171d0a2b6c4..36ce3fc0f66f 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h
@@ -16,6 +16,7 @@ #include "llvm/ADT/BitmaskEnum.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/Frontend/OpenMP/OMP.h.inc" namespace llvm {
@@ -41,12 +42,12 @@ enum class InternalControlVar { #include "llvm/Frontend/OpenMP/OMPKinds.def" enum class ICVInitValue {
-#define ICV_DATA_ENV(Enum, Name, EnvVar, Init) Init,
+#define ICV_INIT_VALUE(Enum, Name) Enum,
#include "llvm/Frontend/OpenMP/OMPKinds.def" };
-#define ICV_DATA_ENV(Enum, Name, EnvVar, Init) \
- constexpr auto Init = omp::ICVInitValue::Init;
+#define ICV_INIT_VALUE(Enum, Name) \
+ constexpr auto Enum = omp::ICVInitValue::Enum;
#include "llvm/Frontend/OpenMP/OMPKinds.def" /// IDs for all omp runtime library (RTL) functions.
@@ -68,16 +69,6 @@ enum class DefaultKind { constexpr auto Enum = omp::DefaultKind::Enum; #include "llvm/Frontend/OpenMP/OMPKinds.def"
-/// IDs for the different proc bind kinds.
-enum class ProcBindKind {
-#define OMP_PROC_BIND_KIND(Enum, Str, Value) Enum = Value,
-#include "llvm/Frontend/OpenMP/OMPKinds.def"
-};
-
-#define OMP_PROC_BIND_KIND(Enum, ...) \
- constexpr auto Enum = omp::ProcBindKind::Enum;
-#include "llvm/Frontend/OpenMP/OMPKinds.def"
-
/// IDs for all omp runtime library ident_t flag encodings (see /// their definition in openmp/runtime/src/kmp.h). enum class IdentFlag {
@@ -89,6 +80,33 @@ enum class IdentFlag { #define OMP_IDENT_FLAG(Enum, ...) constexpr auto Enum = omp::IdentFlag::Enum; #include "llvm/Frontend/OpenMP/OMPKinds.def"
+/// Helper to describe assume clauses.
+struct AssumptionClauseMappingInfo {
+ /// The identifier describing the (beginning of the) clause.
+ llvm::StringLiteral Identifier;
+ /// Flag to determine if the identifier is a full name or the start of a name.
+ bool StartsWith;
+ /// Flag to determine if a directive list follows.
+ bool HasDirectiveList;
+ /// Flag to determine if an expression follows.
+ bool HasExpression;
+};
+
+/// All known assume clauses.
+static constexpr AssumptionClauseMappingInfo AssumptionClauseMappings[] = {
+#define OMP_ASSUME_CLAUSE(Identifier, StartsWith, HasDirectiveList, \
+ HasExpression) \
+ {Identifier, StartsWith, HasDirectiveList, HasExpression},
+#include "llvm/Frontend/OpenMP/OMPKinds.def"
+};
+
+inline std::string getAllAssumeClauseOptions() {
+ std::string S;
+ for (const AssumptionClauseMappingInfo &ACMI : AssumptionClauseMappings)
+ S += (S.empty() ? "'" : "', '") + ACMI.Identifier.str();
+ return S + "'";
+}
+
} // end namespace omp } // end namespace llvm
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPContext.h b/llvm/include/llvm/Frontend/OpenMP/OMPContext.h
index 1a42d189db44..8a4179167c89 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPContext.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPContext.h
@@ -70,15 +70,20 @@ TraitSelector getOpenMPContextTraitSelectorForProperty(TraitProperty Property); /// Return a textual representation of the trait selector \p Kind. StringRef getOpenMPContextTraitSelectorName(TraitSelector Kind);
-/// Parse \p Str and return the trait set it matches or
-/// TraitProperty::invalid.
-TraitProperty getOpenMPContextTraitPropertyKind(TraitSet Set, StringRef Str);
+/// Parse \p Str and return the trait property it matches in the set \p Set and
+/// selector \p Selector or TraitProperty::invalid.
+TraitProperty getOpenMPContextTraitPropertyKind(TraitSet Set,
+ TraitSelector Selector,
+ StringRef Str);
/// Return the trait property for a singleton selector \p Selector. TraitProperty getOpenMPContextTraitPropertyForSelector(TraitSelector Selector);
-/// Return a textual representation of the trait property \p Kind.
-StringRef getOpenMPContextTraitPropertyName(TraitProperty Kind);
+/// Return a textual representation of the trait property \p Kind, which might
+/// be the raw string we parsed (\p RawString) if we do not translate the
+/// property into a (distinct) enum.
+StringRef getOpenMPContextTraitPropertyName(TraitProperty Kind,
+ StringRef RawString);
/// Return a textual representation of the trait property \p Kind with selector /// and set name included.
@@ -112,24 +117,36 @@ bool isValidTraitPropertyForTraitSetAndSelector(TraitProperty Property, /// scored (via the ScoresMap). In addition, the required construct nesting is /// described as well. struct VariantMatchInfo {
- /// Add the trait \p Property to the required trait set. If \p Score is not
- /// null, it recorded as well. If \p Property is in the `construct` set it
- /// is recorded in-order in the ConstructTraits as well.
- void addTrait(TraitProperty Property, APInt *Score = nullptr) {
- addTrait(getOpenMPContextTraitSetForProperty(Property), Property, Score);
+ /// Add the trait \p Property to the required trait set. \p RawString is the
+ /// string we parsed and derived \p Property from. If \p Score is not null, it is
+ /// recorded as well. If \p Property is in the `construct` set it is recorded
+ /// in-order in the ConstructTraits as well.
+ void addTrait(TraitProperty Property, StringRef RawString,
+ APInt *Score = nullptr) {
+ addTrait(getOpenMPContextTraitSetForProperty(Property), Property, RawString,
+ Score);
} /// Add the trait \p Property which is in set \p Set to the required trait
- /// set. If \p Score is not null, it recorded as well. If \p Set is the
-
- void addTrait(TraitSet Set, TraitProperty Property, APInt *Score = nullptr) { + /// set. \p RawString is the string we parsed and derived \p Property from. If + /// \p Score is not null, it recorded as well. If \p Set is the `construct` + /// set it is recorded in-order in the ConstructTraits as well. + void addTrait(TraitSet Set, TraitProperty Property, StringRef RawString, + APInt *Score = nullptr) { if (Score) ScoreMap[Property] = *Score; + + // Special handling for `device={isa(...)}` as we do not match the enum but + // the raw string. + if (Property == TraitProperty::device_isa___ANY) + ISATraits.push_back(RawString); + RequiredTraits.set(unsigned(Property)); if (Set == TraitSet::construct) ConstructTraits.push_back(Property); } BitVector RequiredTraits = BitVector(unsigned(TraitProperty::Last) + 1); + SmallVector<StringRef, 8> ISATraits; SmallVector<TraitProperty, 8> ConstructTraits; SmallDenseMap<TraitProperty, APInt> ScoreMap; }; @@ -139,6 +156,7 @@ struct VariantMatchInfo { /// in OpenMP constructs at the location. struct OMPContext { OMPContext(bool IsDeviceCompilation, Triple TargetTriple); + virtual ~OMPContext() = default; void addTrait(TraitProperty Property) { addTrait(getOpenMPContextTraitSetForProperty(Property), Property); @@ -149,6 +167,11 @@ struct OMPContext { ConstructTraits.push_back(Property); } + /// Hook for users to check if an ISA trait matches. The trait is described as + /// the string that got parsed and it depends on the target and context if + /// this matches or not. + virtual bool matchesISATrait(StringRef) const { return false; } + BitVector ActiveTraits = BitVector(unsigned(TraitProperty::Last) + 1); SmallVector<TraitProperty, 8> ConstructTraits; }; diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h b/llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h index 3ae4a2edbf96..6b48cc447e13 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h @@ -1,9 +1,8 @@ //====--- OMPGridValues.h - Language-specific address spaces --*- C++ -*-====// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// /// @@ -29,29 +28,30 @@ namespace omp { /// use the new array name. /// /// Example usage in clang: -/// const unsigned slot_size = ctx.GetTargetInfo().getGridValue(GV_Warp_Size); +/// const unsigned slot_size = +/// ctx.GetTargetInfo().getGridValue(llvm::omp::GVIDX::GV_Warp_Size); /// /// Example usage in libomptarget/deviceRTLs: -/// #include "OMPGridValues.h" +/// #include "llvm/Frontend/OpenMP/OMPGridValues.h" /// #ifdef __AMDGPU__ /// #define GRIDVAL AMDGPUGpuGridValues /// #else /// #define GRIDVAL NVPTXGpuGridValues /// #endif /// ... Then use this reference for GV_Warp_Size in the deviceRTL source. -/// GRIDVAL[GV_Warp_Size] +/// llvm::omp::GRIDVAL[llvm::omp::GVIDX::GV_Warp_Size] /// /// Example usage in libomptarget hsa plugin: -/// #include "OMPGridValues.h" +/// #include "llvm/Frontend/OpenMP/OMPGridValues.h" /// #define GRIDVAL AMDGPUGpuGridValues /// ... Then use this reference to access GV_Warp_Size in the hsa plugin. 
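A minimal, illustrative sketch (not part of this patch) of one way a frontend could use the new virtual OMPContext::matchesISATrait hook from the OMPContext.h changes above: subclass OMPContext and decide, per target, whether a raw `device={isa(...)}` spelling recorded via VariantMatchInfo::addTrait is supported. The class name TargetOMPContext and the way the supported spellings are collected are assumptions made up for this example, not upstream API.

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Frontend/OpenMP/OMPContext.h"

struct TargetOMPContext : public llvm::omp::OMPContext {
  TargetOMPContext(bool IsDeviceCompilation, llvm::Triple TargetTriple,
                   llvm::ArrayRef<llvm::StringRef> SupportedISAs)
      : OMPContext(IsDeviceCompilation, TargetTriple),
        SupportedISAs(SupportedISAs.begin(), SupportedISAs.end()) {}

  // Called with the raw string recorded for a `device={isa(...)}` trait;
  // return true if the current target supports that spelling.
  bool matchesISATrait(llvm::StringRef RawString) const override {
    return llvm::is_contained(SupportedISAs, RawString);
  }

  llvm::SmallVector<llvm::StringRef, 4> SupportedISAs;
};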
-/// GRIDVAL[GV_Warp_Size] +/// llvm::omp::GRIDVAL[llvm::omp::GVIDX::GV_Warp_Size] /// /// Example usage in libomptarget cuda plugin: -/// #include "OMPGridValues.h" +/// #include "llvm/Frontend/OpenMP/OMPGridValues.h" /// #define GRIDVAL NVPTXGpuGridValues /// ... Then use this reference to access GV_Warp_Size in the cuda plugin. -/// GRIDVAL[GV_Warp_Size] +/// llvm::omp::GRIDVAL[llvm::omp::GVIDX::GV_Warp_Size] /// enum GVIDX { /// The maximum number of workers in a kernel. diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h index 95eed59f1b3d..22204d9a9ccb 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -18,8 +18,10 @@ #include "llvm/IR/DebugLoc.h" #include "llvm/IR/IRBuilder.h" #include "llvm/Support/Allocator.h" +#include <forward_list> namespace llvm { +class CanonicalLoopInfo; /// An interface to create LLVM-IR for OpenMP directives. /// @@ -36,7 +38,10 @@ public: void initialize(); /// Finalize the underlying module, e.g., by outlining regions. - void finalize(); + /// \param AllowExtractorSinking Flag to include sinking instructions, + /// emitted by CodeExtractor, in the + /// outlined region. Default is false. + void finalize(bool AllowExtractorSinking = false); /// Add attributes known for \p FnID to \p Fn. void addAttributes(omp::RuntimeFunction FnID, Function &Fn); @@ -56,7 +61,7 @@ public: struct FinalizationInfo { /// The finalization callback provided by the last in-flight invocation of - /// CreateXXXX for the directive of kind DK. + /// createXXXX for the directive of kind DK. FinalizeCallbackTy FiniCB; /// The directive kind of the innermost directive that has an associated @@ -96,6 +101,17 @@ public: function_ref<void(InsertPointTy AllocaIP, InsertPointTy CodeGenIP, BasicBlock &ContinuationBB)>; + /// Callback type for loop body code generation. + /// + /// \param CodeGenIP is the insertion point where the loop's body code must be + /// placed. This will be a dedicated BasicBlock with a + /// conditional branch from the loop condition check and + /// terminated with an unconditional branch to the loop + /// latch. + /// \param IndVar is the induction variable usable at the insertion point. + using LoopBodyGenCallbackTy = + function_ref<void(InsertPointTy CodeGenIP, Value *IndVar)>; + /// Callback type for variable privatization (think copy & default /// constructor). /// @@ -103,15 +119,20 @@ public: /// should be placed. /// \param CodeGenIP is the insertion point at which the privatization code /// should be placed. - /// \param Val The value beeing copied/created. + /// \param Original The value being copied/created, should not be used in the + /// generated IR. + /// \param Inner The equivalent of \p Original that should be used in the + /// generated IR; this is equal to \p Original if the value is + /// a pointer and can thus be passed directly, otherwise it is + /// an equivalent but different value. /// \param ReplVal The replacement value, thus a copy or new created version - /// of \p Val. + /// of \p Inner. /// /// \returns The new insertion point where code generation continues and - /// \p ReplVal the replacement of \p Val. + /// \p ReplVal the replacement value. 
using PrivatizeCallbackTy = function_ref<InsertPointTy( - InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value &Val, - Value *&ReplVal)>; + InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value &Original, + Value &Inner, Value *&ReplVal)>; /// Description of a LLVM-IR insertion point (IP) and a debug/source location /// (filename, line, column, ...). @@ -139,7 +160,7 @@ public: /// should be checked and acted upon. /// /// \returns The insertion point after the barrier. - InsertPointTy CreateBarrier(const LocationDescription &Loc, omp::Directive DK, + InsertPointTy createBarrier(const LocationDescription &Loc, omp::Directive DK, bool ForceSimpleCall = false, bool CheckCancelFlag = true); @@ -150,12 +171,13 @@ public: /// \param CanceledDirective The kind of directive that is cancled. /// /// \returns The insertion point after the barrier. - InsertPointTy CreateCancel(const LocationDescription &Loc, Value *IfCondition, + InsertPointTy createCancel(const LocationDescription &Loc, Value *IfCondition, omp::Directive CanceledDirective); /// Generator for '#omp parallel' /// /// \param Loc The insert and source location description. + /// \param AllocaIP The insertion points to be used for alloca instructions. /// \param BodyGenCB Callback that will generate the region code. /// \param PrivCB Callback to copy a given variable (think copy constructor). /// \param FiniCB Callback to finalize variable copies. @@ -166,25 +188,179 @@ public: /// /// \returns The insertion position *after* the parallel. IRBuilder<>::InsertPoint - CreateParallel(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, - PrivatizeCallbackTy PrivCB, FinalizeCallbackTy FiniCB, - Value *IfCondition, Value *NumThreads, - omp::ProcBindKind ProcBind, bool IsCancellable); + createParallel(const LocationDescription &Loc, InsertPointTy AllocaIP, + BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB, + FinalizeCallbackTy FiniCB, Value *IfCondition, + Value *NumThreads, omp::ProcBindKind ProcBind, + bool IsCancellable); + + /// Generator for the control flow structure of an OpenMP canonical loop. + /// + /// This generator operates on the logical iteration space of the loop, i.e. + /// the caller only has to provide a loop trip count of the loop as defined by + /// base language semantics. The trip count is interpreted as an unsigned + /// integer. The induction variable passed to \p BodyGenCB will be of the same + /// type and run from 0 to \p TripCount - 1. It is up to the callback to + /// convert the logical iteration variable to the loop counter variable in the + /// loop body. + /// + /// \param Loc The insert and source location description. The insert + /// location can be between two instructions or the end of a + /// degenerate block (e.g. a BB under construction). + /// \param BodyGenCB Callback that will generate the loop body code. + /// \param TripCount Number of iterations the loop body is executed. + /// \param Name Base name used to derive BB and instruction names. + /// + /// \returns An object representing the created control flow structure which + /// can be used for loop-associated directives. + CanonicalLoopInfo *createCanonicalLoop(const LocationDescription &Loc, + LoopBodyGenCallbackTy BodyGenCB, + Value *TripCount, + const Twine &Name = "loop"); + + /// Generator for the control flow structure of an OpenMP canonical loop. + /// + /// Instead of a logical iteration space, this allows specifying user-defined + /// loop counter values using increment, upper- and lower bounds. 
To
+ /// disambiguate the terminology when counting downwards, instead of lower
+ /// bounds we use \p Start for the loop counter value in the first body
+ /// iteration.
+ ///
+ /// Consider the following limitations:
+ ///
+ /// * A loop counter space over all integer values of its bit-width cannot be
+ /// represented. E.g. using uint8_t, its loop trip count of 256 cannot be
+ /// stored into an 8 bit integer:
+ ///
+ /// DO I = 0, 255, 1
+ ///
+ /// * Unsigned wrapping is only supported when wrapping only "once"; E.g.
+ /// effectively counting downwards:
+ ///
+ /// for (uint8_t i = 100u; i > 0; i += 127u)
+ ///
+ ///
+ /// TODO: May need to add additional parameters to represent:
+ ///
+ /// * Allow representing downcounting with unsigned integers.
+ ///
+ /// * Sign of the step and the comparison operator might disagree:
+ ///
+ /// for (int i = 0; i < 42; --i)
+ ///
+ //
+ /// \param Loc The insert and source location description.
+ /// \param BodyGenCB Callback that will generate the loop body code.
+ /// \param Start Value of the loop counter for the first iteration.
+ /// \param Stop Loop counter values past this will stop the
+ /// iterations.
+ /// \param Step Loop counter increment after each iteration; negative
+ /// means counting down. \param IsSigned Whether Start, Stop
+ /// and Step are signed integers.
+ /// \param InclusiveStop Whether \p Stop itself is a valid value for the loop
+ /// counter.
+ /// \param ComputeIP Insertion point for instructions computing the trip
+ /// count. Can be used to ensure the trip count is available
+ /// at the outermost loop of a loop nest. If not set,
+ /// defaults to the preheader of the generated loop.
+ /// \param Name Base name used to derive BB and instruction names.
+ ///
+ /// \returns An object representing the created control flow structure which
+ /// can be used for loop-associated directives.
+ CanonicalLoopInfo *createCanonicalLoop(const LocationDescription &Loc,
+ LoopBodyGenCallbackTy BodyGenCB,
+ Value *Start, Value *Stop, Value *Step,
+ bool IsSigned, bool InclusiveStop,
+ InsertPointTy ComputeIP = {},
+ const Twine &Name = "loop");
+
+ /// Modifies the canonical loop to be a statically-scheduled workshare loop.
+ ///
+ /// This takes a \p LoopInfo representing a canonical loop, such as the one
+ /// created by \p createCanonicalLoop and emits additional instructions to
+ /// turn it into a workshare loop. In particular, it calls to an OpenMP
+ /// runtime function in the preheader to obtain the loop bounds to be used in
+ /// the current thread, updates the relevant instructions in the canonical
+ /// loop and calls to an OpenMP runtime finalization function after the loop.
+ ///
+ /// \param Loc The source location description, the insertion location
+ /// is not used.
+ /// \param CLI A descriptor of the canonical loop to workshare.
+ /// \param AllocaIP An insertion point for Alloca instructions usable in the
+ /// preheader of the loop.
+ /// \param NeedsBarrier Indicates whether a barrier must be inserted after
+ /// the loop.
+ /// \param Chunk The size of loop chunk considered as a unit when
+ /// scheduling. If \p nullptr, defaults to 1.
+ ///
+ /// \returns Updated CanonicalLoopInfo.
+ CanonicalLoopInfo *createStaticWorkshareLoop(const LocationDescription &Loc,
+ CanonicalLoopInfo *CLI,
+ InsertPointTy AllocaIP,
+ bool NeedsBarrier,
+ Value *Chunk = nullptr);
+
+ /// Tile a loop nest.
+ ///
+ /// Tiles the loops of \p Loops by the tile sizes in \p TileSizes.
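A rough usage sketch (not part of this patch) of the two generators declared above, createCanonicalLoop and createStaticWorkshareLoop. The helper name, its parameters, and the empty body callback are placeholders for whatever a frontend would actually emit; error handling is omitted.

#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"

void emitWorksharedLoop(llvm::Module &M, llvm::IRBuilder<> &Builder,
                        llvm::Value *TripCount,
                        llvm::OpenMPIRBuilder::InsertPointTy AllocaIP) {
  llvm::OpenMPIRBuilder OMPBuilder(M);
  OMPBuilder.initialize();

  llvm::OpenMPIRBuilder::LocationDescription Loc(Builder.saveIP());

  // Emit the canonical loop; the callback fills in the body and receives the
  // logical induction variable (running from 0 to TripCount - 1).
  llvm::CanonicalLoopInfo *CLI = OMPBuilder.createCanonicalLoop(
      Loc,
      [&](llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP,
          llvm::Value *IndVar) {
        // Generate the loop body at CodeGenIP using IndVar here.
        (void)CodeGenIP;
        (void)IndVar;
      },
      TripCount, "omp.loop");

  // Turn the canonical loop into a statically scheduled workshare loop and
  // emit the implicit barrier after it.
  OMPBuilder.createStaticWorkshareLoop(Loc, CLI, AllocaIP,
                                       /*NeedsBarrier=*/true);
}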
Loops in
+ /// \p Loops must be perfectly nested, from outermost to innermost loop
+ /// (i.e. Loops.front() is the outermost loop). The trip count llvm::Value
+ /// of every loop and every tile size must be usable in the outermost
+ /// loop's preheader. This implies that the loop nest is rectangular.
+ ///
+ /// Example:
+ /// \code
+ /// for (int i = 0; i < 15; ++i) // Canonical loop "i"
+ /// for (int j = 0; j < 14; ++j) // Canonical loop "j"
+ /// body(i, j);
+ /// \endcode
+ ///
+ /// After tiling with Loops={i,j} and TileSizes={5,7}, the loop is changed to
+ /// \code
+ /// for (int i1 = 0; i1 < 3; ++i1)
+ /// for (int j1 = 0; j1 < 2; ++j1)
+ /// for (int i2 = 0; i2 < 5; ++i2)
+ /// for (int j2 = 0; j2 < 7; ++j2)
+ /// body(i1*5+i2, j1*7+j2);
+ /// \endcode
+ ///
+ /// The returned vector contains the loops {i1,j1,i2,j2}. The loops i1 and j1 are
+ /// referred to as the floor, and the loops i2 and j2 are the tiles. Tiling also
+ /// handles non-constant trip counts, non-constant tile sizes and trip counts
+ /// that are not multiples of the tile size. In the latter case the tile loop
+ /// of the last floor-loop iteration will have fewer iterations than specified
+ /// as its tile size.
+ ///
+ ///
+ /// @param DL Debug location for instructions added by tiling, for
+ /// instance the floor- and tile trip count computation.
+ /// @param Loops Loops to tile. The CanonicalLoopInfo objects are
+ /// invalidated by this method, i.e. should not be used after
+ /// tiling.
+ /// @param TileSizes For each loop in \p Loops, the tile size for that
+ /// dimension.
+ ///
+ /// \returns A list of generated loops. Contains twice as many loops as the
+ /// input loop nest; the first half are the floor loops and the
+ /// second half are the tile loops.
+ std::vector<CanonicalLoopInfo *>
+ tileLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops,
+ ArrayRef<Value *> TileSizes);
/// Generator for '#omp flush' /// /// \param Loc The location where the flush directive was encountered
- void CreateFlush(const LocationDescription &Loc);
+ void createFlush(const LocationDescription &Loc);
/// Generator for '#omp taskwait' /// /// \param Loc The location where the taskwait directive was encountered.
- void CreateTaskwait(const LocationDescription &Loc);
+ void createTaskwait(const LocationDescription &Loc);
/// Generator for '#omp taskyield' /// /// \param Loc The location where the taskyield directive was encountered.
- void CreateTaskyield(const LocationDescription &Loc);
+ void createTaskyield(const LocationDescription &Loc);
///}
@@ -210,12 +386,22 @@ public: /// Return the (LLVM-IR) string describing the default source location. Constant *getOrCreateDefaultSrcLocStr();
+ /// Return the (LLVM-IR) string describing the source location identified by
+ /// the arguments.
+ Constant *getOrCreateSrcLocStr(StringRef FunctionName, StringRef FileName,
+ unsigned Line, unsigned Column);
+
/// Return the (LLVM-IR) string describing the source location \p Loc. Constant *getOrCreateSrcLocStr(const LocationDescription &Loc); /// Return an ident_t* encoding the source location \p SrcLocStr and \p Flags.
+ /// TODO: Create an enum class for the Reserve2Flags
Value *getOrCreateIdent(Constant *SrcLocStr,
- omp::IdentFlag Flags = omp::IdentFlag(0));
+ omp::IdentFlag Flags = omp::IdentFlag(0),
+ unsigned Reserve2Flags = 0);
+
+ // Get the type corresponding to __kmpc_impl_lanemask_t from the deviceRTL
+ Type *getLanemaskType();
/// Generate control flow and cleanup for cancellation.
/// @@ -280,7 +466,7 @@ public: StringMap<Constant *> SrcLocStrMap; /// Map to remember existing ident_t*. - DenseMap<std::pair<Constant *, uint64_t>, GlobalVariable *> IdentMap; + DenseMap<std::pair<Constant *, uint64_t>, Value *> IdentMap; /// Helper that contains information about regions we need to outline /// during finalization. @@ -298,6 +484,10 @@ public: /// Collection of regions that need to be outlined during finalization. SmallVector<OutlineInfo, 16> OutlineInfos; + /// Collection of owned canonical loop objects that eventually need to be + /// free'd. + std::forward_list<CanonicalLoopInfo> LoopInfos; + /// Add a new region that will be outlined later. void addOutlineInfo(OutlineInfo &&OI) { OutlineInfos.emplace_back(OI); } @@ -309,6 +499,32 @@ public: StringMap<AssertingVH<Constant>, BumpPtrAllocator> InternalVars; public: + /// Generator for __kmpc_copyprivate + /// + /// \param Loc The source location description. + /// \param BufSize Number of elements in the buffer. + /// \param CpyBuf List of pointers to data to be copied. + /// \param CpyFn function to call for copying data. + /// \param DidIt flag variable; 1 for 'single' thread, 0 otherwise. + /// + /// \return The insertion position *after* the CopyPrivate call. + + InsertPointTy createCopyPrivate(const LocationDescription &Loc, + llvm::Value *BufSize, llvm::Value *CpyBuf, + llvm::Value *CpyFn, llvm::Value *DidIt); + + /// Generator for '#omp single' + /// + /// \param Loc The source location description. + /// \param BodyGenCB Callback that will generate the region code. + /// \param FiniCB Callback to finalize variable copies. + /// \param DidIt Local variable used as a flag to indicate 'single' thread + /// + /// \returns The insertion position *after* the single call. + InsertPointTy createSingle(const LocationDescription &Loc, + BodyGenCallbackTy BodyGenCB, + FinalizeCallbackTy FiniCB, llvm::Value *DidIt); + /// Generator for '#omp master' /// /// \param Loc The insert and source location description. @@ -316,7 +532,7 @@ public: /// \param FiniCB Callback to finalize variable copies. /// /// \returns The insertion position *after* the master. - InsertPointTy CreateMaster(const LocationDescription &Loc, + InsertPointTy createMaster(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB); @@ -329,7 +545,7 @@ public: /// \param HintInst Hint Instruction for hint clause associated with critical /// /// \returns The insertion position *after* the master. - InsertPointTy CreateCritical(const LocationDescription &Loc, + InsertPointTy createCritical(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, StringRef CriticalName, Value *HintInst); @@ -346,7 +562,7 @@ public: // and copy.in.end block /// /// \returns The insertion point where copying operation to be emitted. 
- InsertPointTy CreateCopyinClauseBlocks(InsertPointTy IP, Value *MasterAddr,
+ InsertPointTy createCopyinClauseBlocks(InsertPointTy IP, Value *MasterAddr,
Value *PrivateAddr, llvm::IntegerType *IntPtrTy, bool BranchtoEnd = true);
@@ -359,7 +575,7 @@ public: /// \param Name Name of call Instruction for OMP_alloc /// /// \returns CallInst to the OMP_Alloc call
- CallInst *CreateOMPAlloc(const LocationDescription &Loc, Value *Size,
+ CallInst *createOMPAlloc(const LocationDescription &Loc, Value *Size,
Value *Allocator, std::string Name = ""); /// Create a runtime call for kmpc_free
@@ -370,7 +586,7 @@ public: /// \param Name Name of call Instruction for OMP_Free /// /// \returns CallInst to the OMP_Free call
- CallInst *CreateOMPFree(const LocationDescription &Loc, Value *Addr,
+ CallInst *createOMPFree(const LocationDescription &Loc, Value *Addr,
Value *Allocator, std::string Name = ""); /// Create a runtime call for kmpc_threadprivate_cached
@@ -381,7 +597,7 @@ public: /// \param Name Name of call Instruction for callinst /// /// \returns CallInst to the thread private cache call.
- CallInst *CreateCachedThreadPrivate(const LocationDescription &Loc,
+ CallInst *createCachedThreadPrivate(const LocationDescription &Loc,
llvm::Value *Pointer, llvm::ConstantInt *Size, const llvm::Twine &Name = Twine(""));
@@ -489,6 +705,155 @@ private: /// \param CriticalName Name of the critical region. /// Value *getOMPCriticalRegionLock(StringRef CriticalName);
+
+ /// Create the control flow structure of a canonical OpenMP loop.
+ ///
+ /// The emitted loop will be disconnected, i.e. no edge to the loop's
+ /// preheader and no terminator in the AfterBB. The OpenMPIRBuilder's
+ /// IRBuilder location is not preserved.
+ ///
+ /// \param DL DebugLoc used for the instructions in the skeleton.
+ /// \param TripCount Value to be used for the trip count.
+ /// \param F Function in which to insert the BasicBlocks.
+ /// \param PreInsertBefore Where to insert BBs that execute before the body,
+ /// typically the body itself.
+ /// \param PostInsertBefore Where to insert BBs that execute after the body.
+ /// \param Name Base name used to derive BB
+ /// and instruction names.
+ ///
+ /// \returns The CanonicalLoopInfo that represents the emitted loop.
+ CanonicalLoopInfo *createLoopSkeleton(DebugLoc DL, Value *TripCount,
+ Function *F,
+ BasicBlock *PreInsertBefore,
+ BasicBlock *PostInsertBefore,
+ const Twine &Name = {});
+};
+
+/// Class to represent the control flow structure of an OpenMP canonical loop.
+///
+/// The control-flow structure is standardized for easy consumption by
+/// directives associated with loops. For instance, the worksharing-loop
+/// construct may change this control flow such that each loop iteration is
+/// executed on only one thread.
+///
+/// The control flow can be described as follows:
+///
+/// Preheader
+/// |
+/// /-> Header
+/// | |
+/// | Cond---\
+/// | | |
+/// | Body |
+/// | | | |
+/// | <...> |
+/// | | | |
+/// \--Latch |
+/// |
+/// Exit
+/// |
+/// After
+///
+/// Code in the header, condition block, latch and exit block must not have any
+/// side-effect. The body block is the single entry point into the loop body,
+/// which may contain arbitrary control flow as long as all control paths
+/// eventually branch to the latch block.
+///
+/// Defined outside OpenMPIRBuilder because one cannot forward-declare nested
+/// classes.
+class CanonicalLoopInfo {
+ friend class OpenMPIRBuilder;
+
+private:
+ /// Whether this object currently represents a loop.
+ bool IsValid = false;
+
+ BasicBlock *Preheader;
+ BasicBlock *Header;
+ BasicBlock *Cond;
+ BasicBlock *Body;
+ BasicBlock *Latch;
+ BasicBlock *Exit;
+ BasicBlock *After;
+
+ /// Add the control blocks of this loop to \p BBs.
+ ///
+ /// This does not include any block from the body, including the one returned
+ /// by getBody().
+ void collectControlBlocks(SmallVectorImpl<BasicBlock *> &BBs);
+
+public:
+ /// The preheader ensures that there is only a single edge entering the loop.
+ /// Code that must be executed before any loop iteration can be emitted here,
+ /// such as computing the loop trip count and begin lifetime markers. Code in
+ /// the preheader is not considered part of the canonical loop.
+ BasicBlock *getPreheader() const { return Preheader; }
+
+ /// The header is the entry for each iteration. In the canonical control flow,
+ /// it only contains the PHINode for the induction variable.
+ BasicBlock *getHeader() const { return Header; }
+
+ /// The condition block computes whether there is another loop iteration. If
+ /// yes, branches to the body; otherwise to the exit block.
+ BasicBlock *getCond() const { return Cond; }
+
+ /// The body block is the single entry for a loop iteration and not controlled
+ /// by CanonicalLoopInfo. It can contain arbitrary control flow but must
+ /// eventually branch to the \p Latch block.
+ BasicBlock *getBody() const { return Body; }
+
+ /// Reaching the latch indicates the end of the loop body code. In the
+ /// canonical control flow, it only contains the increment of the induction
+ /// variable.
+ BasicBlock *getLatch() const { return Latch; }
+
+ /// Reaching the exit indicates no more iterations are being executed.
+ BasicBlock *getExit() const { return Exit; }
+
+ /// The after block is intended for clean-up code such as lifetime end
+ /// markers. It is separate from the exit block to ensure, analogous to the
+ /// preheader, that it has just a single entry edge and is free from PHI
+ /// nodes should there be multiple loop exits (such as from break
+ /// statements/cancellations).
+ BasicBlock *getAfter() const { return After; }
+
+ /// Returns the llvm::Value containing the number of loop iterations. It must
+ /// be valid in the preheader and always interpreted as an unsigned integer of
+ /// any bit-width.
+ Value *getTripCount() const {
+ Instruction *CmpI = &Cond->front();
+ assert(isa<CmpInst>(CmpI) && "First inst must compare IV with TripCount");
+ return CmpI->getOperand(1);
+ }
+
+ /// Returns the instruction representing the current logical induction
+ /// variable. Always unsigned, always starting at 0 with an increment of one.
+ Instruction *getIndVar() const {
+ Instruction *IndVarPHI = &Header->front();
+ assert(isa<PHINode>(IndVarPHI) && "First inst must be the IV PHI");
+ return IndVarPHI;
+ }
+
+ /// Return the type of the induction variable (and the trip count).
+ Type *getIndVarType() const { return getIndVar()->getType(); }
+
+ /// Return the insertion point for user code before the loop.
+ OpenMPIRBuilder::InsertPointTy getPreheaderIP() const {
+ return {Preheader, std::prev(Preheader->end())};
+ };
+
+ /// Return the insertion point for user code in the body.
+ OpenMPIRBuilder::InsertPointTy getBodyIP() const {
+ return {Body, Body->begin()};
+ };
+
+ /// Return the insertion point for user code after the loop.
+ OpenMPIRBuilder::InsertPointTy getAfterIP() const {
+ return {After, After->begin()};
+ };
+
+ /// Consistency self-check.
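An illustrative sketch (not part of this patch) of how a consumer might use the accessors above: given a CanonicalLoopInfo produced by createCanonicalLoop, materialize a user-visible counter UserIV = Start + IndVar * Step at the top of the body block. Start and Step are assumed to already be valid there and to have the same type as the induction variable.

#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
#include "llvm/IR/IRBuilder.h"

llvm::Value *emitUserCounter(llvm::CanonicalLoopInfo *CLI, llvm::Value *Start,
                             llvm::Value *Step) {
  // Logical induction variable: unsigned, runs from 0 to TripCount - 1.
  llvm::Instruction *IndVar = CLI->getIndVar();

  // Position an IRBuilder at the body insertion point exposed by the class.
  llvm::OpenMPIRBuilder::InsertPointTy IP = CLI->getBodyIP();
  llvm::IRBuilder<> Builder(IP.getBlock(), IP.getPoint());

  llvm::Value *Scaled = Builder.CreateMul(IndVar, Step, "scaled.iv");
  return Builder.CreateAdd(Start, Scaled, "user.iv");
}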
+ void assertOK() const; }; } // end namespace llvm diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def index 93ea63c1c2e6..844046167975 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def +++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def @@ -7,217 +7,15 @@ //===----------------------------------------------------------------------===// /// \file /// -/// This file defines the list of supported OpenMP directives, clauses, runtime +/// This file defines the list of supported OpenMP runtime /// calls, and other things that need to be listed in enums. /// -//===----------------------------------------------------------------------===// - -/// OpenMP Directives and combined directives +/// This file is under transition to OMP.td with TableGen code generation. /// -///{ - -#ifndef OMP_DIRECTIVE -#define OMP_DIRECTIVE(Enum, Str) -#endif - -#define __OMP_DIRECTIVE_EXT(Name, Str) OMP_DIRECTIVE(OMPD_##Name, Str) -#define __OMP_DIRECTIVE(Name) __OMP_DIRECTIVE_EXT(Name, #Name) - -__OMP_DIRECTIVE(threadprivate) -__OMP_DIRECTIVE(parallel) -__OMP_DIRECTIVE(task) -__OMP_DIRECTIVE(simd) -__OMP_DIRECTIVE(for) -__OMP_DIRECTIVE(sections) -__OMP_DIRECTIVE(section) -__OMP_DIRECTIVE(single) -__OMP_DIRECTIVE(master) -__OMP_DIRECTIVE(critical) -__OMP_DIRECTIVE(taskyield) -__OMP_DIRECTIVE(barrier) -__OMP_DIRECTIVE(taskwait) -__OMP_DIRECTIVE(taskgroup) -__OMP_DIRECTIVE(flush) -__OMP_DIRECTIVE(ordered) -__OMP_DIRECTIVE(atomic) -__OMP_DIRECTIVE(target) -__OMP_DIRECTIVE(teams) -__OMP_DIRECTIVE(cancel) -__OMP_DIRECTIVE(requires) -__OMP_DIRECTIVE_EXT(target_data, "target data") -__OMP_DIRECTIVE_EXT(target_enter_data, "target enter data") -__OMP_DIRECTIVE_EXT(target_exit_data, "target exit data") -__OMP_DIRECTIVE_EXT(target_parallel, "target parallel") -__OMP_DIRECTIVE_EXT(target_parallel_for, "target parallel for") -__OMP_DIRECTIVE_EXT(target_update, "target update") -__OMP_DIRECTIVE_EXT(parallel_for, "parallel for") -__OMP_DIRECTIVE_EXT(parallel_for_simd, "parallel for simd") -__OMP_DIRECTIVE_EXT(parallel_master, "parallel master") -__OMP_DIRECTIVE_EXT(parallel_sections, "parallel sections") -__OMP_DIRECTIVE_EXT(for_simd, "for simd") -__OMP_DIRECTIVE_EXT(cancellation_point, "cancellation point") -__OMP_DIRECTIVE_EXT(declare_reduction, "declare reduction") -__OMP_DIRECTIVE_EXT(declare_mapper, "declare mapper") -__OMP_DIRECTIVE_EXT(declare_simd, "declare simd") -__OMP_DIRECTIVE(taskloop) -__OMP_DIRECTIVE_EXT(taskloop_simd, "taskloop simd") -__OMP_DIRECTIVE(distribute) -__OMP_DIRECTIVE_EXT(declare_target, "declare target") -__OMP_DIRECTIVE_EXT(end_declare_target, "end declare target") -__OMP_DIRECTIVE_EXT(distribute_parallel_for, "distribute parallel for") -__OMP_DIRECTIVE_EXT(distribute_parallel_for_simd, - "distribute parallel for simd") -__OMP_DIRECTIVE_EXT(distribute_simd, "distribute simd") -__OMP_DIRECTIVE_EXT(target_parallel_for_simd, "target parallel for simd") -__OMP_DIRECTIVE_EXT(target_simd, "target simd") -__OMP_DIRECTIVE_EXT(teams_distribute, "teams distribute") -__OMP_DIRECTIVE_EXT(teams_distribute_simd, "teams distribute simd") -__OMP_DIRECTIVE_EXT(teams_distribute_parallel_for_simd, - "teams distribute parallel for simd") -__OMP_DIRECTIVE_EXT(teams_distribute_parallel_for, - "teams distribute parallel for") -__OMP_DIRECTIVE_EXT(target_teams, "target teams") -__OMP_DIRECTIVE_EXT(target_teams_distribute, "target teams distribute") -__OMP_DIRECTIVE_EXT(target_teams_distribute_parallel_for, - "target teams distribute parallel for") 
-__OMP_DIRECTIVE_EXT(target_teams_distribute_parallel_for_simd, - "target teams distribute parallel for simd") -__OMP_DIRECTIVE_EXT(target_teams_distribute_simd, - "target teams distribute simd") -__OMP_DIRECTIVE(allocate) -__OMP_DIRECTIVE_EXT(declare_variant, "declare variant") -__OMP_DIRECTIVE_EXT(master_taskloop, "master taskloop") -__OMP_DIRECTIVE_EXT(parallel_master_taskloop, "parallel master taskloop") -__OMP_DIRECTIVE_EXT(master_taskloop_simd, "master taskloop simd") -__OMP_DIRECTIVE_EXT(parallel_master_taskloop_simd, - "parallel master taskloop simd") -__OMP_DIRECTIVE(depobj) -__OMP_DIRECTIVE(scan) -__OMP_DIRECTIVE_EXT(begin_declare_variant, "begin declare variant") -__OMP_DIRECTIVE_EXT(end_declare_variant, "end declare variant") - -// Has to be the last because Clang implicitly expects it to be. -__OMP_DIRECTIVE(unknown) - -#undef __OMP_DIRECTIVE_EXT -#undef __OMP_DIRECTIVE -#undef OMP_DIRECTIVE - -///} - -/// OpenMP Clauses -/// -///{ - -#ifndef OMP_CLAUSE -#define OMP_CLAUSE(Enum, Str, Implicit) -#endif -#ifndef OMP_CLAUSE_CLASS -#define OMP_CLAUSE_CLASS(Enum, Str, Class) -#endif -#ifndef OMP_CLAUSE_NO_CLASS -#define OMP_CLAUSE_NO_CLASS(Enum, Str) -#endif - -#define __OMP_CLAUSE(Name, Class) \ - OMP_CLAUSE(OMPC_##Name, #Name, /* Implicit */ false) \ - OMP_CLAUSE_CLASS(OMPC_##Name, #Name, Class) -#define __OMP_CLAUSE_NO_CLASS(Name) \ - OMP_CLAUSE(OMPC_##Name, #Name, /* Implicit */ false) \ - OMP_CLAUSE_NO_CLASS(OMPC_##Name, #Name) -#define __OMP_IMPLICIT_CLAUSE_CLASS(Name, Str, Class) \ - OMP_CLAUSE(OMPC_##Name, Str, /* Implicit */ true) \ - OMP_CLAUSE_CLASS(OMPC_##Name, Str, Class) -#define __OMP_IMPLICIT_CLAUSE_NO_CLASS(Name, Str) \ - OMP_CLAUSE(OMPC_##Name, Str, /* Implicit */ true) \ - OMP_CLAUSE_NO_CLASS(OMPC_##Name, Str) - -__OMP_CLAUSE(allocator, OMPAllocatorClause) -__OMP_CLAUSE(if, OMPIfClause) -__OMP_CLAUSE(final, OMPFinalClause) -__OMP_CLAUSE(num_threads, OMPNumThreadsClause) -__OMP_CLAUSE(safelen, OMPSafelenClause) -__OMP_CLAUSE(simdlen, OMPSimdlenClause) -__OMP_CLAUSE(collapse, OMPCollapseClause) -__OMP_CLAUSE(default, OMPDefaultClause) -__OMP_CLAUSE(private, OMPPrivateClause) -__OMP_CLAUSE(firstprivate, OMPFirstprivateClause) -__OMP_CLAUSE(lastprivate, OMPLastprivateClause) -__OMP_CLAUSE(shared, OMPSharedClause) -__OMP_CLAUSE(reduction, OMPReductionClause) -__OMP_CLAUSE(linear, OMPLinearClause) -__OMP_CLAUSE(aligned, OMPAlignedClause) -__OMP_CLAUSE(copyin, OMPCopyinClause) -__OMP_CLAUSE(copyprivate, OMPCopyprivateClause) -__OMP_CLAUSE(proc_bind, OMPProcBindClause) -__OMP_CLAUSE(schedule, OMPScheduleClause) -__OMP_CLAUSE(ordered, OMPOrderedClause) -__OMP_CLAUSE(nowait, OMPNowaitClause) -__OMP_CLAUSE(untied, OMPUntiedClause) -__OMP_CLAUSE(mergeable, OMPMergeableClause) -__OMP_CLAUSE(read, OMPReadClause) -__OMP_CLAUSE(write, OMPWriteClause) -__OMP_CLAUSE(update, OMPUpdateClause) -__OMP_CLAUSE(capture, OMPCaptureClause) -__OMP_CLAUSE(seq_cst, OMPSeqCstClause) -__OMP_CLAUSE(acq_rel, OMPAcqRelClause) -__OMP_CLAUSE(acquire, OMPAcquireClause) -__OMP_CLAUSE(release, OMPReleaseClause) -__OMP_CLAUSE(relaxed, OMPRelaxedClause) -__OMP_CLAUSE(depend, OMPDependClause) -__OMP_CLAUSE(device, OMPDeviceClause) -__OMP_CLAUSE(threads, OMPThreadsClause) -__OMP_CLAUSE(simd, OMPSIMDClause) -__OMP_CLAUSE(map, OMPMapClause) -__OMP_CLAUSE(num_teams, OMPNumTeamsClause) -__OMP_CLAUSE(thread_limit, OMPThreadLimitClause) -__OMP_CLAUSE(priority, OMPPriorityClause) -__OMP_CLAUSE(grainsize, OMPGrainsizeClause) -__OMP_CLAUSE(nogroup, OMPNogroupClause) -__OMP_CLAUSE(num_tasks, OMPNumTasksClause) 
-__OMP_CLAUSE(hint, OMPHintClause) -__OMP_CLAUSE(dist_schedule, OMPDistScheduleClause) -__OMP_CLAUSE(defaultmap, OMPDefaultmapClause) -__OMP_CLAUSE(to, OMPToClause) -__OMP_CLAUSE(from, OMPFromClause) -__OMP_CLAUSE(use_device_ptr, OMPUseDevicePtrClause) -__OMP_CLAUSE(is_device_ptr, OMPIsDevicePtrClause) -__OMP_CLAUSE(task_reduction, OMPTaskReductionClause) -__OMP_CLAUSE(in_reduction, OMPInReductionClause) -__OMP_CLAUSE(unified_address, OMPUnifiedAddressClause) -__OMP_CLAUSE(unified_shared_memory, OMPUnifiedSharedMemoryClause) -__OMP_CLAUSE(reverse_offload, OMPReverseOffloadClause) -__OMP_CLAUSE(dynamic_allocators, OMPDynamicAllocatorsClause) -__OMP_CLAUSE(atomic_default_mem_order, OMPAtomicDefaultMemOrderClause) -__OMP_CLAUSE(allocate, OMPAllocateClause) -__OMP_CLAUSE(nontemporal, OMPNontemporalClause) -__OMP_CLAUSE(order, OMPOrderClause) -__OMP_CLAUSE(destroy, OMPDestroyClause) -__OMP_CLAUSE(detach, OMPDetachClause) -__OMP_CLAUSE(inclusive, OMPInclusiveClause) -__OMP_CLAUSE(exclusive, OMPExclusiveClause) -__OMP_CLAUSE(uses_allocators, OMPUsesAllocatorsClause) -__OMP_CLAUSE(affinity, OMPAffinityClause) -__OMP_CLAUSE(use_device_addr, OMPUseDeviceAddrClause) - -__OMP_CLAUSE_NO_CLASS(uniform) -__OMP_CLAUSE_NO_CLASS(device_type) -__OMP_CLAUSE_NO_CLASS(match) - -__OMP_IMPLICIT_CLAUSE_CLASS(depobj, "depobj", OMPDepobjClause) -__OMP_IMPLICIT_CLAUSE_CLASS(flush, "flush", OMPFlushClause) - -__OMP_IMPLICIT_CLAUSE_NO_CLASS(threadprivate, "threadprivate or thread local") -__OMP_IMPLICIT_CLAUSE_NO_CLASS(unknown, "unknown") - -#undef __OMP_IMPLICIT_CLAUSE_NO_CLASS -#undef __OMP_IMPLICIT_CLAUSE_CLASS -#undef __OMP_CLAUSE -#undef OMP_CLAUSE_NO_CLASS -#undef OMP_CLAUSE_CLASS -#undef OMP_CLAUSE +//===----------------------------------------------------------------------===// -///} +/// OpenMP Directives, combined directives and Clauses +/// - Moved to OMP.td /// Types used in runtime structs or runtime functions /// @@ -232,13 +30,16 @@ __OMP_IMPLICIT_CLAUSE_NO_CLASS(unknown, "unknown") __OMP_TYPE(Void) __OMP_TYPE(Int1) __OMP_TYPE(Int8) +__OMP_TYPE(Int16) __OMP_TYPE(Int32) __OMP_TYPE(Int64) __OMP_TYPE(Int8Ptr) +__OMP_TYPE(Int16Ptr) __OMP_TYPE(Int32Ptr) __OMP_TYPE(Int64Ptr) OMP_TYPE(SizeTy, M.getDataLayout().getIntPtrType(Ctx)) +OMP_TYPE(LanemaskTy, getLanemaskType()) #define __OMP_PTR_TYPE(NAME, BASE) OMP_TYPE(NAME, BASE->getPointerTo()) @@ -286,6 +87,7 @@ __OMP_ARRAY_TYPE(KmpCriticalName, Int32, 8) OMP_STRUCT_TYPE(VarName, "struct." 
#Name, __VA_ARGS__) __OMP_STRUCT_TYPE(Ident, ident_t, Int32, Int32, Int32, Int32, Int8Ptr) +__OMP_STRUCT_TYPE(AsyncInfo, __tgt_async_info, Int8Ptr) #undef __OMP_STRUCT_TYPE #undef OMP_STRUCT_TYPE @@ -305,6 +107,9 @@ __OMP_FUNCTION_TYPE(KmpcDtor, false, Void, VoidPtr) __OMP_FUNCTION_TYPE(KmpcCopyCtor, false, VoidPtr, VoidPtr, VoidPtr) __OMP_FUNCTION_TYPE(TaskRoutineEntry, false, Int32, Int32, /* kmp_task_t */ VoidPtr) +__OMP_FUNCTION_TYPE(ShuffleReduce, false, Void, VoidPtr, Int16, Int16, Int16) +__OMP_FUNCTION_TYPE(InterWarpCopy, false, Void, VoidPtr, Int32) +__OMP_FUNCTION_TYPE(GlobalList, false, Void, VoidPtr, Int32, VoidPtr) #undef __OMP_FUNCTION_TYPE #undef OMP_FUNCTION_TYPE @@ -315,6 +120,20 @@ __OMP_FUNCTION_TYPE(TaskRoutineEntry, false, Int32, Int32, /// ///{ +#ifndef ICV_INIT_VALUE +#define ICV_INIT_VALUE(Enum, Name) +#endif + +#define __ICV_INIT_VALUE(Name) ICV_INIT_VALUE(ICV_##Name, #Name) + +__ICV_INIT_VALUE(ZERO) +__ICV_INIT_VALUE(FALSE) +__ICV_INIT_VALUE(IMPLEMENTATION_DEFINED) +__ICV_INIT_VALUE(LAST) + +#undef __ICV_INIT_VALUE +#undef ICV_INIT_VALUE + #ifndef ICV_DATA_ENV #define ICV_DATA_ENV(Enum, Name, EnvVarName, Init) #endif @@ -325,6 +144,7 @@ __OMP_FUNCTION_TYPE(TaskRoutineEntry, false, Int32, Int32, __ICV_DATA_ENV(nthreads, OMP_NUM_THREADS, ICV_IMPLEMENTATION_DEFINED) __ICV_DATA_ENV(active_levels, NONE, ICV_ZERO) __ICV_DATA_ENV(cancel, OMP_CANCELLATION, ICV_FALSE) +__ICV_DATA_ENV(proc_bind, OMP_PROC_BIND, ICV_IMPLEMENTATION_DEFINED) __ICV_DATA_ENV(__last, last, ICV_LAST) #undef __ICV_DATA_ENV @@ -350,6 +170,7 @@ __ICV_RT_SET(nthreads, omp_set_num_threads) __ICV_RT_GET(nthreads, omp_get_max_threads) __ICV_RT_GET(active_levels, omp_get_active_level) __ICV_RT_GET(cancel, omp_get_cancellation) +__ICV_RT_GET(proc_bind, omp_get_proc_bind) #undef __ICV_RT_GET #undef ICV_RT_GET @@ -380,10 +201,9 @@ __OMP_RTL(__kmpc_omp_taskyield, false, Int32, IdentPtr, Int32, /* Int */ Int32) __OMP_RTL(__kmpc_push_num_threads, false, Void, IdentPtr, Int32, /* Int */ Int32) __OMP_RTL(__kmpc_push_proc_bind, false, Void, IdentPtr, Int32, /* Int */ Int32) -__OMP_RTL(__kmpc_serialized_parallel, false, Void, IdentPtr, Int32) -__OMP_RTL(__kmpc_end_serialized_parallel, false, Void, IdentPtr, Int32) __OMP_RTL(__kmpc_omp_reg_task_with_affinity, false, Int32, IdentPtr, Int32, - Int8Ptr, Int32, Int8Ptr) + /* kmp_task_t */ VoidPtr, Int32, + /* kmp_task_affinity_info_t */ VoidPtr) __OMP_RTL(omp_get_thread_num, false, Int32, ) __OMP_RTL(omp_get_num_threads, false, Int32, ) @@ -430,8 +250,7 @@ __OMP_RTL(__kmpc_reduce, false, Int32, IdentPtr, Int32, Int32, SizeTy, VoidPtr, ReduceFunctionPtr, KmpCriticalNamePtrTy) __OMP_RTL(__kmpc_reduce_nowait, false, Int32, IdentPtr, Int32, Int32, SizeTy, VoidPtr, ReduceFunctionPtr, KmpCriticalNamePtrTy) -__OMP_RTL(__kmpc_end_reduce, false, Void, IdentPtr, Int32, - KmpCriticalNamePtrTy) +__OMP_RTL(__kmpc_end_reduce, false, Void, IdentPtr, Int32, KmpCriticalNamePtrTy) __OMP_RTL(__kmpc_end_reduce_nowait, false, Void, IdentPtr, Int32, KmpCriticalNamePtrTy) @@ -514,10 +333,10 @@ __OMP_RTL(__kmpc_taskloop, false, Void, IdentPtr, /* Int */ Int32, VoidPtr, /* Int */ Int32, Int64, VoidPtr) __OMP_RTL(__kmpc_omp_target_task_alloc, false, /* kmp_task_t */ VoidPtr, IdentPtr, Int32, Int32, SizeTy, SizeTy, TaskRoutineEntryPtr, Int64) -__OMP_RTL(__kmpc_taskred_modifier_init, false, VoidPtr, IdentPtr, - /* Int */ Int32, /* Int */ Int32, /* Int */ Int32, VoidPtr) -__OMP_RTL(__kmpc_taskred_init, false, VoidPtr, /* Int */ Int32, - /* Int */ Int32, VoidPtr) 
+__OMP_RTL(__kmpc_taskred_modifier_init, false, /* kmp_taskgroup */ VoidPtr, + IdentPtr, /* Int */ Int32, /* Int */ Int32, /* Int */ Int32, VoidPtr) +__OMP_RTL(__kmpc_taskred_init, false, /* kmp_taskgroup */ VoidPtr, + /* Int */ Int32, /* Int */ Int32, VoidPtr) __OMP_RTL(__kmpc_task_reduction_modifier_fini, false, Void, IdentPtr, /* Int */ Int32, /* Int */ Int32) __OMP_RTL(__kmpc_task_reduction_get_th_data, false, VoidPtr, Int32, VoidPtr, @@ -556,45 +375,83 @@ __OMP_RTL(__kmpc_init_allocator, false, /* omp_allocator_handle_t */ VoidPtr, __OMP_RTL(__kmpc_destroy_allocator, false, Void, /* Int */ Int32, /* omp_allocator_handle_t */ VoidPtr) -__OMP_RTL(__kmpc_push_target_tripcount, false, Void, Int64, Int64) -__OMP_RTL(__tgt_target, false, Int32, Int64, VoidPtr, Int32, VoidPtrPtr, - VoidPtrPtr, Int64Ptr, Int64Ptr) -__OMP_RTL(__tgt_target_nowait, false, Int32, Int64, VoidPtr, Int32, VoidPtrPtr, - VoidPtrPtr, Int64Ptr, Int64Ptr) -__OMP_RTL(__tgt_target_teams, false, Int32, Int64, VoidPtr, Int32, VoidPtrPtr, - VoidPtrPtr, Int64Ptr, Int64Ptr, Int32, Int32) -__OMP_RTL(__tgt_target_teams_nowait, false, Int32, Int64, VoidPtr, Int32, - VoidPtrPtr, VoidPtrPtr, Int64Ptr, Int64Ptr, Int32, Int32) +__OMP_RTL(__kmpc_push_target_tripcount, false, Void, IdentPtr, Int64, Int64) +__OMP_RTL(__tgt_target_mapper, false, Int32, IdentPtr, Int64, VoidPtr, Int32, VoidPtrPtr, + VoidPtrPtr, Int64Ptr, Int64Ptr, VoidPtrPtr, VoidPtrPtr) +__OMP_RTL(__tgt_target_nowait_mapper, false, Int32, IdentPtr, Int64, VoidPtr, Int32, + VoidPtrPtr, VoidPtrPtr, Int64Ptr, Int64Ptr, VoidPtrPtr, VoidPtrPtr) +__OMP_RTL(__tgt_target_teams_mapper, false, Int32, IdentPtr, Int64, VoidPtr, Int32, + VoidPtrPtr, VoidPtrPtr, Int64Ptr, Int64Ptr, VoidPtrPtr, VoidPtrPtr, Int32, Int32) +__OMP_RTL(__tgt_target_teams_nowait_mapper, false, Int32, IdentPtr, Int64, VoidPtr, Int32, + VoidPtrPtr, VoidPtrPtr, Int64Ptr, Int64Ptr, VoidPtrPtr, VoidPtrPtr, Int32, Int32) __OMP_RTL(__tgt_register_requires, false, Void, Int64) -__OMP_RTL(__tgt_target_data_begin, false, Void, Int64, Int32, VoidPtrPtr, - VoidPtrPtr, Int64Ptr, Int64Ptr) -__OMP_RTL(__tgt_target_data_begin_nowait, false, Void, Int64, Int32, VoidPtrPtr, - VoidPtrPtr, Int64Ptr, Int64Ptr) -__OMP_RTL(__tgt_target_data_end, false, Void, Int64, Int32, VoidPtrPtr, - VoidPtrPtr, Int64Ptr, Int64Ptr) -__OMP_RTL(__tgt_target_data_end_nowait, false, Void, Int64, Int32, VoidPtrPtr, - VoidPtrPtr, Int64Ptr, Int64Ptr) -__OMP_RTL(__tgt_target_data_update, false, Void, Int64, Int32, VoidPtrPtr, - VoidPtrPtr, Int64Ptr, Int64Ptr) -__OMP_RTL(__tgt_target_data_update_nowait, false, Void, Int64, Int32, - VoidPtrPtr, VoidPtrPtr, Int64Ptr, Int64Ptr) +__OMP_RTL(__tgt_target_data_begin_mapper, false, Void, IdentPtr, Int64, Int32, VoidPtrPtr, + VoidPtrPtr, Int64Ptr, Int64Ptr, VoidPtrPtr, VoidPtrPtr) +__OMP_RTL(__tgt_target_data_begin_nowait_mapper, false, Void, IdentPtr, Int64, Int32, + VoidPtrPtr, VoidPtrPtr, Int64Ptr, Int64Ptr, VoidPtrPtr, VoidPtrPtr) +__OMP_RTL(__tgt_target_data_begin_mapper_issue, false, Void, IdentPtr, Int64, Int32, + VoidPtrPtr, VoidPtrPtr, Int64Ptr, Int64Ptr, VoidPtrPtr, VoidPtrPtr, AsyncInfoPtr) +__OMP_RTL(__tgt_target_data_begin_mapper_wait, false, Void, Int64, AsyncInfoPtr) +__OMP_RTL(__tgt_target_data_end_mapper, false, Void, IdentPtr, Int64, Int32, VoidPtrPtr, + VoidPtrPtr, Int64Ptr, Int64Ptr, VoidPtrPtr, VoidPtrPtr) +__OMP_RTL(__tgt_target_data_end_nowait_mapper, false, Void, IdentPtr, Int64, Int32, + VoidPtrPtr, VoidPtrPtr, Int64Ptr, Int64Ptr, VoidPtrPtr, VoidPtrPtr) 
+__OMP_RTL(__tgt_target_data_update_mapper, false, Void, IdentPtr, Int64, Int32, + VoidPtrPtr, VoidPtrPtr, Int64Ptr, Int64Ptr, VoidPtrPtr, VoidPtrPtr) +__OMP_RTL(__tgt_target_data_update_nowait_mapper, false, Void, IdentPtr, Int64, Int32, + VoidPtrPtr, VoidPtrPtr, Int64Ptr, Int64Ptr, VoidPtrPtr, VoidPtrPtr) __OMP_RTL(__tgt_mapper_num_components, false, Int64, VoidPtr) __OMP_RTL(__tgt_push_mapper_component, false, Void, VoidPtr, VoidPtr, VoidPtr, - Int64, Int64) + Int64, Int64, VoidPtr) __OMP_RTL(__kmpc_task_allow_completion_event, false, VoidPtr, IdentPtr, /* Int */ Int32, /* kmp_task_t */ VoidPtr) -/// Note that device runtime functions (in the following) do not necessarily -/// need attributes as we expect to see the definitions. -__OMP_RTL(__kmpc_kernel_parallel, false, Int1, VoidPtrPtr) +/// OpenMP Device runtime functions +__OMP_RTL(__kmpc_kernel_init, false, Void, Int32, Int16) +__OMP_RTL(__kmpc_kernel_deinit, false, Void, Int16) +__OMP_RTL(__kmpc_spmd_kernel_init, false, Void, Int32, Int16) +__OMP_RTL(__kmpc_spmd_kernel_deinit_v2, false, Void, Int16) __OMP_RTL(__kmpc_kernel_prepare_parallel, false, Void, VoidPtr) +__OMP_RTL(__kmpc_kernel_parallel, false, Int1, VoidPtrPtr) +__OMP_RTL(__kmpc_kernel_end_parallel, false, Void, ) +__OMP_RTL(__kmpc_serialized_parallel, false, Void, IdentPtr, Int32) +__OMP_RTL(__kmpc_end_serialized_parallel, false, Void, IdentPtr, Int32) +__OMP_RTL(__kmpc_shuffle_int32, false, Int32, Int32, Int16, Int16) +__OMP_RTL(__kmpc_nvptx_parallel_reduce_nowait_v2, false, Int32, IdentPtr, Int32, + Int32, SizeTy, VoidPtr, ShuffleReducePtr, InterWarpCopyPtr) +__OMP_RTL(__kmpc_nvptx_end_reduce_nowait, false, Void, Int32) +__OMP_RTL(__kmpc_nvptx_teams_reduce_nowait_v2, false, Int32, IdentPtr, Int32, + VoidPtr, Int32, VoidPtr, ShuffleReducePtr, InterWarpCopyPtr, + GlobalListPtr, GlobalListPtr, GlobalListPtr, GlobalListPtr) + +__OMP_RTL(__kmpc_shuffle_int64, false, Int64, Int64, Int16, Int16) +__OMP_RTL(__kmpc_data_sharing_init_stack, false, Void, ) +__OMP_RTL(__kmpc_data_sharing_init_stack_spmd, false, Void, ) + +__OMP_RTL(__kmpc_data_sharing_coalesced_push_stack, false, VoidPtr, SizeTy, Int16) +__OMP_RTL(__kmpc_data_sharing_push_stack, false, VoidPtr, SizeTy, Int16) +__OMP_RTL(__kmpc_data_sharing_pop_stack, false, Void, VoidPtr) +__OMP_RTL(__kmpc_begin_sharing_variables, false, Void, VoidPtrPtrPtr, SizeTy) +__OMP_RTL(__kmpc_end_sharing_variables, false, Void, ) +__OMP_RTL(__kmpc_get_shared_variables, false, Void, VoidPtrPtrPtr) +__OMP_RTL(__kmpc_parallel_level, false, Int16, IdentPtr, Int32) +__OMP_RTL(__kmpc_is_spmd_exec_mode, false, Int8, ) +__OMP_RTL(__kmpc_get_team_static_memory, false, Void, Int16, VoidPtr, SizeTy, + Int16, VoidPtrPtr) +__OMP_RTL(__kmpc_restore_team_static_memory, false, Void, Int16, Int16) +__OMP_RTL(__kmpc_barrier_simple_spmd, false, Void, IdentPtr, Int32) + +__OMP_RTL(__kmpc_warp_active_thread_mask, false, LanemaskTy,) +__OMP_RTL(__kmpc_syncwarp, false, Void, LanemaskTy) __OMP_RTL(__last, false, Void, ) #undef __OMP_RTL #undef OMP_RTL +#define ParamAttrs(...) ArrayRef<AttributeSet>({__VA_ARGS__}) #define EnumAttr(Kind) Attribute::get(Ctx, Attribute::AttrKind::Kind) +#define EnumAttrInt(Kind, N) Attribute::get(Ctx, Attribute::AttrKind::Kind, N) #define AttributeSet(...) \ AttributeSet::get(Ctx, ArrayRef<Attribute>({__VA_ARGS__})) @@ -607,19 +464,94 @@ __OMP_RTL(__last, false, Void, ) __OMP_ATTRS_SET(GetterAttrs, OptimisticAttributes ? 
AttributeSet(EnumAttr(NoUnwind), EnumAttr(ReadOnly), - EnumAttr(NoSync), EnumAttr(NoFree), EnumAttr(InaccessibleMemOnly)) + EnumAttr(NoSync), EnumAttr(NoFree), + EnumAttr(InaccessibleMemOnly), + EnumAttr(WillReturn)) : AttributeSet(EnumAttr(NoUnwind))) __OMP_ATTRS_SET(GetterArgWriteAttrs, OptimisticAttributes ? AttributeSet(EnumAttr(NoUnwind), EnumAttr(NoSync), - EnumAttr(NoFree), EnumAttr(InaccessibleMemOrArgMemOnly)) + EnumAttr(NoFree), + EnumAttr(InaccessibleMemOrArgMemOnly), + EnumAttr(WillReturn)) : AttributeSet(EnumAttr(NoUnwind))) __OMP_ATTRS_SET(SetterAttrs, OptimisticAttributes ? AttributeSet(EnumAttr(NoUnwind), EnumAttr(WriteOnly), - EnumAttr(NoSync), EnumAttr(NoFree), EnumAttr(InaccessibleMemOnly)) + EnumAttr(NoSync), EnumAttr(NoFree), + EnumAttr(InaccessibleMemOnly), + EnumAttr(WillReturn)) + : AttributeSet(EnumAttr(NoUnwind))) + +__OMP_ATTRS_SET(DefaultAttrs, + OptimisticAttributes + ? AttributeSet(EnumAttr(NoUnwind), EnumAttr(NoSync), + EnumAttr(WillReturn), EnumAttr(NoFree)) + : AttributeSet(EnumAttr(NoUnwind))) + +__OMP_ATTRS_SET(BarrierAttrs, + OptimisticAttributes + ? AttributeSet(EnumAttr(NoUnwind), EnumAttr(Convergent)) + : AttributeSet(EnumAttr(NoUnwind), EnumAttr(Convergent))) + +__OMP_ATTRS_SET(InaccessibleArgOnlyAttrs, + OptimisticAttributes + ? AttributeSet(EnumAttr(NoUnwind), EnumAttr(NoSync), + EnumAttr(InaccessibleMemOrArgMemOnly), + EnumAttr(WillReturn), EnumAttr(NoFree)) : AttributeSet(EnumAttr(NoUnwind))) +#if 0 +__OMP_ATTRS_SET(InaccessibleOnlyAttrs, + OptimisticAttributes + ? AttributeSet(EnumAttr(NoUnwind), EnumAttr(NoSync), + EnumAttr(InaccessibleMemOnly), + EnumAttr(WillReturn), EnumAttr(NoFree)) + : AttributeSet(EnumAttr(NoUnwind))) +#endif + +__OMP_ATTRS_SET(AllocAttrs, + OptimisticAttributes + ? AttributeSet(EnumAttr(NoUnwind), EnumAttr(NoSync), + EnumAttr(WillReturn)) + : AttributeSet(EnumAttr(NoUnwind))) + +__OMP_ATTRS_SET(ForkAttrs, OptimisticAttributes + ? AttributeSet(EnumAttr(NoUnwind)) + : AttributeSet(EnumAttr(NoUnwind))) + +__OMP_ATTRS_SET(ReadOnlyPtrAttrs, + OptimisticAttributes + ? AttributeSet(EnumAttr(ReadOnly), EnumAttr(NoFree), + EnumAttr(NoCapture)) + : AttributeSet()) + +#if 0 +__OMP_ATTRS_SET(WriteOnlyPtrAttrs, + OptimisticAttributes + ? AttributeSet(EnumAttr(WriteOnly), EnumAttr(NoFree), + EnumAttr(NoCapture)) + : AttributeSet()) +#endif + +__OMP_ATTRS_SET(ArgPtrAttrs, + OptimisticAttributes + ? AttributeSet(EnumAttr(NoCapture), EnumAttr(NoFree)) + : AttributeSet()) + +__OMP_ATTRS_SET(ReturnPtrAttrs, + OptimisticAttributes + ? AttributeSet(EnumAttr(NoAlias)) + : AttributeSet()) + +#if 0 +__OMP_ATTRS_SET(ReturnAlignedPtrAttrs, + OptimisticAttributes + ? 
AttributeSet(EnumAttr(NoAlias), EnumAttrInt(Alignment, 8), + EnumAttrInt(DereferenceableOrNull, 8)) + : AttributeSet()) +#endif + #undef __OMP_ATTRS_SET #undef OMP_ATTRS_SET @@ -630,295 +562,314 @@ __OMP_ATTRS_SET(SetterAttrs, #define __OMP_RTL_ATTRS(Name, FnAttrSet, RetAttrSet, ArgAttrSets) \ OMP_RTL_ATTRS(OMPRTL_##Name, FnAttrSet, RetAttrSet, ArgAttrSets) -__OMP_RTL_ATTRS(__kmpc_barrier, AttributeSet(), AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_cancel, - AttributeSet(EnumAttr(InaccessibleMemOrArgMemOnly)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_cancel_barrier, AttributeSet(), AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_flush, AttributeSet(), AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_global_thread_num, GetterAttrs, AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_fork_call, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_omp_taskwait, AttributeSet(), AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_omp_taskyield, - AttributeSet(EnumAttr(InaccessibleMemOrArgMemOnly)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_push_num_threads, - AttributeSet(EnumAttr(InaccessibleMemOrArgMemOnly)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_push_proc_bind, - AttributeSet(EnumAttr(InaccessibleMemOrArgMemOnly)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_serialized_parallel, - AttributeSet(EnumAttr(InaccessibleMemOrArgMemOnly)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_end_serialized_parallel, - AttributeSet(EnumAttr(InaccessibleMemOrArgMemOnly)), - AttributeSet(), {}) - -__OMP_RTL_ATTRS(omp_get_thread_num, GetterAttrs, AttributeSet(), {}) -__OMP_RTL_ATTRS(omp_get_num_threads, GetterAttrs, AttributeSet(), {}) -__OMP_RTL_ATTRS(omp_get_max_threads, GetterAttrs, AttributeSet(), {}) -__OMP_RTL_ATTRS(omp_in_parallel, GetterAttrs, AttributeSet(), {}) -__OMP_RTL_ATTRS(omp_get_dynamic, GetterAttrs, AttributeSet(), {}) -__OMP_RTL_ATTRS(omp_get_cancellation, GetterAttrs, AttributeSet(), {}) -__OMP_RTL_ATTRS(omp_get_nested, GetterAttrs, AttributeSet(), {}) -__OMP_RTL_ATTRS(omp_get_schedule, GetterArgWriteAttrs, AttributeSet(), - ArrayRef<AttributeSet>( - {AttributeSet(EnumAttr(NoCapture), EnumAttr(WriteOnly)), - AttributeSet(EnumAttr(NoCapture), EnumAttr(WriteOnly))})) -__OMP_RTL_ATTRS(omp_get_thread_limit, GetterAttrs, AttributeSet(), {}) +__OMP_RTL_ATTRS(__kmpc_barrier, BarrierAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_barrier_simple_spmd, BarrierAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_warp_active_thread_mask, BarrierAttrs, AttributeSet(), + ParamAttrs()) +__OMP_RTL_ATTRS(__kmpc_syncwarp, BarrierAttrs, AttributeSet(), ParamAttrs()) +__OMP_RTL_ATTRS(__kmpc_cancel, InaccessibleArgOnlyAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_cancel_barrier, BarrierAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_flush, BarrierAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_global_thread_num, GetterAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_fork_call, ForkAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_omp_taskwait, BarrierAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_omp_taskyield, InaccessibleArgOnlyAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_push_num_threads, InaccessibleArgOnlyAttrs, + AttributeSet(), ParamAttrs(ReadOnlyPtrAttrs)) 
+__OMP_RTL_ATTRS(__kmpc_push_proc_bind, InaccessibleArgOnlyAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_serialized_parallel, InaccessibleArgOnlyAttrs, + AttributeSet(), ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_end_serialized_parallel, InaccessibleArgOnlyAttrs, + AttributeSet(), ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_omp_reg_task_with_affinity, DefaultAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), ReadOnlyPtrAttrs, + AttributeSet(), ReadOnlyPtrAttrs)) + +__OMP_RTL_ATTRS(omp_get_thread_num, GetterAttrs, AttributeSet(), ParamAttrs()) +__OMP_RTL_ATTRS(omp_get_num_threads, GetterAttrs, AttributeSet(), ParamAttrs()) +__OMP_RTL_ATTRS(omp_get_max_threads, GetterAttrs, AttributeSet(), ParamAttrs()) +__OMP_RTL_ATTRS(omp_in_parallel, GetterAttrs, AttributeSet(), ParamAttrs()) +__OMP_RTL_ATTRS(omp_get_dynamic, GetterAttrs, AttributeSet(), ParamAttrs()) +__OMP_RTL_ATTRS(omp_get_cancellation, GetterAttrs, AttributeSet(), ParamAttrs()) +__OMP_RTL_ATTRS(omp_get_nested, GetterAttrs, AttributeSet(), ParamAttrs()) +__OMP_RTL_ATTRS( + omp_get_schedule, GetterArgWriteAttrs, AttributeSet(), + ParamAttrs(AttributeSet(EnumAttr(NoCapture), EnumAttr(WriteOnly)), + AttributeSet(EnumAttr(NoCapture), EnumAttr(WriteOnly)))) +__OMP_RTL_ATTRS(omp_get_thread_limit, GetterAttrs, AttributeSet(), ParamAttrs()) __OMP_RTL_ATTRS(omp_get_supported_active_levels, GetterAttrs, AttributeSet(), - {}) -__OMP_RTL_ATTRS(omp_get_max_active_levels, GetterAttrs, AttributeSet(), {}) -__OMP_RTL_ATTRS(omp_get_level, GetterAttrs, AttributeSet(), {}) -__OMP_RTL_ATTRS(omp_get_ancestor_thread_num, GetterAttrs, AttributeSet(), {}) -__OMP_RTL_ATTRS(omp_get_team_size, GetterAttrs, AttributeSet(), {}) -__OMP_RTL_ATTRS(omp_get_active_level, GetterAttrs, AttributeSet(), {}) -__OMP_RTL_ATTRS(omp_in_final, GetterAttrs, AttributeSet(), {}) -__OMP_RTL_ATTRS(omp_get_proc_bind, GetterAttrs, AttributeSet(), {}) -__OMP_RTL_ATTRS(omp_get_num_places, GetterAttrs, AttributeSet(), {}) -__OMP_RTL_ATTRS(omp_get_num_procs, GetterAttrs, AttributeSet(), {}) + ParamAttrs()) +__OMP_RTL_ATTRS(omp_get_max_active_levels, GetterAttrs, AttributeSet(), + ParamAttrs()) +__OMP_RTL_ATTRS(omp_get_level, GetterAttrs, AttributeSet(), ParamAttrs()) +__OMP_RTL_ATTRS(omp_get_ancestor_thread_num, GetterAttrs, AttributeSet(), + ParamAttrs()) +__OMP_RTL_ATTRS(omp_get_team_size, GetterAttrs, AttributeSet(), ParamAttrs()) +__OMP_RTL_ATTRS(omp_get_active_level, GetterAttrs, AttributeSet(), ParamAttrs()) +__OMP_RTL_ATTRS(omp_in_final, GetterAttrs, AttributeSet(), ParamAttrs()) +__OMP_RTL_ATTRS(omp_get_proc_bind, GetterAttrs, AttributeSet(), ParamAttrs()) +__OMP_RTL_ATTRS(omp_get_num_places, GetterAttrs, AttributeSet(), ParamAttrs()) +__OMP_RTL_ATTRS(omp_get_num_procs, GetterAttrs, AttributeSet(), ParamAttrs()) __OMP_RTL_ATTRS(omp_get_place_proc_ids, GetterArgWriteAttrs, AttributeSet(), - ArrayRef<AttributeSet>({AttributeSet(), - AttributeSet(EnumAttr(NoCapture), - EnumAttr(WriteOnly))})) -__OMP_RTL_ATTRS(omp_get_place_num, GetterAttrs, AttributeSet(), {}) -__OMP_RTL_ATTRS(omp_get_partition_num_places, GetterAttrs, AttributeSet(), {}) -__OMP_RTL_ATTRS(omp_get_partition_place_nums, GetterAttrs, AttributeSet(), {}) - -__OMP_RTL_ATTRS(omp_set_num_threads, SetterAttrs, AttributeSet(), {}) -__OMP_RTL_ATTRS(omp_set_dynamic, SetterAttrs, AttributeSet(), {}) -__OMP_RTL_ATTRS(omp_set_nested, SetterAttrs, AttributeSet(), {}) -__OMP_RTL_ATTRS(omp_set_schedule, SetterAttrs, AttributeSet(), {}) 
-__OMP_RTL_ATTRS(omp_set_max_active_levels, SetterAttrs, AttributeSet(), {}) - -__OMP_RTL_ATTRS(__kmpc_master, - AttributeSet(EnumAttr(InaccessibleMemOrArgMemOnly)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_end_master, - AttributeSet(EnumAttr(InaccessibleMemOrArgMemOnly)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_critical, - AttributeSet(EnumAttr(InaccessibleMemOrArgMemOnly)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_critical_with_hint, - AttributeSet(EnumAttr(InaccessibleMemOrArgMemOnly)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_end_critical, - AttributeSet(EnumAttr(InaccessibleMemOrArgMemOnly)), - AttributeSet(), {}) - -__OMP_RTL_ATTRS(__kmpc_begin, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_end, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) - -__OMP_RTL_ATTRS(__kmpc_reduce, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_reduce_nowait, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_end_reduce, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_end_reduce_nowait, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) - -__OMP_RTL_ATTRS(__kmpc_ordered, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_end_ordered, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) - -__OMP_RTL_ATTRS(__kmpc_for_static_init_4, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_for_static_init_4u, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_for_static_init_8, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_for_static_init_8u, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_for_static_fini, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_dist_dispatch_init_4, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_dist_dispatch_init_4u, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_dist_dispatch_init_8, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_dist_dispatch_init_8u, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_dispatch_init_4, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_dispatch_init_4u, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_dispatch_init_8, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_dispatch_init_8u, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_dispatch_next_4, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_dispatch_next_4u, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_dispatch_next_8, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_dispatch_next_8u, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_dispatch_fini_4, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_dispatch_fini_4u, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_dispatch_fini_8, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_dispatch_fini_8u, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_team_static_init_4, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), 
{}) -__OMP_RTL_ATTRS(__kmpc_team_static_init_4u, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_team_static_init_8, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_team_static_init_8u, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_dist_for_static_init_4, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_dist_for_static_init_4u, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_dist_for_static_init_8, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_dist_for_static_init_8u, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) - -__OMP_RTL_ATTRS(__kmpc_single, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_end_single, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) - -__OMP_RTL_ATTRS(__kmpc_omp_task_alloc, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_omp_task, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_end_taskgroup, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_taskgroup, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_omp_task_begin_if0, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_omp_task_complete_if0, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_omp_task_with_deps, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_taskloop, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_omp_target_task_alloc, - AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_taskred_modifier_init, - AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_taskred_init, - AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_task_reduction_modifier_fini, - AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_task_reduction_get_th_data, - AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_task_reduction_init, - AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_task_reduction_modifier_init, - AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_proxy_task_completed_ooo, - AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) - -__OMP_RTL_ATTRS(__kmpc_omp_wait_deps, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_cancellationpoint, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) - -__OMP_RTL_ATTRS(__kmpc_fork_teams, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_push_num_teams, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) - -__OMP_RTL_ATTRS(__kmpc_copyprivate, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_threadprivate_cached, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_threadprivate_register, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) - -__OMP_RTL_ATTRS(__kmpc_doacross_init, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_doacross_post, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_doacross_wait, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_doacross_fini, 
AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) - -__OMP_RTL_ATTRS(__kmpc_alloc, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_free, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) - -__OMP_RTL_ATTRS(__kmpc_init_allocator, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_destroy_allocator, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) - -__OMP_RTL_ATTRS(__kmpc_push_target_tripcount, - AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__tgt_target, - AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__tgt_target_nowait, - AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__tgt_target_teams, - AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__tgt_target_teams_nowait, - AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__tgt_register_requires, - AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__tgt_target_data_begin, - AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__tgt_target_data_begin_nowait, - AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__tgt_target_data_end, - AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__tgt_target_data_end_nowait, - AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__tgt_target_data_update, - AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__tgt_target_data_update_nowait, - AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__tgt_mapper_num_components, - AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__tgt_push_mapper_component, - AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_task_allow_completion_event, - AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) + ParamAttrs(AttributeSet(), AttributeSet(EnumAttr(NoCapture), + EnumAttr(WriteOnly)))) +__OMP_RTL_ATTRS(omp_get_place_num, GetterAttrs, AttributeSet(), ParamAttrs()) +__OMP_RTL_ATTRS(omp_get_partition_num_places, GetterAttrs, AttributeSet(), + ParamAttrs()) +__OMP_RTL_ATTRS(omp_get_partition_place_nums, GetterAttrs, AttributeSet(), + ParamAttrs()) + +__OMP_RTL_ATTRS(omp_set_num_threads, SetterAttrs, AttributeSet(), ParamAttrs()) +__OMP_RTL_ATTRS(omp_set_dynamic, SetterAttrs, AttributeSet(), ParamAttrs()) +__OMP_RTL_ATTRS(omp_set_nested, SetterAttrs, AttributeSet(), ParamAttrs()) +__OMP_RTL_ATTRS(omp_set_schedule, SetterAttrs, AttributeSet(), ParamAttrs()) +__OMP_RTL_ATTRS(omp_set_max_active_levels, SetterAttrs, AttributeSet(), + ParamAttrs()) + +__OMP_RTL_ATTRS(__kmpc_master, InaccessibleArgOnlyAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_end_master, InaccessibleArgOnlyAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_critical, BarrierAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet())) +__OMP_RTL_ATTRS(__kmpc_critical_with_hint, BarrierAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(), + AttributeSet())) +__OMP_RTL_ATTRS(__kmpc_end_critical, BarrierAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet())) + +__OMP_RTL_ATTRS(__kmpc_begin, DefaultAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_end, DefaultAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) + 
+__OMP_RTL_ATTRS(__kmpc_reduce, BarrierAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(), + AttributeSet(), ReadOnlyPtrAttrs, AttributeSet())) +__OMP_RTL_ATTRS(__kmpc_reduce_nowait, BarrierAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(), + AttributeSet(), ReadOnlyPtrAttrs, AttributeSet())) +__OMP_RTL_ATTRS(__kmpc_end_reduce, BarrierAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet())) +__OMP_RTL_ATTRS(__kmpc_end_reduce_nowait, BarrierAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet())) + +__OMP_RTL_ATTRS(__kmpc_ordered, BarrierAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_end_ordered, BarrierAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) + +__OMP_RTL_ATTRS(__kmpc_for_static_init_4, GetterArgWriteAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(), + ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs, + AttributeSet(), AttributeSet())) +__OMP_RTL_ATTRS(__kmpc_for_static_init_4u, GetterArgWriteAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(), + ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs, + AttributeSet(), AttributeSet())) +__OMP_RTL_ATTRS(__kmpc_for_static_init_8, GetterArgWriteAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(), + ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs, + AttributeSet(), AttributeSet())) +__OMP_RTL_ATTRS(__kmpc_for_static_init_8u, GetterArgWriteAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(), + ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs, + AttributeSet(), AttributeSet())) +__OMP_RTL_ATTRS(__kmpc_for_static_fini, InaccessibleArgOnlyAttrs, + AttributeSet(), ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_dist_dispatch_init_4, GetterArgWriteAttrs, + AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(), + ArgPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_dist_dispatch_init_4u, GetterArgWriteAttrs, + AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(), + ArgPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_dist_dispatch_init_8, GetterArgWriteAttrs, + AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(), + ArgPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_dist_dispatch_init_8u, GetterArgWriteAttrs, + AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(), + ArgPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_dispatch_init_4, GetterArgWriteAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_dispatch_init_4u, GetterArgWriteAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_dispatch_init_8, GetterArgWriteAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_dispatch_init_8u, GetterArgWriteAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_dispatch_next_4, GetterArgWriteAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), ArgPtrAttrs, + ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_dispatch_next_4u, GetterArgWriteAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), ArgPtrAttrs, + ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_dispatch_next_8, GetterArgWriteAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), ArgPtrAttrs, + ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs)) 
+__OMP_RTL_ATTRS(__kmpc_dispatch_next_8u, GetterArgWriteAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), ArgPtrAttrs, + ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_dispatch_fini_4, InaccessibleArgOnlyAttrs, + AttributeSet(), ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_dispatch_fini_4u, InaccessibleArgOnlyAttrs, + AttributeSet(), ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_dispatch_fini_8, InaccessibleArgOnlyAttrs, + AttributeSet(), ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_dispatch_fini_8u, InaccessibleArgOnlyAttrs, + AttributeSet(), ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_team_static_init_4, GetterArgWriteAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), ArgPtrAttrs, + ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_team_static_init_4u, GetterArgWriteAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), ArgPtrAttrs, + ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_team_static_init_8, GetterArgWriteAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), ArgPtrAttrs, + ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_team_static_init_8u, GetterArgWriteAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), ArgPtrAttrs, + ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_dist_for_static_init_4, GetterArgWriteAttrs, + AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(), + ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs, + ArgPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_dist_for_static_init_4u, GetterArgWriteAttrs, + AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(), + ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs, + ArgPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_dist_for_static_init_8, GetterArgWriteAttrs, + AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(), + ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs, + ArgPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_dist_for_static_init_8u, GetterArgWriteAttrs, + AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(), + ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs, + ArgPtrAttrs)) + +__OMP_RTL_ATTRS(__kmpc_single, BarrierAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_end_single, BarrierAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) + +__OMP_RTL_ATTRS(__kmpc_omp_task_alloc, DefaultAttrs, ReturnPtrAttrs, + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(), + AttributeSet(), AttributeSet(), ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_omp_task, DefaultAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet())) +__OMP_RTL_ATTRS(__kmpc_end_taskgroup, BarrierAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_taskgroup, BarrierAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_omp_task_begin_if0, DefaultAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_omp_task_complete_if0, DefaultAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_omp_task_with_deps, DefaultAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(), + AttributeSet(), ReadOnlyPtrAttrs, AttributeSet(), + ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_taskloop, DefaultAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(), + AttributeSet(), ArgPtrAttrs, 
ArgPtrAttrs, + AttributeSet(), AttributeSet(), AttributeSet(), + AttributeSet(), AttributeSet())) +__OMP_RTL_ATTRS(__kmpc_omp_target_task_alloc, DefaultAttrs, ReturnPtrAttrs, + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(), + AttributeSet(), AttributeSet(), ReadOnlyPtrAttrs, + AttributeSet())) +__OMP_RTL_ATTRS(__kmpc_taskred_modifier_init, DefaultAttrs, ReturnPtrAttrs, + ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_taskred_init, DefaultAttrs, AttributeSet(), ParamAttrs()) +__OMP_RTL_ATTRS(__kmpc_task_reduction_modifier_fini, BarrierAttrs, + AttributeSet(), ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_task_reduction_get_th_data, DefaultAttrs, ReturnPtrAttrs, + ParamAttrs()) +__OMP_RTL_ATTRS(__kmpc_task_reduction_init, DefaultAttrs, ReturnPtrAttrs, + ParamAttrs()) +__OMP_RTL_ATTRS(__kmpc_task_reduction_modifier_init, DefaultAttrs, + ReturnPtrAttrs, ParamAttrs()) +__OMP_RTL_ATTRS(__kmpc_proxy_task_completed_ooo, DefaultAttrs, AttributeSet(), + ParamAttrs()) + +__OMP_RTL_ATTRS(__kmpc_omp_wait_deps, BarrierAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(), + ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_cancellationpoint, DefaultAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) + +__OMP_RTL_ATTRS(__kmpc_fork_teams, ForkAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_push_num_teams, InaccessibleArgOnlyAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) + +__OMP_RTL_ATTRS(__kmpc_copyprivate, DefaultAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(), + ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_threadprivate_cached, DefaultAttrs, ReturnPtrAttrs, + ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_threadprivate_register, DefaultAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), ReadOnlyPtrAttrs, + ReadOnlyPtrAttrs, ReadOnlyPtrAttrs)) + +__OMP_RTL_ATTRS(__kmpc_doacross_init, BarrierAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_doacross_post, BarrierAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_doacross_wait, BarrierAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_doacross_fini, BarrierAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) + +__OMP_RTL_ATTRS(__kmpc_alloc, DefaultAttrs, ReturnPtrAttrs, {}) +__OMP_RTL_ATTRS(__kmpc_free, AllocAttrs, AttributeSet(), {}) + +__OMP_RTL_ATTRS(__kmpc_init_allocator, DefaultAttrs, ReturnPtrAttrs, {}) +__OMP_RTL_ATTRS(__kmpc_destroy_allocator, AllocAttrs, AttributeSet(), {}) + +__OMP_RTL_ATTRS(__kmpc_push_target_tripcount, SetterAttrs, AttributeSet(), {}) +__OMP_RTL_ATTRS(__tgt_target_mapper, ForkAttrs, AttributeSet(), {}) +__OMP_RTL_ATTRS(__tgt_target_nowait_mapper, ForkAttrs, AttributeSet(), {}) +__OMP_RTL_ATTRS(__tgt_target_teams_mapper, ForkAttrs, AttributeSet(), {}) +__OMP_RTL_ATTRS(__tgt_target_teams_nowait_mapper, ForkAttrs, AttributeSet(), {}) +__OMP_RTL_ATTRS(__tgt_register_requires, ForkAttrs, AttributeSet(), {}) +__OMP_RTL_ATTRS(__tgt_target_data_begin_mapper, ForkAttrs, AttributeSet(), {}) +__OMP_RTL_ATTRS(__tgt_target_data_begin_nowait_mapper, ForkAttrs, + AttributeSet(), {}) +__OMP_RTL_ATTRS(__tgt_target_data_end_mapper, ForkAttrs, AttributeSet(), {}) +__OMP_RTL_ATTRS(__tgt_target_data_end_nowait_mapper, ForkAttrs, + AttributeSet(), {}) +__OMP_RTL_ATTRS(__tgt_target_data_update_mapper, ForkAttrs, 
AttributeSet(), {}) +__OMP_RTL_ATTRS(__tgt_target_data_update_nowait_mapper, ForkAttrs, + AttributeSet(), {}) +__OMP_RTL_ATTRS(__tgt_mapper_num_components, ForkAttrs, AttributeSet(), {}) +__OMP_RTL_ATTRS(__tgt_push_mapper_component, ForkAttrs, AttributeSet(), {}) +__OMP_RTL_ATTRS(__kmpc_task_allow_completion_event, DefaultAttrs, + ReturnPtrAttrs, ParamAttrs(ReadOnlyPtrAttrs)) #undef __OMP_RTL_ATTRS #undef OMP_RTL_ATTRS #undef AttributeSet #undef EnumAttr +#undef EnumAttrInt +#undef ParamAttrs ///} @@ -1066,10 +1017,6 @@ __OMP_TRAIT_PROPERTY(device, kind, gpu) __OMP_TRAIT_PROPERTY(device, kind, fpga) __OMP_TRAIT_PROPERTY(device, kind, any) -__OMP_TRAIT_SELECTOR(device, isa, true) - -// TODO: What do we want for ISA? - __OMP_TRAIT_SELECTOR(device, arch, true) __OMP_TRAIT_PROPERTY(device, arch, arm) @@ -1078,6 +1025,7 @@ __OMP_TRAIT_PROPERTY(device, arch, aarch64) __OMP_TRAIT_PROPERTY(device, arch, aarch64_be) __OMP_TRAIT_PROPERTY(device, arch, aarch64_32) __OMP_TRAIT_PROPERTY(device, arch, ppc) +__OMP_TRAIT_PROPERTY(device, arch, ppcle) __OMP_TRAIT_PROPERTY(device, arch, ppc64) __OMP_TRAIT_PROPERTY(device, arch, ppc64le) __OMP_TRAIT_PROPERTY(device, arch, x86) @@ -1107,6 +1055,8 @@ __OMP_TRAIT_SELECTOR(implementation, extension, true) __OMP_TRAIT_PROPERTY(implementation, extension, match_all) __OMP_TRAIT_PROPERTY(implementation, extension, match_any) __OMP_TRAIT_PROPERTY(implementation, extension, match_none) +__OMP_TRAIT_PROPERTY(implementation, extension, disable_implicit_base) +__OMP_TRAIT_PROPERTY(implementation, extension, allow_templates) __OMP_TRAIT_SET(user) @@ -1116,6 +1066,18 @@ __OMP_TRAIT_PROPERTY(user, condition, true) __OMP_TRAIT_PROPERTY(user, condition, false) __OMP_TRAIT_PROPERTY(user, condition, unknown) + +// Note that we put isa last so that the other conditions are checked first. +// This allows us to issue warnings wrt. isa only if we match otherwise. +__OMP_TRAIT_SELECTOR(device, isa, true) + +// We use "__ANY" as a placeholder in the isa property to denote the +// conceptual "any", not the literal `any` used in kind. The string we +// we use is not important except that it will show up in diagnostics. +OMP_TRAIT_PROPERTY(device_isa___ANY, device, device_isa, + "<any, entirely target dependent>") + + #undef OMP_TRAIT_SET #undef __OMP_TRAIT_SET ///} @@ -1153,3 +1115,27 @@ OMP_LAST_TRAIT_PROPERTY( #undef __OMP_REQUIRES_TRAIT #undef OMP_REQUIRES_TRAIT ///} + + +/// Assumption clauses +/// +///{ + +#ifdef OMP_ASSUME_CLAUSE +#define __OMP_ASSUME_CLAUSE(Identifier, StartsWith, HasDirectiveList, HasExpression) \ +OMP_ASSUME_CLAUSE(Identifier, StartsWith, HasDirectiveList, HasExpression) +#else +#define __OMP_ASSUME_CLAUSE(...) 
+#endif + +__OMP_ASSUME_CLAUSE(llvm::StringLiteral("ext_"), true, false, false) +__OMP_ASSUME_CLAUSE(llvm::StringLiteral("absent"), false, true, false) +__OMP_ASSUME_CLAUSE(llvm::StringLiteral("contains"), false, true, false) +__OMP_ASSUME_CLAUSE(llvm::StringLiteral("holds"), false, false, true) +__OMP_ASSUME_CLAUSE(llvm::StringLiteral("no_openmp"), false, false, false) +__OMP_ASSUME_CLAUSE(llvm::StringLiteral("no_openmp_routines"), false, false, false) +__OMP_ASSUME_CLAUSE(llvm::StringLiteral("no_parallelism"), false, false, false) + +#undef __OMP_ASSUME_CLAUSE +#undef OMP_ASSUME_CLAUSE +///} diff --git a/llvm/include/llvm/FuzzMutate/IRMutator.h b/llvm/include/llvm/FuzzMutate/IRMutator.h index 40a1ce8aeec9..423582eace9b 100644 --- a/llvm/include/llvm/FuzzMutate/IRMutator.h +++ b/llvm/include/llvm/FuzzMutate/IRMutator.h @@ -102,6 +102,17 @@ public: void mutate(Instruction &Inst, RandomIRBuilder &IB) override; }; +class InstModificationIRStrategy : public IRMutationStrategy { +public: + uint64_t getWeight(size_t CurrentSize, size_t MaxSize, + uint64_t CurrentWeight) override { + return 4; + } + + using IRMutationStrategy::mutate; + void mutate(Instruction &Inst, RandomIRBuilder &IB) override; +}; + } // end llvm namespace #endif // LLVM_FUZZMUTATE_IRMUTATOR_H diff --git a/llvm/include/llvm/IR/Argument.h b/llvm/include/llvm/IR/Argument.h index af469e8a5d1a..76d780485ea0 100644 --- a/llvm/include/llvm/IR/Argument.h +++ b/llvm/include/llvm/IR/Argument.h @@ -52,7 +52,9 @@ public: /// Return true if this argument has the nonnull attribute. Also returns true /// if at least one byte is known to be dereferenceable and the pointer is in /// addrspace(0). - bool hasNonNullAttr() const; + /// If AllowUndefOrPoison is true, respect the semantics of nonnull attribute + /// and return true even if the argument can be undef or poison. + bool hasNonNullAttr(bool AllowUndefOrPoison = true) const; /// If this argument has the dereferenceable attribute, return the number of /// bytes known to be dereferenceable. Otherwise, zero is returned. @@ -65,6 +67,9 @@ public: /// Return true if this argument has the byval attribute. bool hasByValAttr() const; + /// Return true if this argument has the byref attribute. + bool hasByRefAttr() const; + /// Return true if this argument has the swiftself attribute. bool hasSwiftSelfAttr() const; @@ -72,13 +77,23 @@ public: bool hasSwiftErrorAttr() const; /// Return true if this argument has the byval, inalloca, or preallocated - /// attribute. These attributes represent arguments being passed by value. - bool hasPassPointeeByValueAttr() const; + /// attribute. These attributes represent arguments being passed by value, + /// with an associated copy between the caller and callee + bool hasPassPointeeByValueCopyAttr() const; /// If this argument satisfies has hasPassPointeeByValueAttr, return the /// in-memory ABI size copied to the stack for the call. Otherwise, return 0. uint64_t getPassPointeeByValueCopySize(const DataLayout &DL) const; + /// Return true if this argument has the byval, sret, inalloca, preallocated, + /// or byref attribute. These attributes represent arguments being passed by + /// value (which may or may not involve a stack copy) + bool hasPointeeInMemoryValueAttr() const; + + /// If hasPointeeInMemoryValueAttr returns true, the in-memory ABI type is + /// returned. Otherwise, nullptr. + Type *getPointeeInMemoryValueType() const; + /// If this is a byval or inalloca argument, return its alignment. 
/// FIXME: Remove this function once transition to Align is over. /// Use getParamAlign() instead. @@ -90,6 +105,12 @@ public: /// If this is a byval argument, return its type. Type *getParamByValType() const; + /// If this is an sret argument, return its type. + Type *getParamStructRetType() const; + + /// If this is a byref argument, return its type. + Type *getParamByRefType() const; + /// Return true if this argument has the nest attribute. bool hasNestAttr() const; diff --git a/llvm/include/llvm/IR/Assumptions.h b/llvm/include/llvm/IR/Assumptions.h new file mode 100644 index 000000000000..f64616c25d87 --- /dev/null +++ b/llvm/include/llvm/IR/Assumptions.h @@ -0,0 +1,50 @@ +//===--- Assumptions.h - Assumption handling and organization ---*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// String assumptions that are known to optimization passes should be placed in +// the KnownAssumptionStrings set. This can be done in various ways, i.a., +// via a static KnownAssumptionString object. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_IR_ASSUMPTIONS_H +#define LLVM_IR_ASSUMPTIONS_H + +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSet.h" + +namespace llvm { + +class Function; + +/// The key we use for assumption attributes. +constexpr StringRef AssumptionAttrKey = "llvm.assume"; + +/// A set of known assumption strings that are accepted without warning and +/// which can be recommended as typo correction. +extern StringSet<> KnownAssumptionStrings; + +/// Helper that allows to insert a new assumption string in the known assumption +/// set by creating a (static) object. +struct KnownAssumptionString { + KnownAssumptionString(StringRef AssumptionStr) + : AssumptionStr(AssumptionStr) { + KnownAssumptionStrings.insert(AssumptionStr); + } + operator StringRef() const { return AssumptionStr; } + +private: + StringRef AssumptionStr; +}; + +/// Return true if \p F has the assumption \p AssumptionStr attached. +bool hasAssumption(Function &F, const KnownAssumptionString &AssumptionStr); + +} // namespace llvm + +#endif diff --git a/llvm/include/llvm/IR/Attributes.h b/llvm/include/llvm/IR/Attributes.h index 58365aa2b764..b4056540663f 100644 --- a/llvm/include/llvm/IR/Attributes.h +++ b/llvm/include/llvm/IR/Attributes.h @@ -108,8 +108,17 @@ public: unsigned ElemSizeArg, const Optional<unsigned> &NumElemsArg); static Attribute getWithByValType(LLVMContext &Context, Type *Ty); + static Attribute getWithStructRetType(LLVMContext &Context, Type *Ty); + static Attribute getWithByRefType(LLVMContext &Context, Type *Ty); static Attribute getWithPreallocatedType(LLVMContext &Context, Type *Ty); + /// For a typed attribute, return the equivalent attribute with the type + /// changed to \p ReplacementTy. + Attribute getWithNewType(LLVMContext &Context, Type *ReplacementTy) { + assert(isTypeAttribute() && "this requires a typed attribute"); + return get(Context, getKindAsEnum(), ReplacementTy); + } + static Attribute::AttrKind getAttrKindFromName(StringRef AttrName); static StringRef getNameFromAttrKind(Attribute::AttrKind AttrKind); @@ -138,6 +147,9 @@ public: /// Return true if the attribute is a type attribute. 
bool isTypeAttribute() const; + /// Return true if the attribute is any kind of attribute. + bool isValid() const { return pImpl; } + /// Return true if the attribute is present. bool hasAttribute(AttrKind Val) const; @@ -303,6 +315,8 @@ public: uint64_t getDereferenceableBytes() const; uint64_t getDereferenceableOrNullBytes() const; Type *getByValType() const; + Type *getStructRetType() const; + Type *getByRefType() const; Type *getPreallocatedType() const; std::pair<unsigned, Optional<unsigned>> getAllocSizeArgs() const; std::string getAsString(bool InAttrGrp = false) const; @@ -385,6 +399,9 @@ private: static AttributeList getImpl(LLVMContext &C, ArrayRef<AttributeSet> AttrSets); + AttributeList setAttributes(LLVMContext &C, unsigned Index, + AttributeSet Attrs) const; + public: AttributeList() = default; @@ -503,6 +520,17 @@ public: return removeAttributes(C, ArgNo + FirstArgIndex); } + /// Replace the type contained by attribute \p AttrKind at index \p ArgNo wih + /// \p ReplacementTy, preserving all other attributes. + LLVM_NODISCARD AttributeList replaceAttributeType(LLVMContext &C, + unsigned ArgNo, + Attribute::AttrKind Kind, + Type *ReplacementTy) const { + Attribute Attr = getAttribute(ArgNo, Kind); + auto Attrs = removeAttribute(C, ArgNo, Kind); + return Attrs.addAttribute(C, ArgNo, Attr.getWithNewType(C, ReplacementTy)); + } + /// \brief Add the dereferenceable attribute to the attribute set at the given /// index. Returns a new list because attribute lists are immutable. LLVM_NODISCARD AttributeList addDereferenceableAttr(LLVMContext &C, @@ -626,6 +654,12 @@ public: /// Return the byval type for the specified function parameter. Type *getParamByValType(unsigned ArgNo) const; + /// Return the sret type for the specified function parameter. + Type *getParamStructRetType(unsigned ArgNo) const; + + /// Return the byref type for the specified function parameter. + Type *getParamByRefType(unsigned ArgNo) const; + /// Return the preallocated type for the specified function parameter. Type *getParamPreallocatedType(unsigned ArgNo) const; @@ -729,6 +763,8 @@ class AttrBuilder { uint64_t DerefOrNullBytes = 0; uint64_t AllocSizeArgs = 0; Type *ByValType = nullptr; + Type *StructRetType = nullptr; + Type *ByRefType = nullptr; Type *PreallocatedType = nullptr; public: @@ -744,7 +780,14 @@ public: void clear(); /// Add an attribute to the builder. - AttrBuilder &addAttribute(Attribute::AttrKind Val); + AttrBuilder &addAttribute(Attribute::AttrKind Val) { + assert((unsigned)Val < Attribute::EndAttrKinds && + "Attribute out of range!"); + assert(!Attribute::doesAttrKindHaveArgument(Val) && + "Adding integer attribute without adding a value!"); + Attrs[Val] = true; + return *this; + } /// Add the Attribute object to the builder. AttrBuilder &addAttribute(Attribute A); @@ -808,6 +851,12 @@ public: /// Retrieve the byval type. Type *getByValType() const { return ByValType; } + /// Retrieve the sret type. + Type *getStructRetType() const { return StructRetType; } + + /// Retrieve the byref type. + Type *getByRefType() const { return ByRefType; } + /// Retrieve the preallocated type. Type *getPreallocatedType() const { return PreallocatedType; } @@ -854,6 +903,12 @@ public: /// This turns a byval type into the form used internally in Attribute. AttrBuilder &addByValAttr(Type *Ty); + /// This turns a sret type into the form used internally in Attribute. + AttrBuilder &addStructRetAttr(Type *Ty); + + /// This turns a byref type into the form used internally in Attribute. 
+ AttrBuilder &addByRefAttr(Type *Ty); + /// This turns a preallocated type into the form used internally in Attribute. AttrBuilder &addPreallocatedAttr(Type *Ty); @@ -886,10 +941,8 @@ public: bool td_empty() const { return TargetDepAttrs.empty(); } - bool operator==(const AttrBuilder &B); - bool operator!=(const AttrBuilder &B) { - return !(*this == B); - } + bool operator==(const AttrBuilder &B) const; + bool operator!=(const AttrBuilder &B) const { return !(*this == B); } }; namespace AttributeFuncs { @@ -901,9 +954,24 @@ AttrBuilder typeIncompatible(Type *Ty); /// attributes for inlining purposes. bool areInlineCompatible(const Function &Caller, const Function &Callee); + +/// Checks if there are any incompatible function attributes between +/// \p A and \p B. +/// +/// \param [in] A - The first function to be compared with. +/// \param [in] B - The second function to be compared with. +/// \returns true if the functions have compatible attributes. +bool areOutlineCompatible(const Function &A, const Function &B); + /// Merge caller's and callee's attributes. void mergeAttributesForInlining(Function &Caller, const Function &Callee); +/// Merges the functions attributes from \p ToMerge into function \p Base. +/// +/// \param [in,out] Base - The function being merged into. +/// \param [in] ToMerge - The function to merge attributes from. +void mergeAttributesForOutlining(Function &Base, const Function &ToMerge); + } // end namespace AttributeFuncs } // end namespace llvm diff --git a/llvm/include/llvm/IR/Attributes.td b/llvm/include/llvm/IR/Attributes.td index 395f9dbfb176..f7ffc888c65a 100644 --- a/llvm/include/llvm/IR/Attributes.td +++ b/llvm/include/llvm/IR/Attributes.td @@ -1,3 +1,15 @@ +//===- Attributes.td - Defines all LLVM attributes ---------*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines all the LLVM attributes. +// +//===----------------------------------------------------------------------===// + /// Attribute base class. class Attr<string S> { // String representation of this attribute in the IR. @@ -39,6 +51,9 @@ def Builtin : EnumAttr<"builtin">; /// Pass structure by value. def ByVal : TypeAttr<"byval">; +/// Mark in-memory ABI type. +def ByRef : TypeAttr<"byref">; + /// Parameter or return value may not contain uninitialized or poison bits. def NoUndef : EnumAttr<"noundef">; @@ -48,6 +63,9 @@ def Cold : EnumAttr<"cold">; /// Can only be moved to control-equivalent blocks. def Convergent : EnumAttr<"convergent">; +/// Marks function as being in a hot path and frequently called. +def Hot: EnumAttr<"hot">; + /// Pointer is known to be dereferenceable. def Dereferenceable : IntAttr<"dereferenceable">; @@ -88,6 +106,9 @@ def NoAlias : EnumAttr<"noalias">; /// Callee isn't recognized as a builtin. def NoBuiltin : EnumAttr<"nobuiltin">; +/// Function cannot enter into caller's translation unit. +def NoCallback : EnumAttr<"nocallback">; + /// Function creates no aliases of pointer. def NoCapture : EnumAttr<"nocapture">; @@ -106,7 +127,7 @@ def NoInline : EnumAttr<"noinline">; /// Function is called early and/or often, so lazy binding isn't worthwhile. def NonLazyBind : EnumAttr<"nonlazybind">; -/// Disable merging for call sites +/// Disable merging for specified functions or call sites. 
def NoMerge : EnumAttr<"nomerge">; /// Pointer is known to be not null. @@ -127,6 +148,9 @@ def NoSync : EnumAttr<"nosync">; /// Disable Indirect Branch Tracking. def NoCfCheck : EnumAttr<"nocf_check">; +/// Function should be instrumented. +def NoProfile : EnumAttr<"noprofile">; + /// Function doesn't unwind stack. def NoUnwind : EnumAttr<"nounwind">; @@ -189,7 +213,7 @@ def StackProtectStrong : EnumAttr<"sspstrong">; def StrictFP : EnumAttr<"strictfp">; /// Hidden pointer to structure to return. -def StructRet : EnumAttr<"sret">; +def StructRet : TypeAttr<"sret">; /// AddressSanitizer is on. def SanitizeAddress : EnumAttr<"sanitize_address">; @@ -232,6 +256,9 @@ def WriteOnly : EnumAttr<"writeonly">; /// Zero extended before/after call. def ZExt : EnumAttr<"zeroext">; +/// Function is required to make Forward Progress. +def MustProgress : TypeAttr<"mustprogress">; + /// Target-independent string attributes. def LessPreciseFPMAD : StrBoolAttr<"less-precise-fpmad">; def NoInfsFPMath : StrBoolAttr<"no-infs-fp-math">; @@ -285,3 +312,4 @@ def : MergeRule<"adjustCallerStackProbes">; def : MergeRule<"adjustCallerStackProbeSize">; def : MergeRule<"adjustMinLegalVectorWidth">; def : MergeRule<"adjustNullPointerValidAttr">; +def : MergeRule<"setAND<MustProgressAttr>">; diff --git a/llvm/include/llvm/IR/BasicBlock.h b/llvm/include/llvm/IR/BasicBlock.h index 24d568a728c6..b86bb16e1239 100644 --- a/llvm/include/llvm/IR/BasicBlock.h +++ b/llvm/include/llvm/IR/BasicBlock.h @@ -165,19 +165,24 @@ public: } /// Returns a pointer to the first instruction in this block that is not a - /// PHINode or a debug intrinsic. - const Instruction* getFirstNonPHIOrDbg() const; - Instruction* getFirstNonPHIOrDbg() { + /// PHINode or a debug intrinsic, or any pseudo operation if \c SkipPseudoOp + /// is true. + const Instruction *getFirstNonPHIOrDbg(bool SkipPseudoOp = false) const; + Instruction *getFirstNonPHIOrDbg(bool SkipPseudoOp = false) { return const_cast<Instruction *>( - static_cast<const BasicBlock *>(this)->getFirstNonPHIOrDbg()); + static_cast<const BasicBlock *>(this)->getFirstNonPHIOrDbg( + SkipPseudoOp)); } /// Returns a pointer to the first instruction in this block that is not a - /// PHINode, a debug intrinsic, or a lifetime intrinsic. - const Instruction* getFirstNonPHIOrDbgOrLifetime() const; - Instruction* getFirstNonPHIOrDbgOrLifetime() { + /// PHINode, a debug intrinsic, or a lifetime intrinsic, or any pseudo + /// operation if \c SkipPseudoOp is true. + const Instruction * + getFirstNonPHIOrDbgOrLifetime(bool SkipPseudoOp = false) const; + Instruction *getFirstNonPHIOrDbgOrLifetime(bool SkipPseudoOp = false) { return const_cast<Instruction *>( - static_cast<const BasicBlock *>(this)->getFirstNonPHIOrDbgOrLifetime()); + static_cast<const BasicBlock *>(this)->getFirstNonPHIOrDbgOrLifetime( + SkipPseudoOp)); } /// Returns an iterator to the first instruction in this block that is @@ -191,16 +196,18 @@ public: } /// Return a const iterator range over the instructions in the block, skipping - /// any debug instructions. + /// any debug instructions. Skip any pseudo operations as well if \c + /// SkipPseudoOp is true. iterator_range<filter_iterator<BasicBlock::const_iterator, std::function<bool(const Instruction &)>>> - instructionsWithoutDebug() const; + instructionsWithoutDebug(bool SkipPseudoOp = false) const; /// Return an iterator range over the instructions in the block, skipping any - /// debug instructions. 
- iterator_range<filter_iterator<BasicBlock::iterator, - std::function<bool(Instruction &)>>> - instructionsWithoutDebug(); + /// debug instructions. Skip and any pseudo operations as well if \c + /// SkipPseudoOp is true. + iterator_range< + filter_iterator<BasicBlock::iterator, std::function<bool(Instruction &)>>> + instructionsWithoutDebug(bool SkipPseudoOp = false); /// Return the size of the basic block ignoring debug instructions filter_iterator<BasicBlock::const_iterator, @@ -320,7 +327,9 @@ public: phi_iterator_impl() = default; // Allow conversion between instantiations where valid. - template <typename PHINodeU, typename BBIteratorU> + template <typename PHINodeU, typename BBIteratorU, + typename = std::enable_if_t< + std::is_convertible<PHINodeU *, PHINodeT *>::value>> phi_iterator_impl(const phi_iterator_impl<PHINodeU, BBIteratorU> &Arg) : PN(Arg.PN) {} @@ -389,22 +398,49 @@ public: /// Split the basic block into two basic blocks at the specified instruction. /// - /// Note that all instructions BEFORE the specified iterator stay as part of - /// the original basic block, an unconditional branch is added to the original - /// BB, and the rest of the instructions in the BB are moved to the new BB, - /// including the old terminator. The newly formed BasicBlock is returned. - /// This function invalidates the specified iterator. + /// If \p Before is true, splitBasicBlockBefore handles the + /// block splitting. Otherwise, execution proceeds as described below. + /// + /// Note that all instructions BEFORE the specified iterator + /// stay as part of the original basic block, an unconditional branch is added + /// to the original BB, and the rest of the instructions in the BB are moved + /// to the new BB, including the old terminator. The newly formed basic block + /// is returned. This function invalidates the specified iterator. /// /// Note that this only works on well formed basic blocks (must have a - /// terminator), and 'I' must not be the end of instruction list (which would - /// cause a degenerate basic block to be formed, having a terminator inside of - /// the basic block). + /// terminator), and \p 'I' must not be the end of instruction list (which + /// would cause a degenerate basic block to be formed, having a terminator + /// inside of the basic block). /// /// Also note that this doesn't preserve any passes. To split blocks while /// keeping loop information consistent, use the SplitBlock utility function. - BasicBlock *splitBasicBlock(iterator I, const Twine &BBName = ""); - BasicBlock *splitBasicBlock(Instruction *I, const Twine &BBName = "") { - return splitBasicBlock(I->getIterator(), BBName); + BasicBlock *splitBasicBlock(iterator I, const Twine &BBName = "", + bool Before = false); + BasicBlock *splitBasicBlock(Instruction *I, const Twine &BBName = "", + bool Before = false) { + return splitBasicBlock(I->getIterator(), BBName, Before); + } + + /// Split the basic block into two basic blocks at the specified instruction + /// and insert the new basic blocks as the predecessor of the current block. + /// + /// This function ensures all instructions AFTER and including the specified + /// iterator \p I are part of the original basic block. All Instructions + /// BEFORE the iterator \p I are moved to the new BB and an unconditional + /// branch is added to the new BB. The new basic block is returned. 
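Before the remaining constraints on splitBasicBlockBefore below, a minimal usage sketch of the two split flavours (illustrative only; BB and SplitPt are hypothetical names, and each call is an independent alternative, since splitting invalidates the split point):
// Keep [begin, SplitPt) in BB; move [SplitPt, end) into a new successor block.
BasicBlock *Tail = BB->splitBasicBlock(SplitPt, "split.tail");
// Keep [SplitPt, end) in BB; move [begin, SplitPt) into a new predecessor
// block that unconditionally branches to BB.
BasicBlock *Head = BB->splitBasicBlockBefore(SplitPt, "split.head");
// Passing Before=true to splitBasicBlock forwards to splitBasicBlockBefore.
BasicBlock *Head2 = BB->splitBasicBlock(SplitPt, "split.head", /*Before=*/true);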
+ ///
+ /// Note that this only works on well formed basic blocks (must have a
+ /// terminator), and \p 'I' must not be the end of instruction list (which
+ /// would cause a degenerate basic block to be formed, having a terminator
+ /// inside of the basic block). \p 'I' cannot be an iterator for a PHINode
+ /// with multiple incoming blocks.
+ ///
+ /// Also note that this doesn't preserve any passes. To split blocks while
+ /// keeping loop information consistent, use the SplitBlockBefore utility
+ /// function.
+ BasicBlock *splitBasicBlockBefore(iterator I, const Twine &BBName = "");
+ BasicBlock *splitBasicBlockBefore(Instruction *I, const Twine &BBName = "") {
+ return splitBasicBlockBefore(I->getIterator(), BBName);
}
/// Returns true if there are any uses of this basic block other than
diff --git a/llvm/include/llvm/IR/CallingConv.h b/llvm/include/llvm/IR/CallingConv.h
index d0906de3ea4e..6a4e368b2e9d 100644
--- a/llvm/include/llvm/IR/CallingConv.h
+++ b/llvm/include/llvm/IR/CallingConv.h
@@ -241,6 +241,9 @@ namespace CallingConv {
/// The remainder matches the regular calling convention.
WASM_EmscriptenInvoke = 99,
+ /// Calling convention used for AMD graphics targets.
+ AMDGPU_Gfx = 100,
+
/// The highest possible calling convention ID. Must be some 2^k - 1.
MaxID = 1023
};
diff --git a/llvm/include/llvm/IR/Constant.h b/llvm/include/llvm/IR/Constant.h
index 9a1d2b80c48e..0190aca27b72 100644
--- a/llvm/include/llvm/IR/Constant.h
+++ b/llvm/include/llvm/IR/Constant.h
@@ -78,11 +78,13 @@ public:
bool isMinSignedValue() const;
/// Return true if this is a finite and non-zero floating-point scalar
- /// constant or a vector constant with all finite and non-zero elements.
+ /// constant or a fixed width vector constant with all finite and non-zero
+ /// elements.
bool isFiniteNonZeroFP() const;
- /// Return true if this is a normal (as opposed to denormal) floating-point
- /// scalar constant or a vector constant with all normal elements.
+ /// Return true if this is a normal (as opposed to denormal, infinity, nan,
+ /// or zero) floating-point scalar constant or a vector constant with all
+ /// normal elements. See APFloat::isNormal.
bool isNormalFP() const;
/// Return true if this scalar has an exact multiplicative inverse or this
@@ -99,12 +101,18 @@ public:
/// lane, the constants still match.
bool isElementWiseEqual(Value *Y) const;
- /// Return true if this is a vector constant that includes any undefined
+ /// Return true if this is a vector constant that includes any undef or
+ /// poison elements. Since it is impossible to inspect a scalable vector
+ /// element-wise at compile time, this function returns true only if the
+ /// entire vector is undef or poison.
+ bool containsUndefOrPoisonElement() const;
+
+ /// Return true if this is a vector constant that includes any poison
/// elements.
- bool containsUndefElement() const;
+ bool containsPoisonElement() const;
- /// Return true if this is a vector constant that includes any constant
- /// expressions.
+ /// Return true if this is a fixed width vector constant that includes
+ /// any constant expressions.
bool containsConstantExpression() const;
/// Return true if evaluation of this constant could trap. This is true for
@@ -200,6 +208,12 @@ public:
/// Try to replace undefined constant C or undefined elements in C with
/// Replacement. If no changes are made, the constant C is returned.
static Constant *replaceUndefsWith(Constant *C, Constant *Replacement); + + /// Merges undefs of a Constant with another Constant, along with the + /// undefs already present. Other doesn't have to be the same type as C, but + /// both must either be scalars or vectors with the same element count. If no + /// changes are made, the constant C is returned. + static Constant *mergeUndefsWith(Constant *C, Constant *Other); }; } // end namespace llvm diff --git a/llvm/include/llvm/IR/ConstantRange.h b/llvm/include/llvm/IR/ConstantRange.h index 8ecb9aa0ce02..20e8e67436a4 100644 --- a/llvm/include/llvm/IR/ConstantRange.h +++ b/llvm/include/llvm/IR/ConstantRange.h @@ -150,6 +150,14 @@ public: const APInt &Other, unsigned NoWrapKind); + /// Returns true if ConstantRange calculations are supported for intrinsic + /// with \p IntrinsicID. + static bool isIntrinsicSupported(Intrinsic::ID IntrinsicID); + + /// Compute range of intrinsic result for the given operand ranges. + static ConstantRange intrinsic(Intrinsic::ID IntrinsicID, + ArrayRef<ConstantRange> Ops); + /// Set up \p Pred and \p RHS such that /// ConstantRange::makeExactICmpRegion(Pred, RHS) == *this. Return true if /// successful. @@ -253,6 +261,14 @@ public: return !operator==(CR); } + /// Compute the maximal number of active bits needed to represent every value + /// in this range. + unsigned getActiveBits() const; + + /// Compute the maximal number of bits needed to represent every value + /// in this signed range. + unsigned getMinSignedBits() const; + /// Subtract the specified constant from the endpoints of this constant range. ConstantRange subtract(const APInt &CI) const; @@ -401,6 +417,11 @@ public: /// value in \p Other. ConstantRange srem(const ConstantRange &Other) const; + /// Return a new range representing the possible values resulting from + /// a binary-xor of a value in this range by an all-one value, + /// aka bitwise complement operation. + ConstantRange binaryNot() const; + /// Return a new range representing the possible values resulting /// from a binary-and of a value in this range by a value in \p Other. ConstantRange binaryAnd(const ConstantRange &Other) const; @@ -456,8 +477,9 @@ public: ConstantRange inverse() const; /// Calculate absolute value range. If the original range contains signed - /// min, then the resulting range will also contain signed min. - ConstantRange abs() const; + /// min, then the resulting range will contain signed min if and only if + /// \p IntMinIsPoison is false. + ConstantRange abs(bool IntMinIsPoison = false) const; /// Represents whether an operation on the given constant range is known to /// always or never overflow. diff --git a/llvm/include/llvm/IR/Constants.h b/llvm/include/llvm/IR/Constants.h index 8e2dba9b2417..ac802232c23d 100644 --- a/llvm/include/llvm/IR/Constants.h +++ b/llvm/include/llvm/IR/Constants.h @@ -88,8 +88,10 @@ public: static ConstantInt *getTrue(LLVMContext &Context); static ConstantInt *getFalse(LLVMContext &Context); + static ConstantInt *getBool(LLVMContext &Context, bool V); static Constant *getTrue(Type *Ty); static Constant *getFalse(Type *Ty); + static Constant *getBool(Type *Ty, bool V); /// If Ty is a vector type, return a Constant with a splat of the given /// value. Otherwise return a ConstantInt for the given value. @@ -592,14 +594,13 @@ class ConstantDataSequential : public ConstantData { /// the same value but different type. For example, 0,0,0,1 could be a 4 /// element array of i8, or a 1-element array of i32. 
They'll both end up in /// the same StringMap bucket, linked up. - ConstantDataSequential *Next; + std::unique_ptr<ConstantDataSequential> Next; void destroyConstantImpl(); protected: explicit ConstantDataSequential(Type *ty, ValueTy VT, const char *Data) - : ConstantData(ty, VT), DataElements(Data), Next(nullptr) {} - ~ConstantDataSequential() { delete Next; } + : ConstantData(ty, VT), DataElements(Data) {} static Constant *getImpl(StringRef Bytes, Type *Ty); @@ -889,6 +890,42 @@ struct OperandTraits<BlockAddress> : DEFINE_TRANSPARENT_OPERAND_ACCESSORS(BlockAddress, Value) +/// Wrapper for a function that represents a value that +/// functionally represents the original function. This can be a function, +/// global alias to a function, or an ifunc. +class DSOLocalEquivalent final : public Constant { + friend class Constant; + + DSOLocalEquivalent(GlobalValue *GV); + + void *operator new(size_t s) { return User::operator new(s, 1); } + + void destroyConstantImpl(); + Value *handleOperandChangeImpl(Value *From, Value *To); + +public: + /// Return a DSOLocalEquivalent for the specified global value. + static DSOLocalEquivalent *get(GlobalValue *GV); + + /// Transparently provide more efficient getOperand methods. + DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value); + + GlobalValue *getGlobalValue() const { + return cast<GlobalValue>(Op<0>().get()); + } + + /// Methods for support type inquiry through isa, cast, and dyn_cast: + static bool classof(const Value *V) { + return V->getValueID() == DSOLocalEquivalentVal; + } +}; + +template <> +struct OperandTraits<DSOLocalEquivalent> + : public FixedNumOperandTraits<DSOLocalEquivalent, 1> {}; + +DEFINE_TRANSPARENT_OPERAND_ACCESSORS(DSOLocalEquivalent, Value) + //===----------------------------------------------------------------------===// /// A constant value that is initialized with an expression using /// other constant values. @@ -959,6 +996,7 @@ public: static Constant *getAnd(Constant *C1, Constant *C2); static Constant *getOr(Constant *C1, Constant *C2); static Constant *getXor(Constant *C1, Constant *C2); + static Constant *getUMin(Constant *C1, Constant *C2); static Constant *getShl(Constant *C1, Constant *C2, bool HasNUW = false, bool HasNSW = false); static Constant *getLShr(Constant *C1, Constant *C2, bool isExact = false); @@ -1034,6 +1072,12 @@ public: return getLShr(C1, C2, true); } + /// If C is a scalar/fixed width vector of known powers of 2, then this + /// function returns a new scalar/fixed width vector obtained from logBase2 + /// of C. Undef vector elements are set to zero. + /// Return a null pointer otherwise. + static Constant *getExactLogBase2(Constant *C); + /// Return the identity constant for a binary opcode. /// The identity constant C is defined as X op C = X and C op X = X for every /// X when the binary operation is commutative. If the binop is not @@ -1306,13 +1350,16 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ConstantExpr, Constant) /// can appear to have different bit patterns at each use. See /// LangRef.html#undefvalues for details. 
///
-class UndefValue final : public ConstantData {
+class UndefValue : public ConstantData {
friend class Constant;
explicit UndefValue(Type *T) : ConstantData(T, UndefValueVal) {}
void destroyConstantImpl();
+protected:
+ explicit UndefValue(Type *T, ValueTy vty) : ConstantData(T, vty) {}
+
public:
UndefValue(const UndefValue &) = delete;
@@ -1339,7 +1386,49 @@ public:
/// Methods for support type inquiry through isa, cast, and dyn_cast:
static bool classof(const Value *V) {
- return V->getValueID() == UndefValueVal;
+ return V->getValueID() == UndefValueVal ||
+ V->getValueID() == PoisonValueVal;
+ }
+};
+
+//===----------------------------------------------------------------------===//
+/// In order to facilitate speculative execution, many instructions do not
+/// invoke immediate undefined behavior when provided with illegal operands,
+/// and return a poison value instead.
+///
+/// see LangRef.html#poisonvalues for details.
+///
+class PoisonValue final : public UndefValue {
+ friend class Constant;
+
+ explicit PoisonValue(Type *T) : UndefValue(T, PoisonValueVal) {}
+
+ void destroyConstantImpl();
+
+public:
+ PoisonValue(const PoisonValue &) = delete;
+
+ /// Static factory methods - Return a 'poison' object of the specified type.
+ static PoisonValue *get(Type *T);
+
+ /// If this poison has array or vector type, return a poison with the right
+ /// element type.
+ PoisonValue *getSequentialElement() const;
+
+ /// If this poison has struct type, return a poison with the right element
+ /// type for the specified element.
+ PoisonValue *getStructElement(unsigned Elt) const;
+
+ /// Return a poison of the right value for the specified GEP index if we can,
+ /// otherwise return null (e.g. if C is a ConstantExpr).
+ PoisonValue *getElementValue(Constant *C) const;
+
+ /// Return a poison of the right value for the specified GEP index.
+ PoisonValue *getElementValue(unsigned Idx) const;
+
+ /// Methods for support type inquiry through isa, cast, and dyn_cast:
+ static bool classof(const Value *V) {
+ return V->getValueID() == PoisonValueVal;
}
};
diff --git a/llvm/include/llvm/IR/DIBuilder.h b/llvm/include/llvm/IR/DIBuilder.h
index d1c7d126b5a9..e0238567f251 100644
--- a/llvm/include/llvm/IR/DIBuilder.h
+++ b/llvm/include/llvm/IR/DIBuilder.h
@@ -199,6 +199,12 @@ namespace llvm {
unsigned Encoding,
DINode::DIFlags Flags = DINode::FlagZero);
+ /// Create debugging information entry for a string
+ /// type.
+ /// \param Name Type name.
+ /// \param SizeInBits Size of the type.
+ DIStringType *createStringType(StringRef Name, uint64_t SizeInBits);
+
/// Create debugging information entry for a qualified
/// type, e.g. 'const int'.
/// \param Tag Tag identifying type, e.g. dwarf::TAG_volatile_type
@@ -488,8 +494,24 @@ namespace llvm {
/// \param AlignInBits Alignment.
/// \param Ty Element type.
/// \param Subscripts Subscripts.
- DICompositeType *createArrayType(uint64_t Size, uint32_t AlignInBits,
- DIType *Ty, DINodeArray Subscripts);
+ /// \param DataLocation The location of the raw data of a descriptor-based
+ /// Fortran array, either a DIExpression* or
+ /// a DIVariable*.
+ /// \param Associated The associated attribute of a descriptor-based
+ /// Fortran array, either a DIExpression* or
+ /// a DIVariable*.
+ /// \param Allocated The allocated attribute of a descriptor-based
+ /// Fortran array, either a DIExpression* or
+ /// a DIVariable*.
+ /// \param Rank The rank attribute of a descriptor-based
+ /// Fortran array, either a DIExpression* or
+ /// a DIVariable*.
+ DICompositeType *createArrayType( + uint64_t Size, uint32_t AlignInBits, DIType *Ty, DINodeArray Subscripts, + PointerUnion<DIExpression *, DIVariable *> DataLocation = nullptr, + PointerUnion<DIExpression *, DIVariable *> Associated = nullptr, + PointerUnion<DIExpression *, DIVariable *> Allocated = nullptr, + PointerUnion<DIExpression *, DIVariable *> Rank = nullptr); /// Create debugging information entry for a vector type. /// \param Size Array size. @@ -576,6 +598,12 @@ namespace llvm { DISubrange *getOrCreateSubrange(Metadata *Count, Metadata *LowerBound, Metadata *UpperBound, Metadata *Stride); + DIGenericSubrange * + getOrCreateGenericSubrange(DIGenericSubrange::BoundType Count, + DIGenericSubrange::BoundType LowerBound, + DIGenericSubrange::BoundType UpperBound, + DIGenericSubrange::BoundType Stride); + /// Create a new descriptor for the specified variable. /// \param Context Variable scope. /// \param Name Name of the variable. @@ -744,14 +772,18 @@ namespace llvm { /// definitions as they would appear on a command line. /// \param IncludePath The path to the module map file. /// \param APINotesFile The path to an API notes file for this module. - /// \param File Source file of the module declaration. Used for - /// Fortran modules. - /// \param LineNo Source line number of the module declaration. + /// \param File Source file of the module. + /// Used for Fortran modules. + /// \param LineNo Source line number of the module. /// Used for Fortran modules. + /// \param IsDecl This is a module declaration; default to false; + /// when set to true, only Scope and Name are required + /// as this entry is just a hint for the debugger to find + /// the corresponding definition in the global scope. DIModule *createModule(DIScope *Scope, StringRef Name, StringRef ConfigurationMacros, StringRef IncludePath, StringRef APINotesFile = {}, DIFile *File = nullptr, - unsigned LineNo = 0); + unsigned LineNo = 0, bool IsDecl = false); /// This creates a descriptor for a lexical block with a new file /// attached. This merely extends the existing diff --git a/llvm/include/llvm/IR/DataLayout.h b/llvm/include/llvm/IR/DataLayout.h index 17297bb8b309..eb031613a935 100644 --- a/llvm/include/llvm/IR/DataLayout.h +++ b/llvm/include/llvm/IR/DataLayout.h @@ -123,6 +123,7 @@ private: unsigned AllocaAddrSpace; MaybeAlign StackNaturalAlign; unsigned ProgramAddrSpace; + unsigned DefaultGlobalsAddrSpace; MaybeAlign FunctionPtrAlign; FunctionPtrAlignType TheFunctionPtrAlignType; @@ -160,12 +161,7 @@ private: using PointersTy = SmallVector<PointerAlignElem, 8>; PointersTy Pointers; - PointersTy::const_iterator - findPointerLowerBound(uint32_t AddressSpace) const { - return const_cast<DataLayout *>(this)->findPointerLowerBound(AddressSpace); - } - - PointersTy::iterator findPointerLowerBound(uint32_t AddressSpace); + const PointerAlignElem &getPointerAlignElem(uint32_t AddressSpace) const; // The StructType -> StructLayout map. mutable void *LayoutMap = nullptr; @@ -174,19 +170,25 @@ private: /// well-defined bitwise representation. SmallVector<unsigned, 8> NonIntegralAddressSpaces; - void setAlignment(AlignTypeEnum align_type, Align abi_align, Align pref_align, - uint32_t bit_width); - Align getAlignmentInfo(AlignTypeEnum align_type, uint32_t bit_width, - bool ABIAlign, Type *Ty) const; - void setPointerAlignment(uint32_t AddrSpace, Align ABIAlign, Align PrefAlign, - uint32_t TypeByteWidth, uint32_t IndexWidth); + /// Attempts to set the alignment of the given type. 
Returns an error + /// description on failure. + Error setAlignment(AlignTypeEnum align_type, Align abi_align, + Align pref_align, uint32_t bit_width); + + /// Attempts to set the alignment of a pointer in the given address space. + /// Returns an error description on failure. + Error setPointerAlignment(uint32_t AddrSpace, Align ABIAlign, Align PrefAlign, + uint32_t TypeByteWidth, uint32_t IndexWidth); + + /// Internal helper to get alignment for integer of given bitwidth. + Align getIntegerAlignment(uint32_t BitWidth, bool abi_or_pref) const; /// Internal helper method that returns requested alignment for type. Align getAlignment(Type *Ty, bool abi_or_pref) const; - /// Parses a target data specification string. Assert if the string is - /// malformed. - void parseSpecifier(StringRef LayoutDescription); + /// Attempts to parse a target data specification string and reports an error + /// if the string is malformed. + Error parseSpecifier(StringRef Desc); // Free all internal data structures. void clear(); @@ -213,6 +215,7 @@ public: FunctionPtrAlign = DL.FunctionPtrAlign; TheFunctionPtrAlignType = DL.TheFunctionPtrAlignType; ProgramAddrSpace = DL.ProgramAddrSpace; + DefaultGlobalsAddrSpace = DL.DefaultGlobalsAddrSpace; ManglingMode = DL.ManglingMode; LegalIntWidths = DL.LegalIntWidths; Alignments = DL.Alignments; @@ -229,6 +232,10 @@ public: /// Parse a data layout string (with fallback to default values). void reset(StringRef LayoutDescription); + /// Parse a data layout string and return the layout. Return an error + /// description on failure. + static Expected<DataLayout> parse(StringRef LayoutDescription); + /// Layout endianness... bool isLittleEndian() const { return !BigEndian; } bool isBigEndian() const { return BigEndian; } @@ -285,6 +292,9 @@ public: } unsigned getProgramAddressSpace() const { return ProgramAddrSpace; } + unsigned getDefaultGlobalsAddressSpace() const { + return DefaultGlobalsAddrSpace; + } bool hasMicrosoftFastStdCallMangling() const { return ManglingMode == MM_WinCOFFX86; @@ -378,7 +388,7 @@ public: bool isNonIntegralAddressSpace(unsigned AddrSpace) const { ArrayRef<unsigned> NonIntegralSpaces = getNonIntegralAddressSpaces(); - return find(NonIntegralSpaces, AddrSpace) != NonIntegralSpaces.end(); + return is_contained(NonIntegralSpaces, AddrSpace); } bool isNonIntegralPointerType(PointerType *PT) const { @@ -520,7 +530,9 @@ public: /// Returns the minimum ABI-required alignment for an integer type of /// the specified bitwidth. - Align getABIIntegerTypeAlignment(unsigned BitWidth) const; + Align getABIIntegerTypeAlignment(unsigned BitWidth) const { + return getIntegerAlignment(BitWidth, /* abi_or_pref */ true); + } /// Returns the preferred stack/global alignment for the specified /// type. @@ -678,6 +690,8 @@ inline TypeSize DataLayout::getTypeSizeInBits(Type *Ty) const { case Type::PPC_FP128TyID: case Type::FP128TyID: return TypeSize::Fixed(128); + case Type::X86_AMXTyID: + return TypeSize::Fixed(8192); // In memory objects this is always aligned to a higher boundary, but // only 80 bits contain information. 
case Type::X86_FP80TyID: @@ -686,9 +700,9 @@ inline TypeSize DataLayout::getTypeSizeInBits(Type *Ty) const { case Type::ScalableVectorTyID: { VectorType *VTy = cast<VectorType>(Ty); auto EltCnt = VTy->getElementCount(); - uint64_t MinBits = EltCnt.Min * - getTypeSizeInBits(VTy->getElementType()).getFixedSize(); - return TypeSize(MinBits, EltCnt.Scalable); + uint64_t MinBits = EltCnt.getKnownMinValue() * + getTypeSizeInBits(VTy->getElementType()).getFixedSize(); + return TypeSize(MinBits, EltCnt.isScalable()); } default: llvm_unreachable("DataLayout::getTypeSizeInBits(): Unsupported type"); diff --git a/llvm/include/llvm/IR/DebugInfoMetadata.h b/llvm/include/llvm/IR/DebugInfoMetadata.h index 7d7cc4de7937..22dd5ee6efac 100644 --- a/llvm/include/llvm/IR/DebugInfoMetadata.h +++ b/llvm/include/llvm/IR/DebugInfoMetadata.h @@ -182,6 +182,7 @@ public: case DISubrangeKind: case DIEnumeratorKind: case DIBasicTypeKind: + case DIStringTypeKind: case DIDerivedTypeKind: case DICompositeTypeKind: case DISubroutineTypeKind: @@ -200,6 +201,7 @@ public: case DIObjCPropertyKind: case DIImportedEntityKind: case DIModuleKind: + case DIGenericSubrangeKind: return true; } } @@ -238,9 +240,8 @@ class GenericDINode : public DINode { StorageType Storage, bool ShouldCreate = true); TempGenericDINode cloneImpl() const { - return getTemporary( - getContext(), getTag(), getHeader(), - SmallVector<Metadata *, 4>(dwarf_op_begin(), dwarf_op_end())); + return getTemporary(getContext(), getTag(), getHeader(), + SmallVector<Metadata *, 4>(dwarf_operands())); } public: @@ -350,6 +351,52 @@ public: } }; +class DIGenericSubrange : public DINode { + friend class LLVMContextImpl; + friend class MDNode; + + DIGenericSubrange(LLVMContext &C, StorageType Storage, + ArrayRef<Metadata *> Ops) + : DINode(C, DIGenericSubrangeKind, Storage, + dwarf::DW_TAG_generic_subrange, Ops) {} + + ~DIGenericSubrange() = default; + + static DIGenericSubrange *getImpl(LLVMContext &Context, Metadata *CountNode, + Metadata *LowerBound, Metadata *UpperBound, + Metadata *Stride, StorageType Storage, + bool ShouldCreate = true); + + TempDIGenericSubrange cloneImpl() const { + return getTemporary(getContext(), getRawCountNode(), getRawLowerBound(), + getRawUpperBound(), getRawStride()); + } + +public: + DEFINE_MDNODE_GET(DIGenericSubrange, + (Metadata * CountNode, Metadata *LowerBound, + Metadata *UpperBound, Metadata *Stride), + (CountNode, LowerBound, UpperBound, Stride)) + + TempDIGenericSubrange clone() const { return cloneImpl(); } + + Metadata *getRawCountNode() const { return getOperand(0).get(); } + Metadata *getRawLowerBound() const { return getOperand(1).get(); } + Metadata *getRawUpperBound() const { return getOperand(2).get(); } + Metadata *getRawStride() const { return getOperand(3).get(); } + + using BoundType = PointerUnion<DIVariable *, DIExpression *>; + + BoundType getCount() const; + BoundType getLowerBound() const; + BoundType getUpperBound() const; + BoundType getStride() const; + + static bool classof(const Metadata *MD) { + return MD->getMetadataID() == DIGenericSubrangeKind; + } +}; + /// Enumeration value. 
/// /// TODO: Add a pointer to the context (DW_TAG_enumeration_type) once that no @@ -451,6 +498,7 @@ public: default: return false; case DIBasicTypeKind: + case DIStringTypeKind: case DIDerivedTypeKind: case DICompositeTypeKind: case DISubroutineTypeKind: @@ -697,6 +745,7 @@ public: default: return false; case DIBasicTypeKind: + case DIStringTypeKind: case DIDerivedTypeKind: case DICompositeTypeKind: case DISubroutineTypeKind: @@ -747,6 +796,12 @@ public: DEFINE_MDNODE_GET(DIBasicType, (unsigned Tag, StringRef Name), (Tag, Name, 0, 0, 0, FlagZero)) DEFINE_MDNODE_GET(DIBasicType, + (unsigned Tag, StringRef Name, uint64_t SizeInBits), + (Tag, Name, SizeInBits, 0, 0, FlagZero)) + DEFINE_MDNODE_GET(DIBasicType, + (unsigned Tag, MDString *Name, uint64_t SizeInBits), + (Tag, Name, SizeInBits, 0, 0, FlagZero)) + DEFINE_MDNODE_GET(DIBasicType, (unsigned Tag, StringRef Name, uint64_t SizeInBits, uint32_t AlignInBits, unsigned Encoding, DIFlags Flags), (Tag, Name, SizeInBits, AlignInBits, Encoding, Flags)) @@ -770,6 +825,81 @@ public: } }; +/// String type, Fortran CHARACTER(n) +class DIStringType : public DIType { + friend class LLVMContextImpl; + friend class MDNode; + + unsigned Encoding; + + DIStringType(LLVMContext &C, StorageType Storage, unsigned Tag, + uint64_t SizeInBits, uint32_t AlignInBits, unsigned Encoding, + ArrayRef<Metadata *> Ops) + : DIType(C, DIStringTypeKind, Storage, Tag, 0, SizeInBits, AlignInBits, 0, + FlagZero, Ops), + Encoding(Encoding) {} + ~DIStringType() = default; + + static DIStringType *getImpl(LLVMContext &Context, unsigned Tag, + StringRef Name, Metadata *StringLength, + Metadata *StrLenExp, uint64_t SizeInBits, + uint32_t AlignInBits, unsigned Encoding, + StorageType Storage, bool ShouldCreate = true) { + return getImpl(Context, Tag, getCanonicalMDString(Context, Name), + StringLength, StrLenExp, SizeInBits, AlignInBits, Encoding, + Storage, ShouldCreate); + } + static DIStringType *getImpl(LLVMContext &Context, unsigned Tag, + MDString *Name, Metadata *StringLength, + Metadata *StrLenExp, uint64_t SizeInBits, + uint32_t AlignInBits, unsigned Encoding, + StorageType Storage, bool ShouldCreate = true); + + TempDIStringType cloneImpl() const { + return getTemporary(getContext(), getTag(), getRawName(), + getRawStringLength(), getRawStringLengthExp(), + getSizeInBits(), getAlignInBits(), getEncoding()); + } + +public: + DEFINE_MDNODE_GET(DIStringType, + (unsigned Tag, StringRef Name, uint64_t SizeInBits, + uint32_t AlignInBits), + (Tag, Name, nullptr, nullptr, SizeInBits, AlignInBits, 0)) + DEFINE_MDNODE_GET(DIStringType, + (unsigned Tag, MDString *Name, Metadata *StringLength, + Metadata *StringLengthExp, uint64_t SizeInBits, + uint32_t AlignInBits, unsigned Encoding), + (Tag, Name, StringLength, StringLengthExp, SizeInBits, + AlignInBits, Encoding)) + DEFINE_MDNODE_GET(DIStringType, + (unsigned Tag, StringRef Name, Metadata *StringLength, + Metadata *StringLengthExp, uint64_t SizeInBits, + uint32_t AlignInBits, unsigned Encoding), + (Tag, Name, StringLength, StringLengthExp, SizeInBits, + AlignInBits, Encoding)) + + TempDIStringType clone() const { return cloneImpl(); } + + static bool classof(const Metadata *MD) { + return MD->getMetadataID() == DIStringTypeKind; + } + + DIVariable *getStringLength() const { + return cast_or_null<DIVariable>(getRawStringLength()); + } + + DIExpression *getStringLengthExp() const { + return cast_or_null<DIExpression>(getRawStringLengthExp()); + } + + unsigned getEncoding() const { return Encoding; } + + Metadata 
*getRawStringLength() const { return getOperand(3); } + + Metadata *getRawStringLengthExp() const { return getOperand(4); } +}; + /// Derived types. /// /// This includes qualified types, pointers, references, friends, typedefs, and @@ -942,13 +1072,14 @@ class DICompositeType : public DIType { DINodeArray Elements, unsigned RuntimeLang, DIType *VTableHolder, DITemplateParameterArray TemplateParams, StringRef Identifier, DIDerivedType *Discriminator, Metadata *DataLocation, + Metadata *Associated, Metadata *Allocated, Metadata *Rank, StorageType Storage, bool ShouldCreate = true) { - return getImpl(Context, Tag, getCanonicalMDString(Context, Name), File, - Line, Scope, BaseType, SizeInBits, AlignInBits, OffsetInBits, - Flags, Elements.get(), RuntimeLang, VTableHolder, - TemplateParams.get(), - getCanonicalMDString(Context, Identifier), Discriminator, - DataLocation, Storage, ShouldCreate); + return getImpl( + Context, Tag, getCanonicalMDString(Context, Name), File, Line, Scope, + BaseType, SizeInBits, AlignInBits, OffsetInBits, Flags, Elements.get(), + RuntimeLang, VTableHolder, TemplateParams.get(), + getCanonicalMDString(Context, Identifier), Discriminator, DataLocation, + Associated, Allocated, Rank, Storage, ShouldCreate); } static DICompositeType * getImpl(LLVMContext &Context, unsigned Tag, MDString *Name, Metadata *File, @@ -957,6 +1088,7 @@ class DICompositeType : public DIType { DIFlags Flags, Metadata *Elements, unsigned RuntimeLang, Metadata *VTableHolder, Metadata *TemplateParams, MDString *Identifier, Metadata *Discriminator, Metadata *DataLocation, + Metadata *Associated, Metadata *Allocated, Metadata *Rank, StorageType Storage, bool ShouldCreate = true); TempDICompositeType cloneImpl() const { @@ -965,7 +1097,8 @@ class DICompositeType : public DIType { getAlignInBits(), getOffsetInBits(), getFlags(), getElements(), getRuntimeLang(), getVTableHolder(), getTemplateParams(), getIdentifier(), - getDiscriminator(), getRawDataLocation()); + getDiscriminator(), getRawDataLocation(), + getRawAssociated(), getRawAllocated(), getRawRank()); } public: @@ -977,10 +1110,11 @@ public: DINodeArray Elements, unsigned RuntimeLang, DIType *VTableHolder, DITemplateParameterArray TemplateParams = nullptr, StringRef Identifier = "", DIDerivedType *Discriminator = nullptr, - Metadata *DataLocation = nullptr), + Metadata *DataLocation = nullptr, Metadata *Associated = nullptr, + Metadata *Allocated = nullptr, Metadata *Rank = nullptr), (Tag, Name, File, Line, Scope, BaseType, SizeInBits, AlignInBits, OffsetInBits, Flags, Elements, RuntimeLang, VTableHolder, TemplateParams, - Identifier, Discriminator, DataLocation)) + Identifier, Discriminator, DataLocation, Associated, Allocated, Rank)) DEFINE_MDNODE_GET( DICompositeType, (unsigned Tag, MDString *Name, Metadata *File, unsigned Line, @@ -988,10 +1122,12 @@ public: uint32_t AlignInBits, uint64_t OffsetInBits, DIFlags Flags, Metadata *Elements, unsigned RuntimeLang, Metadata *VTableHolder, Metadata *TemplateParams = nullptr, MDString *Identifier = nullptr, - Metadata *Discriminator = nullptr, Metadata *DataLocation = nullptr), + Metadata *Discriminator = nullptr, Metadata *DataLocation = nullptr, + Metadata *Associated = nullptr, Metadata *Allocated = nullptr, + Metadata *Rank = nullptr), (Tag, Name, File, Line, Scope, BaseType, SizeInBits, AlignInBits, OffsetInBits, Flags, Elements, RuntimeLang, VTableHolder, TemplateParams, - Identifier, Discriminator, DataLocation)) + Identifier, Discriminator, DataLocation, Associated, Allocated, Rank)) 
TempDICompositeType clone() const { return cloneImpl(); } @@ -1009,7 +1145,8 @@ public: uint64_t OffsetInBits, DIFlags Flags, Metadata *Elements, unsigned RuntimeLang, Metadata *VTableHolder, Metadata *TemplateParams, Metadata *Discriminator, - Metadata *DataLocation); + Metadata *DataLocation, Metadata *Associated, Metadata *Allocated, + Metadata *Rank); static DICompositeType *getODRTypeIfExists(LLVMContext &Context, MDString &Identifier); @@ -1029,7 +1166,8 @@ public: uint64_t OffsetInBits, DIFlags Flags, Metadata *Elements, unsigned RuntimeLang, Metadata *VTableHolder, Metadata *TemplateParams, Metadata *Discriminator, - Metadata *DataLocation); + Metadata *DataLocation, Metadata *Associated, + Metadata *Allocated, Metadata *Rank); DIType *getBaseType() const { return cast_or_null<DIType>(getRawBaseType()); } DINodeArray getElements() const { @@ -1058,6 +1196,29 @@ public: DIExpression *getDataLocationExp() const { return dyn_cast_or_null<DIExpression>(getRawDataLocation()); } + Metadata *getRawAssociated() const { return getOperand(10); } + DIVariable *getAssociated() const { + return dyn_cast_or_null<DIVariable>(getRawAssociated()); + } + DIExpression *getAssociatedExp() const { + return dyn_cast_or_null<DIExpression>(getRawAssociated()); + } + Metadata *getRawAllocated() const { return getOperand(11); } + DIVariable *getAllocated() const { + return dyn_cast_or_null<DIVariable>(getRawAllocated()); + } + DIExpression *getAllocatedExp() const { + return dyn_cast_or_null<DIExpression>(getRawAllocated()); + } + Metadata *getRawRank() const { return getOperand(12); } + ConstantInt *getRankConst() const { + if (auto *MD = dyn_cast_or_null<ConstantAsMetadata>(getRawRank())) + return dyn_cast_or_null<ConstantInt>(MD->getValue()); + return nullptr; + } + DIExpression *getRankExp() const { + return dyn_cast_or_null<DIExpression>(getRawRank()); + } /// Replace operands. /// @@ -1536,6 +1697,18 @@ public: inline unsigned getDiscriminator() const; + // For the regular discriminator, it stands for all empty components if all + // the lowest 3 bits are non-zero and all higher 29 bits are unused(zero by + // default). Here we fully leverage the higher 29 bits for pseudo probe use. + // This is the format: + // [2:0] - 0x7 + // [31:3] - pseudo probe fields guaranteed to be non-zero as a whole + // So if the lower 3 bits is non-zero and the others has at least one + // non-zero bit, it guarantees to be a pseudo probe discriminator + inline static bool isPseudoProbeDiscriminator(unsigned Discriminator) { + return ((Discriminator & 0x7) == 0x7) && (Discriminator & 0xFFFFFFF8); + } + /// Returns a new DILocation with updated \p Discriminator. inline const DILocation *cloneWithDiscriminator(unsigned Discriminator) const; @@ -1879,6 +2052,10 @@ public: return getNumOperands() > 10 ? getOperandAs<Metadata>(10) : nullptr; } + void replaceRawLinkageName(MDString *LinkageName) { + replaceOperandWith(3, LinkageName); + } + /// Check if this subprogram describes the given function. /// /// FIXME: Should this be looking through bitcasts? 
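As a small illustration of the pseudo probe discriminator encoding described in the DILocation hunk above (the numeric values here are made up for the example):
// Low 3 bits equal 0x7 and bits [31:3] carry a non-zero payload, so this is
// treated as a pseudo probe discriminator.
bool IsProbe = DILocation::isPseudoProbeDiscriminator(0x0F);   // true
// 0x7 alone has an empty [31:3] payload, and 0x5 fails the low-bits test.
bool NotProbe1 = DILocation::isPseudoProbeDiscriminator(0x7);  // false
bool NotProbe2 = DILocation::isPseudoProbeDiscriminator(0x5);  // false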
@@ -2123,49 +2300,52 @@ class DIModule : public DIScope { friend class LLVMContextImpl; friend class MDNode; unsigned LineNo; + bool IsDecl; DIModule(LLVMContext &Context, StorageType Storage, unsigned LineNo, - ArrayRef<Metadata *> Ops) + bool IsDecl, ArrayRef<Metadata *> Ops) : DIScope(Context, DIModuleKind, Storage, dwarf::DW_TAG_module, Ops), - LineNo(LineNo) {} + LineNo(LineNo), IsDecl(IsDecl) {} ~DIModule() = default; static DIModule *getImpl(LLVMContext &Context, DIFile *File, DIScope *Scope, StringRef Name, StringRef ConfigurationMacros, StringRef IncludePath, StringRef APINotesFile, - unsigned LineNo, StorageType Storage, + unsigned LineNo, bool IsDecl, StorageType Storage, bool ShouldCreate = true) { return getImpl(Context, File, Scope, getCanonicalMDString(Context, Name), getCanonicalMDString(Context, ConfigurationMacros), getCanonicalMDString(Context, IncludePath), - getCanonicalMDString(Context, APINotesFile), LineNo, Storage, - ShouldCreate); + getCanonicalMDString(Context, APINotesFile), LineNo, IsDecl, + Storage, ShouldCreate); } static DIModule *getImpl(LLVMContext &Context, Metadata *File, Metadata *Scope, MDString *Name, MDString *ConfigurationMacros, MDString *IncludePath, - MDString *APINotesFile, unsigned LineNo, + MDString *APINotesFile, unsigned LineNo, bool IsDecl, StorageType Storage, bool ShouldCreate = true); TempDIModule cloneImpl() const { return getTemporary(getContext(), getFile(), getScope(), getName(), getConfigurationMacros(), getIncludePath(), - getAPINotesFile(), getLineNo()); + getAPINotesFile(), getLineNo(), getIsDecl()); } public: DEFINE_MDNODE_GET(DIModule, (DIFile * File, DIScope *Scope, StringRef Name, StringRef ConfigurationMacros, StringRef IncludePath, - StringRef APINotesFile, unsigned LineNo), + StringRef APINotesFile, unsigned LineNo, + bool IsDecl = false), (File, Scope, Name, ConfigurationMacros, IncludePath, - APINotesFile, LineNo)) + APINotesFile, LineNo, IsDecl)) DEFINE_MDNODE_GET(DIModule, (Metadata * File, Metadata *Scope, MDString *Name, MDString *ConfigurationMacros, MDString *IncludePath, - MDString *APINotesFile, unsigned LineNo), + MDString *APINotesFile, unsigned LineNo, + bool IsDecl = false), (File, Scope, Name, ConfigurationMacros, IncludePath, - APINotesFile, LineNo)) + APINotesFile, LineNo, IsDecl)) TempDIModule clone() const { return cloneImpl(); } @@ -2175,6 +2355,7 @@ public: StringRef getIncludePath() const { return getStringOperand(4); } StringRef getAPINotesFile() const { return getStringOperand(5); } unsigned getLineNo() const { return LineNo; } + bool getIsDecl() const { return IsDecl; } Metadata *getRawScope() const { return getOperand(1); } MDString *getRawName() const { return getOperandAs<MDString>(2); } @@ -2409,6 +2590,9 @@ public: /// Determine whether this represents a standalone constant value. bool isConstant() const; + /// Determine whether this represents a standalone signed constant value. + bool isSignedConstant() const; + using element_iterator = ArrayRef<uint64_t>::iterator; element_iterator elements_begin() const { return getElements().begin(); } diff --git a/llvm/include/llvm/IR/DebugLoc.h b/llvm/include/llvm/IR/DebugLoc.h index 4914d733fe0d..4824f2e9f2fd 100644 --- a/llvm/include/llvm/IR/DebugLoc.h +++ b/llvm/include/llvm/IR/DebugLoc.h @@ -68,27 +68,13 @@ namespace llvm { /// Check whether this has a trivial destructor. bool hasTrivialDestructor() const { return Loc.hasTrivialDestructor(); } - /// Create a new DebugLoc. - /// - /// Create a new DebugLoc at the specified line/col and scope/inline. 
This - /// forwards to \a DILocation::get(). - /// - /// If \c !Scope, returns a default-constructed \a DebugLoc. - /// - /// FIXME: Remove this. Users should use DILocation::get(). - static DebugLoc get(unsigned Line, unsigned Col, const MDNode *Scope, - const MDNode *InlinedAt = nullptr, - bool ImplicitCode = false); - enum { ReplaceLastInlinedAt = true }; /// Rebuild the entire inlined-at chain for this instruction so that the top of /// the chain now is inlined-at the new call site. /// \param InlinedAt The new outermost inlined-at in the chain. - /// \param ReplaceLast Replace the last location in the inlined-at chain. static DebugLoc appendInlinedAt(const DebugLoc &DL, DILocation *InlinedAt, LLVMContext &Ctx, - DenseMap<const MDNode *, MDNode *> &Cache, - bool ReplaceLast = false); + DenseMap<const MDNode *, MDNode *> &Cache); unsigned getLine() const; unsigned getCol() const; diff --git a/llvm/include/llvm/IR/DerivedTypes.h b/llvm/include/llvm/IR/DerivedTypes.h index 3618447168be..c3d97f4520e1 100644 --- a/llvm/include/llvm/IR/DerivedTypes.h +++ b/llvm/include/llvm/IR/DerivedTypes.h @@ -87,12 +87,6 @@ public: /// Get a bit mask for this type. APInt getMask() const; - /// This method determines if the width of this IntegerType is a power-of-2 - /// in terms of 8 bit bytes. - /// @returns true if this is a power-of-2 byte width. - /// Is this a power-of-2 byte-width IntegerType ? - bool isPowerOf2ByteWidth() const; - /// Methods for support type inquiry through isa, cast, and dyn_cast. static bool classof(const Type *T) { return T->getTypeID() == IntegerTyID; @@ -273,6 +267,10 @@ public: return llvm::StructType::get(Ctx, StructFields); } + /// Return the type with the specified name, or null if there is none by that + /// name. + static StructType *getTypeByName(LLVMContext &C, StringRef Name); + bool isPacked() const { return (getSubclassData() & SCDB_Packed) != 0; } /// Return true if this type is uniqued by structural equivalence, false if it @@ -286,6 +284,9 @@ public: /// isSized - Return true if this is a sized type. bool isSized(SmallPtrSetImpl<Type *> *Visited = nullptr) const; + /// Returns true if this struct contains a scalable vector. + bool containsScalableVectorType() const; + /// Return true if this is a named struct that has a non-empty name. bool hasName() const { return SymbolTableEntry != nullptr; } @@ -423,41 +424,21 @@ public: /// Get the number of elements in this vector. It does not make sense to call /// this function on a scalable vector, and this will be moved into /// FixedVectorType in a future commit - unsigned getNumElements() const { - ElementCount EC = getElementCount(); -#ifdef STRICT_FIXED_SIZE_VECTORS - assert(!EC.Scalable && - "Request for fixed number of elements from scalable vector"); - return EC.Min; -#else - if (EC.Scalable) - WithColor::warning() - << "The code that requested the fixed number of elements has made " - "the assumption that this vector is not scalable. This assumption " - "was not correct, and this may lead to broken code\n"; - return EC.Min; -#endif - } + LLVM_ATTRIBUTE_DEPRECATED( + inline unsigned getNumElements() const, + "Calling this function via a base VectorType is deprecated. Either call " + "getElementCount() and handle the case where Scalable is true or cast to " + "FixedVectorType."); Type *getElementType() const { return ContainedType; } /// This static method is the primary way to construct an VectorType. 
static VectorType *get(Type *ElementType, ElementCount EC); - /// Base class getter that specifically constructs a FixedVectorType. This - /// function is deprecated, and will be removed after LLVM 11 ships. Since - /// this always returns a FixedVectorType via a base VectorType pointer, - /// FixedVectorType::get(Type *, unsigned) is strictly better since no cast is - /// required to call getNumElements() on the result. - LLVM_ATTRIBUTE_DEPRECATED( - inline static VectorType *get(Type *ElementType, unsigned NumElements), - "The base class version of get with the scalable argument defaulted to " - "false is deprecated. Either call VectorType::get(Type *, unsigned, " - "bool) and pass false, or call FixedVectorType::get(Type *, unsigned)."); - static VectorType *get(Type *ElementType, unsigned NumElements, bool Scalable) { - return VectorType::get(ElementType, {NumElements, Scalable}); + return VectorType::get(ElementType, + ElementCount::get(NumElements, Scalable)); } static VectorType *get(Type *ElementType, const VectorType *Other) { @@ -522,16 +503,18 @@ public: /// input type and the same element type. static VectorType *getHalfElementsVectorType(VectorType *VTy) { auto EltCnt = VTy->getElementCount(); - assert ((EltCnt.Min & 1) == 0 && - "Cannot halve vector with odd number of elements."); - return VectorType::get(VTy->getElementType(), EltCnt/2); + assert(EltCnt.isKnownEven() && + "Cannot halve vector with odd number of elements."); + return VectorType::get(VTy->getElementType(), + EltCnt.divideCoefficientBy(2)); } /// This static method returns a VectorType with twice as many elements as the /// input type and the same element type. static VectorType *getDoubleElementsVectorType(VectorType *VTy) { auto EltCnt = VTy->getElementCount(); - assert((EltCnt.Min * 2ull) <= UINT_MAX && "Too many elements in vector"); + assert((EltCnt.getKnownMinValue() * 2ull) <= UINT_MAX && + "Too many elements in vector"); return VectorType::get(VTy->getElementType(), EltCnt * 2); } @@ -549,8 +532,19 @@ public: } }; -inline VectorType *VectorType::get(Type *ElementType, unsigned NumElements) { - return VectorType::get(ElementType, NumElements, false); +unsigned VectorType::getNumElements() const { + ElementCount EC = getElementCount(); +#ifdef STRICT_FIXED_SIZE_VECTORS + assert(!EC.isScalable() && + "Request for fixed number of elements from scalable vector"); +#else + if (EC.isScalable()) + WithColor::warning() + << "The code that requested the fixed number of elements has made the " + "assumption that this vector is not scalable. This assumption was " + "not correct, and this may lead to broken code\n"; +#endif + return EC.getKnownMinValue(); } /// Class to represent fixed width SIMD vectors @@ -596,6 +590,8 @@ public: static bool classof(const Type *T) { return T->getTypeID() == FixedVectorTyID; } + + unsigned getNumElements() const { return ElementQuantity; } }; /// Class to represent scalable SIMD vectors @@ -655,7 +651,7 @@ public: }; inline ElementCount VectorType::getElementCount() const { - return ElementCount(ElementQuantity, isa<ScalableVectorType>(this)); + return ElementCount::get(ElementQuantity, isa<ScalableVectorType>(this)); } /// Class to represent pointers. 
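Following the DerivedTypes.h changes above, a minimal sketch of the ElementCount-based pattern the deprecation notice points to (VTy is a hypothetical VectorType*):
// Query the element count in a scalability-aware way instead of calling the
// deprecated VectorType::getNumElements().
if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
  unsigned NumElts = FVTy->getNumElements(); // exact element count
  (void)NumElts;
} else {
  ElementCount EC = VTy->getElementCount();  // scalable: vscale x MinElts
  unsigned MinElts = EC.getKnownMinValue();
  (void)MinElts;
}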
diff --git a/llvm/include/llvm/IR/DiagnosticInfo.h b/llvm/include/llvm/IR/DiagnosticInfo.h index b7e0ecde8629..c457072d50f1 100644 --- a/llvm/include/llvm/IR/DiagnosticInfo.h +++ b/llvm/include/llvm/IR/DiagnosticInfo.h @@ -21,6 +21,7 @@ #include "llvm/ADT/Twine.h" #include "llvm/IR/DebugLoc.h" #include "llvm/Support/CBindingWrapping.h" +#include "llvm/Support/TypeSize.h" #include "llvm/Support/YAMLTraits.h" #include <algorithm> #include <cstdint> @@ -34,6 +35,7 @@ namespace llvm { class DiagnosticPrinter; class Function; class Instruction; +class InstructionCost; class LLVMContext; class Module; class SMDiagnostic; @@ -75,7 +77,6 @@ enum DiagnosticKind { DK_LastMachineRemark = DK_MachineOptimizationRemarkAnalysis, DK_MIRParser, DK_PGOProfile, - DK_MisExpect, DK_Unsupported, DK_FirstPluginKind // Must be last value to work with // getNextAvailablePluginDiagnosticKind @@ -434,8 +435,10 @@ public: Argument(StringRef Key, unsigned N); Argument(StringRef Key, unsigned long N); Argument(StringRef Key, unsigned long long N); + Argument(StringRef Key, ElementCount EC); Argument(StringRef Key, bool B) : Key(Key), Val(B ? "true" : "false") {} Argument(StringRef Key, DebugLoc dl); + Argument(StringRef Key, InstructionCost C); }; /// \p PassName is the name of the pass emitting this diagnostic. \p @@ -1012,25 +1015,6 @@ public: void print(DiagnosticPrinter &DP) const override; }; -/// Diagnostic information for MisExpect analysis. -class DiagnosticInfoMisExpect : public DiagnosticInfoWithLocationBase { -public: - DiagnosticInfoMisExpect(const Instruction *Inst, Twine &Msg); - - /// \see DiagnosticInfo::print. - void print(DiagnosticPrinter &DP) const override; - - static bool classof(const DiagnosticInfo *DI) { - return DI->getKind() == DK_MisExpect; - } - - const Twine &getMsg() const { return Msg; } - -private: - /// Message to report. - const Twine &Msg; -}; - } // end namespace llvm #endif // LLVM_IR_DIAGNOSTICINFO_H diff --git a/llvm/include/llvm/IR/Dominators.h b/llvm/include/llvm/IR/Dominators.h index 71595cb15df4..08dbccaf2c01 100644 --- a/llvm/include/llvm/IR/Dominators.h +++ b/llvm/include/llvm/IR/Dominators.h @@ -44,6 +44,9 @@ using BBPostDomTree = PostDomTreeBase<BasicBlock>; using BBUpdates = ArrayRef<llvm::cfg::Update<BasicBlock *>>; +using BBDomTreeGraphDiff = GraphDiff<BasicBlock *, false>; +using BBPostDomTreeGraphDiff = GraphDiff<BasicBlock *, true>; + extern template void Calculate<BBDomTree>(BBDomTree &DT); extern template void CalculateWithUpdates<BBDomTree>(BBDomTree &DT, BBUpdates U); @@ -62,8 +65,12 @@ extern template void DeleteEdge<BBPostDomTree>(BBPostDomTree &DT, BasicBlock *From, BasicBlock *To); -extern template void ApplyUpdates<BBDomTree>(BBDomTree &DT, BBUpdates); -extern template void ApplyUpdates<BBPostDomTree>(BBPostDomTree &DT, BBUpdates); +extern template void ApplyUpdates<BBDomTree>(BBDomTree &DT, + BBDomTreeGraphDiff &, + BBDomTreeGraphDiff *); +extern template void ApplyUpdates<BBPostDomTree>(BBPostDomTree &DT, + BBPostDomTreeGraphDiff &, + BBPostDomTreeGraphDiff *); extern template bool Verify<BBDomTree>(const BBDomTree &DT, BBDomTree::VerificationLevel VL); @@ -158,12 +165,21 @@ class DominatorTree : public DominatorTreeBase<BasicBlock, false> { // Ensure base-class overloads are visible. using Base::dominates; - /// Return true if Def dominates a use in User. + /// Return true if value Def dominates use U, in the sense that Def is + /// available at U, and could be substituted as the used value without + /// violating the SSA dominance requirement. 
/// - /// This performs the special checks necessary if Def and User are in the same - /// basic block. Note that Def doesn't dominate a use in Def itself! - bool dominates(const Instruction *Def, const Use &U) const; - bool dominates(const Instruction *Def, const Instruction *User) const; + /// In particular, it is worth noting that: + /// * Non-instruction Defs dominate everything. + /// * Def does not dominate a use in Def itself (outside of degenerate cases + /// like unreachable code or trivial phi cycles). + /// * Invoke/callbr Defs only dominate uses in their default destination. + bool dominates(const Value *Def, const Use &U) const; + /// Return true if value Def dominates all possible uses inside instruction + /// User. Same comments as for the Use-based API apply. + bool dominates(const Value *Def, const Instruction *User) const; + // Does not accept Value to avoid ambiguity with dominance checks between + // two basic blocks. bool dominates(const Instruction *Def, const BasicBlock *BB) const; /// Return true if an edge dominates a use. diff --git a/llvm/include/llvm/IR/FixedMetadataKinds.def b/llvm/include/llvm/IR/FixedMetadataKinds.def index 0e1ffef58672..31979cd2f9db 100644 --- a/llvm/include/llvm/IR/FixedMetadataKinds.def +++ b/llvm/include/llvm/IR/FixedMetadataKinds.def @@ -39,5 +39,6 @@ LLVM_FIXED_MD_KIND(MD_irr_loop, "irr_loop", 24) LLVM_FIXED_MD_KIND(MD_access_group, "llvm.access.group", 25) LLVM_FIXED_MD_KIND(MD_callback, "callback", 26) LLVM_FIXED_MD_KIND(MD_preserve_access_index, "llvm.preserve.access.index", 27) -LLVM_FIXED_MD_KIND(MD_misexpect, "misexpect", 28) -LLVM_FIXED_MD_KIND(MD_vcall_visibility, "vcall_visibility", 29) +LLVM_FIXED_MD_KIND(MD_vcall_visibility, "vcall_visibility", 28) +LLVM_FIXED_MD_KIND(MD_noundef, "noundef", 29) +LLVM_FIXED_MD_KIND(MD_annotation, "annotation", 30) diff --git a/llvm/include/llvm/IR/FixedPointBuilder.h b/llvm/include/llvm/IR/FixedPointBuilder.h new file mode 100644 index 000000000000..a99c761ad3e9 --- /dev/null +++ b/llvm/include/llvm/IR/FixedPointBuilder.h @@ -0,0 +1,465 @@ +//===- llvm/FixedPointBuilder.h - Builder for fixed-point ops ---*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the FixedPointBuilder class, which is used as a convenient +// way to lower fixed-point arithmetic operations to LLVM IR. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_IR_FIXEDPOINTBUILDER_H +#define LLVM_IR_FIXEDPOINTBUILDER_H + +#include "llvm/ADT/APFixedPoint.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Value.h" + +namespace llvm { + +template <class IRBuilderTy> class FixedPointBuilder { + IRBuilderTy &B; + + Value *Convert(Value *Src, const FixedPointSemantics &SrcSema, + const FixedPointSemantics &DstSema, bool DstIsInteger) { + unsigned SrcWidth = SrcSema.getWidth(); + unsigned DstWidth = DstSema.getWidth(); + unsigned SrcScale = SrcSema.getScale(); + unsigned DstScale = DstSema.getScale(); + bool SrcIsSigned = SrcSema.isSigned(); + bool DstIsSigned = DstSema.isSigned(); + + Type *DstIntTy = B.getIntNTy(DstWidth); + + Value *Result = Src; + unsigned ResultWidth = SrcWidth; + + // Downscale. + if (DstScale < SrcScale) { + // When converting to integers, we round towards zero. For negative + // numbers, right shifting rounds towards negative infinity. In this case, + // we can just round up before shifting. + if (DstIsInteger && SrcIsSigned) { + Value *Zero = Constant::getNullValue(Result->getType()); + Value *IsNegative = B.CreateICmpSLT(Result, Zero); + Value *LowBits = ConstantInt::get( + B.getContext(), APInt::getLowBitsSet(ResultWidth, SrcScale)); + Value *Rounded = B.CreateAdd(Result, LowBits); + Result = B.CreateSelect(IsNegative, Rounded, Result); + } + + Result = SrcIsSigned + ? B.CreateAShr(Result, SrcScale - DstScale, "downscale") + : B.CreateLShr(Result, SrcScale - DstScale, "downscale"); + } + + if (!DstSema.isSaturated()) { + // Resize. + Result = B.CreateIntCast(Result, DstIntTy, SrcIsSigned, "resize"); + + // Upscale. + if (DstScale > SrcScale) + Result = B.CreateShl(Result, DstScale - SrcScale, "upscale"); + } else { + // Adjust the number of fractional bits. + if (DstScale > SrcScale) { + // Compare to DstWidth to prevent resizing twice. + ResultWidth = std::max(SrcWidth + DstScale - SrcScale, DstWidth); + Type *UpscaledTy = B.getIntNTy(ResultWidth); + Result = B.CreateIntCast(Result, UpscaledTy, SrcIsSigned, "resize"); + Result = B.CreateShl(Result, DstScale - SrcScale, "upscale"); + } + + // Handle saturation. + bool LessIntBits = DstSema.getIntegralBits() < SrcSema.getIntegralBits(); + if (LessIntBits) { + Value *Max = ConstantInt::get( + B.getContext(), + APFixedPoint::getMax(DstSema).getValue().extOrTrunc(ResultWidth)); + Value *TooHigh = SrcIsSigned ? B.CreateICmpSGT(Result, Max) + : B.CreateICmpUGT(Result, Max); + Result = B.CreateSelect(TooHigh, Max, Result, "satmax"); + } + // Cannot overflow min to dest type if src is unsigned since all fixed + // point types can cover the unsigned min of 0. + if (SrcIsSigned && (LessIntBits || !DstIsSigned)) { + Value *Min = ConstantInt::get( + B.getContext(), + APFixedPoint::getMin(DstSema).getValue().extOrTrunc(ResultWidth)); + Value *TooLow = B.CreateICmpSLT(Result, Min); + Result = B.CreateSelect(TooLow, Min, Result, "satmin"); + } + + // Resize the integer part to get the final destination size. + if (ResultWidth != DstWidth) + Result = B.CreateIntCast(Result, DstIntTy, SrcIsSigned, "resize"); + } + return Result; + } + + /// Get the common semantic for two semantics, with the added imposition that + /// saturated padded types retain the padding bit. 
+ FixedPointSemantics + getCommonBinopSemantic(const FixedPointSemantics &LHSSema, + const FixedPointSemantics &RHSSema) { + auto C = LHSSema.getCommonSemantics(RHSSema); + bool BothPadded = + LHSSema.hasUnsignedPadding() && RHSSema.hasUnsignedPadding(); + return FixedPointSemantics( + C.getWidth() + (unsigned)(BothPadded && C.isSaturated()), C.getScale(), + C.isSigned(), C.isSaturated(), BothPadded); + } + + /// Given a floating point type and a fixed-point semantic, return a floating + /// point type which can accommodate the fixed-point semantic. This is either + /// \p Ty, or a floating point type with a larger exponent than Ty. + Type *getAccommodatingFloatType(Type *Ty, const FixedPointSemantics &Sema) { + const fltSemantics *FloatSema = &Ty->getFltSemantics(); + while (!Sema.fitsInFloatSemantics(*FloatSema)) + FloatSema = APFixedPoint::promoteFloatSemantics(FloatSema); + return Type::getFloatingPointTy(Ty->getContext(), *FloatSema); + } + +public: + FixedPointBuilder(IRBuilderTy &Builder) : B(Builder) {} + + /// Convert an integer value representing a fixed-point number from one + /// fixed-point semantic to another fixed-point semantic. + /// \p Src - The source value + /// \p SrcSema - The fixed-point semantic of the source value + /// \p DstSema - The resulting fixed-point semantic + Value *CreateFixedToFixed(Value *Src, const FixedPointSemantics &SrcSema, + const FixedPointSemantics &DstSema) { + return Convert(Src, SrcSema, DstSema, false); + } + + /// Convert an integer value representing a fixed-point number to an integer + /// with the given bit width and signedness. + /// \p Src - The source value + /// \p SrcSema - The fixed-point semantic of the source value + /// \p DstWidth - The bit width of the result value + /// \p DstIsSigned - The signedness of the result value + Value *CreateFixedToInteger(Value *Src, const FixedPointSemantics &SrcSema, + unsigned DstWidth, bool DstIsSigned) { + return Convert( + Src, SrcSema, + FixedPointSemantics::GetIntegerSemantics(DstWidth, DstIsSigned), true); + } + + /// Convert an integer value with the given signedness to an integer value + /// representing the given fixed-point semantic. + /// \p Src - The source value + /// \p SrcIsSigned - The signedness of the source value + /// \p DstSema - The resulting fixed-point semantic + Value *CreateIntegerToFixed(Value *Src, unsigned SrcIsSigned, + const FixedPointSemantics &DstSema) { + return Convert(Src, + FixedPointSemantics::GetIntegerSemantics( + Src->getType()->getScalarSizeInBits(), SrcIsSigned), + DstSema, false); + } + + Value *CreateFixedToFloating(Value *Src, const FixedPointSemantics &SrcSema, + Type *DstTy) { + Value *Result; + Type *OpTy = getAccommodatingFloatType(DstTy, SrcSema); + // Convert the raw fixed-point value directly to floating point. If the + // value is too large to fit, it will be rounded, not truncated. + Result = SrcSema.isSigned() ? B.CreateSIToFP(Src, OpTy) + : B.CreateUIToFP(Src, OpTy); + // Rescale the integral-in-floating point by the scaling factor. This is + // lossless, except for overflow to infinity which is unlikely. 
+ Result = B.CreateFMul(Result, + ConstantFP::get(OpTy, std::pow(2, -(int)SrcSema.getScale()))); + if (OpTy != DstTy) + Result = B.CreateFPTrunc(Result, DstTy); + return Result; + } + + Value *CreateFloatingToFixed(Value *Src, const FixedPointSemantics &DstSema) { + bool UseSigned = DstSema.isSigned() || DstSema.hasUnsignedPadding(); + Value *Result = Src; + Type *OpTy = getAccommodatingFloatType(Src->getType(), DstSema); + if (OpTy != Src->getType()) + Result = B.CreateFPExt(Result, OpTy); + // Rescale the floating point value so that its significant bits (for the + // purposes of the conversion) are in the integral range. + Result = B.CreateFMul(Result, + ConstantFP::get(OpTy, std::pow(2, DstSema.getScale()))); + + Type *ResultTy = B.getIntNTy(DstSema.getWidth()); + if (DstSema.isSaturated()) { + Intrinsic::ID IID = + UseSigned ? Intrinsic::fptosi_sat : Intrinsic::fptoui_sat; + Result = B.CreateIntrinsic(IID, {ResultTy, OpTy}, {Result}); + } else { + Result = UseSigned ? B.CreateFPToSI(Result, ResultTy) + : B.CreateFPToUI(Result, ResultTy); + } + + // When saturating unsigned-with-padding using signed operations, we may + // get negative values. Emit an extra clamp to zero. + if (DstSema.isSaturated() && DstSema.hasUnsignedPadding()) { + Constant *Zero = Constant::getNullValue(Result->getType()); + Result = + B.CreateSelect(B.CreateICmpSLT(Result, Zero), Zero, Result, "satmin"); + } + + return Result; + } + + /// Add two fixed-point values and return the result in their common semantic. + /// \p LHS - The left hand side + /// \p LHSSema - The semantic of the left hand side + /// \p RHS - The right hand side + /// \p RHSSema - The semantic of the right hand side + Value *CreateAdd(Value *LHS, const FixedPointSemantics &LHSSema, + Value *RHS, const FixedPointSemantics &RHSSema) { + auto CommonSema = getCommonBinopSemantic(LHSSema, RHSSema); + bool UseSigned = CommonSema.isSigned() || CommonSema.hasUnsignedPadding(); + + Value *WideLHS = CreateFixedToFixed(LHS, LHSSema, CommonSema); + Value *WideRHS = CreateFixedToFixed(RHS, RHSSema, CommonSema); + + Value *Result; + if (CommonSema.isSaturated()) { + Intrinsic::ID IID = UseSigned ? Intrinsic::sadd_sat : Intrinsic::uadd_sat; + Result = B.CreateBinaryIntrinsic(IID, WideLHS, WideRHS); + } else { + Result = B.CreateAdd(WideLHS, WideRHS); + } + + return CreateFixedToFixed(Result, CommonSema, + LHSSema.getCommonSemantics(RHSSema)); + } + + /// Subtract two fixed-point values and return the result in their common + /// semantic. + /// \p LHS - The left hand side + /// \p LHSSema - The semantic of the left hand side + /// \p RHS - The right hand side + /// \p RHSSema - The semantic of the right hand side + Value *CreateSub(Value *LHS, const FixedPointSemantics &LHSSema, + Value *RHS, const FixedPointSemantics &RHSSema) { + auto CommonSema = getCommonBinopSemantic(LHSSema, RHSSema); + bool UseSigned = CommonSema.isSigned() || CommonSema.hasUnsignedPadding(); + + Value *WideLHS = CreateFixedToFixed(LHS, LHSSema, CommonSema); + Value *WideRHS = CreateFixedToFixed(RHS, RHSSema, CommonSema); + + Value *Result; + if (CommonSema.isSaturated()) { + Intrinsic::ID IID = UseSigned ? Intrinsic::ssub_sat : Intrinsic::usub_sat; + Result = B.CreateBinaryIntrinsic(IID, WideLHS, WideRHS); + } else { + Result = B.CreateSub(WideLHS, WideRHS); + } + + // Subtraction can end up below 0 for padded unsigned operations, so emit + // an extra clamp in that case. 
+ if (CommonSema.isSaturated() && CommonSema.hasUnsignedPadding()) { + Constant *Zero = Constant::getNullValue(Result->getType()); + Result = + B.CreateSelect(B.CreateICmpSLT(Result, Zero), Zero, Result, "satmin"); + } + + return CreateFixedToFixed(Result, CommonSema, + LHSSema.getCommonSemantics(RHSSema)); + } + + /// Multiply two fixed-point values and return the result in their common + /// semantic. + /// \p LHS - The left hand side + /// \p LHSSema - The semantic of the left hand side + /// \p RHS - The right hand side + /// \p RHSSema - The semantic of the right hand side + Value *CreateMul(Value *LHS, const FixedPointSemantics &LHSSema, + Value *RHS, const FixedPointSemantics &RHSSema) { + auto CommonSema = getCommonBinopSemantic(LHSSema, RHSSema); + bool UseSigned = CommonSema.isSigned() || CommonSema.hasUnsignedPadding(); + + Value *WideLHS = CreateFixedToFixed(LHS, LHSSema, CommonSema); + Value *WideRHS = CreateFixedToFixed(RHS, RHSSema, CommonSema); + + Intrinsic::ID IID; + if (CommonSema.isSaturated()) { + IID = UseSigned ? Intrinsic::smul_fix_sat : Intrinsic::umul_fix_sat; + } else { + IID = UseSigned ? Intrinsic::smul_fix : Intrinsic::umul_fix; + } + Value *Result = B.CreateIntrinsic( + IID, {WideLHS->getType()}, + {WideLHS, WideRHS, B.getInt32(CommonSema.getScale())}); + + return CreateFixedToFixed(Result, CommonSema, + LHSSema.getCommonSemantics(RHSSema)); + } + + /// Divide two fixed-point values and return the result in their common + /// semantic. + /// \p LHS - The left hand side + /// \p LHSSema - The semantic of the left hand side + /// \p RHS - The right hand side + /// \p RHSSema - The semantic of the right hand side + Value *CreateDiv(Value *LHS, const FixedPointSemantics &LHSSema, + Value *RHS, const FixedPointSemantics &RHSSema) { + auto CommonSema = getCommonBinopSemantic(LHSSema, RHSSema); + bool UseSigned = CommonSema.isSigned() || CommonSema.hasUnsignedPadding(); + + Value *WideLHS = CreateFixedToFixed(LHS, LHSSema, CommonSema); + Value *WideRHS = CreateFixedToFixed(RHS, RHSSema, CommonSema); + + Intrinsic::ID IID; + if (CommonSema.isSaturated()) { + IID = UseSigned ? Intrinsic::sdiv_fix_sat : Intrinsic::udiv_fix_sat; + } else { + IID = UseSigned ? Intrinsic::sdiv_fix : Intrinsic::udiv_fix; + } + Value *Result = B.CreateIntrinsic( + IID, {WideLHS->getType()}, + {WideLHS, WideRHS, B.getInt32(CommonSema.getScale())}); + + return CreateFixedToFixed(Result, CommonSema, + LHSSema.getCommonSemantics(RHSSema)); + } + + /// Left shift a fixed-point value by an unsigned integer value. The integer + /// value can be any bit width. + /// \p LHS - The left hand side + /// \p LHSSema - The semantic of the left hand side + /// \p RHS - The right hand side + Value *CreateShl(Value *LHS, const FixedPointSemantics &LHSSema, Value *RHS) { + bool UseSigned = LHSSema.isSigned() || LHSSema.hasUnsignedPadding(); + + RHS = B.CreateIntCast(RHS, LHS->getType(), /*IsSigned=*/false); + + Value *Result; + if (LHSSema.isSaturated()) { + Intrinsic::ID IID = UseSigned ? Intrinsic::sshl_sat : Intrinsic::ushl_sat; + Result = B.CreateBinaryIntrinsic(IID, LHS, RHS); + } else { + Result = B.CreateShl(LHS, RHS); + } + + return Result; + } + + /// Right shift a fixed-point value by an unsigned integer value. The integer + /// value can be any bit width. 
+ /// \p LHS - The left hand side + /// \p LHSSema - The semantic of the left hand side + /// \p RHS - The right hand side + Value *CreateShr(Value *LHS, const FixedPointSemantics &LHSSema, Value *RHS) { + RHS = B.CreateIntCast(RHS, LHS->getType(), false); + + return LHSSema.isSigned() ? B.CreateAShr(LHS, RHS) : B.CreateLShr(LHS, RHS); + } + + /// Compare two fixed-point values for equality. + /// \p LHS - The left hand side + /// \p LHSSema - The semantic of the left hand side + /// \p RHS - The right hand side + /// \p RHSSema - The semantic of the right hand side + Value *CreateEQ(Value *LHS, const FixedPointSemantics &LHSSema, + Value *RHS, const FixedPointSemantics &RHSSema) { + auto CommonSema = getCommonBinopSemantic(LHSSema, RHSSema); + + Value *WideLHS = CreateFixedToFixed(LHS, LHSSema, CommonSema); + Value *WideRHS = CreateFixedToFixed(RHS, RHSSema, CommonSema); + + return B.CreateICmpEQ(WideLHS, WideRHS); + } + + /// Compare two fixed-point values for inequality. + /// \p LHS - The left hand side + /// \p LHSSema - The semantic of the left hand side + /// \p RHS - The right hand side + /// \p RHSSema - The semantic of the right hand side + Value *CreateNE(Value *LHS, const FixedPointSemantics &LHSSema, + Value *RHS, const FixedPointSemantics &RHSSema) { + auto CommonSema = getCommonBinopSemantic(LHSSema, RHSSema); + + Value *WideLHS = CreateFixedToFixed(LHS, LHSSema, CommonSema); + Value *WideRHS = CreateFixedToFixed(RHS, RHSSema, CommonSema); + + return B.CreateICmpNE(WideLHS, WideRHS); + } + + /// Compare two fixed-point values as LHS < RHS. + /// \p LHS - The left hand side + /// \p LHSSema - The semantic of the left hand side + /// \p RHS - The right hand side + /// \p RHSSema - The semantic of the right hand side + Value *CreateLT(Value *LHS, const FixedPointSemantics &LHSSema, + Value *RHS, const FixedPointSemantics &RHSSema) { + auto CommonSema = getCommonBinopSemantic(LHSSema, RHSSema); + + Value *WideLHS = CreateFixedToFixed(LHS, LHSSema, CommonSema); + Value *WideRHS = CreateFixedToFixed(RHS, RHSSema, CommonSema); + + return CommonSema.isSigned() ? B.CreateICmpSLT(WideLHS, WideRHS) + : B.CreateICmpULT(WideLHS, WideRHS); + } + + /// Compare two fixed-point values as LHS <= RHS. + /// \p LHS - The left hand side + /// \p LHSSema - The semantic of the left hand side + /// \p RHS - The right hand side + /// \p RHSSema - The semantic of the right hand side + Value *CreateLE(Value *LHS, const FixedPointSemantics &LHSSema, + Value *RHS, const FixedPointSemantics &RHSSema) { + auto CommonSema = getCommonBinopSemantic(LHSSema, RHSSema); + + Value *WideLHS = CreateFixedToFixed(LHS, LHSSema, CommonSema); + Value *WideRHS = CreateFixedToFixed(RHS, RHSSema, CommonSema); + + return CommonSema.isSigned() ? B.CreateICmpSLE(WideLHS, WideRHS) + : B.CreateICmpULE(WideLHS, WideRHS); + } + + /// Compare two fixed-point values as LHS > RHS. + /// \p LHS - The left hand side + /// \p LHSSema - The semantic of the left hand side + /// \p RHS - The right hand side + /// \p RHSSema - The semantic of the right hand side + Value *CreateGT(Value *LHS, const FixedPointSemantics &LHSSema, + Value *RHS, const FixedPointSemantics &RHSSema) { + auto CommonSema = getCommonBinopSemantic(LHSSema, RHSSema); + + Value *WideLHS = CreateFixedToFixed(LHS, LHSSema, CommonSema); + Value *WideRHS = CreateFixedToFixed(RHS, RHSSema, CommonSema); + + return CommonSema.isSigned() ? B.CreateICmpSGT(WideLHS, WideRHS) + : B.CreateICmpUGT(WideLHS, WideRHS); + } + + /// Compare two fixed-point values as LHS >= RHS. 
+ /// \p LHS - The left hand side + /// \p LHSSema - The semantic of the left hand side + /// \p RHS - The right hand side + /// \p RHSSema - The semantic of the right hand side + Value *CreateGE(Value *LHS, const FixedPointSemantics &LHSSema, + Value *RHS, const FixedPointSemantics &RHSSema) { + auto CommonSema = getCommonBinopSemantic(LHSSema, RHSSema); + + Value *WideLHS = CreateFixedToFixed(LHS, LHSSema, CommonSema); + Value *WideRHS = CreateFixedToFixed(RHS, RHSSema, CommonSema); + + return CommonSema.isSigned() ? B.CreateICmpSGE(WideLHS, WideRHS) + : B.CreateICmpUGE(WideLHS, WideRHS); + } +}; + +} // end namespace llvm + +#endif // LLVM_IR_FIXEDPOINTBUILDER_H diff --git a/llvm/include/llvm/IR/Function.h b/llvm/include/llvm/IR/Function.h index bb4ec13c7610..7e209bb3769b 100644 --- a/llvm/include/llvm/IR/Function.h +++ b/llvm/include/llvm/IR/Function.h @@ -199,6 +199,15 @@ public: /// returns Intrinsic::not_intrinsic! bool isIntrinsic() const { return HasLLVMReservedName; } + /// isTargetIntrinsic - Returns true if IID is an intrinsic specific to a + /// certain target. If it is a generic intrinsic false is returned. + static bool isTargetIntrinsic(Intrinsic::ID IID); + + /// isTargetIntrinsic - Returns true if this function is an intrinsic and the + /// intrinsic is specific to a certain target. If this is not an intrinsic + /// or a generic intrinsic, false is returned. + bool isTargetIntrinsic() const; + /// Returns true if the function is one of the "Constrained Floating-Point /// Intrinsics". Returns false if not, and returns false when /// getIntrinsicID() returns Intrinsic::not_intrinsic. @@ -259,6 +268,12 @@ public: getContext(), AttributeList::FunctionIndex, Kind)); } + /// A function will have the "coroutine.presplit" attribute if it's + /// a coroutine and has not gone through full CoroSplit pass. + bool isPresplitCoroutine() const { + return hasFnAttribute("coroutine.presplit"); + } + enum ProfileCountType { PCT_Invalid, PCT_Real, PCT_Synthetic }; /// Class to represent profile counts. @@ -372,6 +387,9 @@ public: void setGC(std::string Str); void clearGC(); + /// Returns true if the function has ssp, sspstrong, or sspreq fn attrs. + bool hasStackProtectorFnAttr() const; + /// adds the attribute to the list of attributes. void addAttribute(unsigned i, Attribute::AttrKind Kind); @@ -463,8 +481,17 @@ public: /// Extract the byval type for a parameter. Type *getParamByValType(unsigned ArgNo) const { - Type *Ty = AttributeSets.getParamByValType(ArgNo); - return Ty ? Ty : (arg_begin() + ArgNo)->getType()->getPointerElementType(); + return AttributeSets.getParamByValType(ArgNo); + } + + /// Extract the sret type for a parameter. + Type *getParamStructRetType(unsigned ArgNo) const { + return AttributeSets.getParamStructRetType(ArgNo); + } + + /// Extract the byref type for a parameter. + Type *getParamByRefType(unsigned ArgNo) const { + return AttributeSets.getParamByRefType(ArgNo); } /// Extract the number of dereferenceable bytes for a call or @@ -606,6 +633,17 @@ public: addFnAttr(Attribute::NoRecurse); } + /// Determine if the function is required to make forward progress. + bool mustProgress() const { + return hasFnAttribute(Attribute::MustProgress) || + hasFnAttribute(Attribute::WillReturn); + } + void setMustProgress() { addFnAttr(Attribute::MustProgress); } + + /// Determine if the function will return. 
+ bool willReturn() const { return hasFnAttribute(Attribute::WillReturn); } + void setWillReturn() { addFnAttr(Attribute::WillReturn); } + /// True if the ABI mandates (or the user requested) that this /// function be in a unwind table. bool hasUWTable() const { @@ -648,6 +686,10 @@ public: return hasFnAttribute(Attribute::OptimizeForSize) || hasMinSize(); } + /// Returns the denormal handling type for the default rounding mode of the + /// function. + DenormalMode getDenormalMode(const fltSemantics &FPType) const; + /// copyAttributesFrom - copy all additional attributes (those not needed to /// create a Function) from the Function Src to this one. void copyAttributesFrom(const Function *Src); diff --git a/llvm/include/llvm/IR/GetElementPtrTypeIterator.h b/llvm/include/llvm/IR/GetElementPtrTypeIterator.h index 79ea5791b2fd..6293305a2639 100644 --- a/llvm/include/llvm/IR/GetElementPtrTypeIterator.h +++ b/llvm/include/llvm/IR/GetElementPtrTypeIterator.h @@ -83,7 +83,7 @@ namespace llvm { if (isa<ScalableVectorType>(VTy)) NumElements = Unbounded; else - NumElements = VTy->getNumElements(); + NumElements = cast<FixedVectorType>(VTy)->getNumElements(); } else CurTy = dyn_cast<StructType>(Ty); ++OpIt; diff --git a/llvm/include/llvm/IR/GlobalObject.h b/llvm/include/llvm/IR/GlobalObject.h index 3a7b718845cb..d01abdc3b625 100644 --- a/llvm/include/llvm/IR/GlobalObject.h +++ b/llvm/include/llvm/IR/GlobalObject.h @@ -54,7 +54,6 @@ protected: Comdat *ObjComdat; enum { LastAlignmentBit = 4, - HasMetadataHashEntryBit, HasSectionHashEntryBit, GlobalObjectBits, @@ -127,58 +126,13 @@ public: Comdat *getComdat() { return ObjComdat; } void setComdat(Comdat *C) { ObjComdat = C; } - /// Check if this has any metadata. - bool hasMetadata() const { return hasMetadataHashEntry(); } - - /// Check if this has any metadata of the given kind. - bool hasMetadata(unsigned KindID) const { - return getMetadata(KindID) != nullptr; - } - bool hasMetadata(StringRef Kind) const { - return getMetadata(Kind) != nullptr; - } - - /// Get the current metadata attachments for the given kind, if any. - /// - /// These functions require that the function have at most a single attachment - /// of the given kind, and return \c nullptr if such an attachment is missing. - /// @{ - MDNode *getMetadata(unsigned KindID) const; - MDNode *getMetadata(StringRef Kind) const; - /// @} - - /// Appends all attachments with the given ID to \c MDs in insertion order. - /// If the global has no attachments with the given ID, or if ID is invalid, - /// leaves MDs unchanged. - /// @{ - void getMetadata(unsigned KindID, SmallVectorImpl<MDNode *> &MDs) const; - void getMetadata(StringRef Kind, SmallVectorImpl<MDNode *> &MDs) const; - /// @} - - /// Set a particular kind of metadata attachment. - /// - /// Sets the given attachment to \c MD, erasing it if \c MD is \c nullptr or - /// replacing it if it already exists. - /// @{ - void setMetadata(unsigned KindID, MDNode *MD); - void setMetadata(StringRef Kind, MDNode *MD); - /// @} - - /// Add a metadata attachment. - /// @{ - void addMetadata(unsigned KindID, MDNode &MD); - void addMetadata(StringRef Kind, MDNode &MD); - /// @} - - /// Appends all attachments for the global to \c MDs, sorting by attachment - /// ID. Attachments with the same ID appear in insertion order. - void - getAllMetadata(SmallVectorImpl<std::pair<unsigned, MDNode *>> &MDs) const; - - /// Erase all metadata attachments with the given kind. - /// - /// \returns true if any metadata was removed. 
- bool eraseMetadata(unsigned KindID); + using Value::addMetadata; + using Value::clearMetadata; + using Value::eraseMetadata; + using Value::getAllMetadata; + using Value::getMetadata; + using Value::hasMetadata; + using Value::setMetadata; /// Copy metadata from Src, adjusting offsets by Offset. void copyMetadata(const GlobalObject *Src, unsigned Offset); @@ -204,8 +158,6 @@ public: V->getValueID() == Value::GlobalVariableVal; } - void clearMetadata(); - private: void setGlobalObjectFlag(unsigned Bit, bool Val) { unsigned Mask = 1 << Bit; @@ -213,13 +165,6 @@ private: (Val ? Mask : 0u)); } - bool hasMetadataHashEntry() const { - return getGlobalValueSubClassData() & (1 << HasMetadataHashEntryBit); - } - void setHasMetadataHashEntry(bool HasEntry) { - setGlobalObjectFlag(HasMetadataHashEntryBit, HasEntry); - } - StringRef getSectionImpl() const; }; diff --git a/llvm/include/llvm/IR/GlobalVariable.h b/llvm/include/llvm/IR/GlobalVariable.h index 12093e337d6e..674d49eb9de6 100644 --- a/llvm/include/llvm/IR/GlobalVariable.h +++ b/llvm/include/llvm/IR/GlobalVariable.h @@ -56,10 +56,11 @@ public: bool isExternallyInitialized = false); /// GlobalVariable ctor - This creates a global and inserts it before the /// specified other global. - GlobalVariable(Module &M, Type *Ty, bool isConstant, - LinkageTypes Linkage, Constant *Initializer, - const Twine &Name = "", GlobalVariable *InsertBefore = nullptr, - ThreadLocalMode = NotThreadLocal, unsigned AddressSpace = 0, + GlobalVariable(Module &M, Type *Ty, bool isConstant, LinkageTypes Linkage, + Constant *Initializer, const Twine &Name = "", + GlobalVariable *InsertBefore = nullptr, + ThreadLocalMode = NotThreadLocal, + Optional<unsigned> AddressSpace = None, bool isExternallyInitialized = false); GlobalVariable(const GlobalVariable &) = delete; GlobalVariable &operator=(const GlobalVariable &) = delete; diff --git a/llvm/include/llvm/IR/IRBuilder.h b/llvm/include/llvm/IR/IRBuilder.h index 4552ca016bd7..9cefc9aa764c 100644 --- a/llvm/include/llvm/IR/IRBuilder.h +++ b/llvm/include/llvm/IR/IRBuilder.h @@ -17,6 +17,7 @@ #include "llvm-c/Types.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/None.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" #include "llvm/IR/BasicBlock.h" @@ -24,6 +25,7 @@ #include "llvm/IR/ConstantFolder.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" @@ -91,7 +93,28 @@ public: /// Common base class shared among various IRBuilders. class IRBuilderBase { - DebugLoc CurDbgLocation; + /// Pairs of (metadata kind, MDNode *) that should be added to all newly + /// created instructions, like !dbg metadata. + SmallVector<std::pair<unsigned, MDNode *>, 2> MetadataToCopy; + + /// Add or update the an entry (Kind, MD) to MetadataToCopy, if \p MD is not + /// null. If \p MD is null, remove the entry with \p Kind. 
+ void AddOrRemoveMetadataToCopy(unsigned Kind, MDNode *MD) {
+ if (!MD) {
+ erase_if(MetadataToCopy, [Kind](const std::pair<unsigned, MDNode *> &KV) {
+ return KV.first == Kind;
+ });
+ return;
+ }
+
+ for (auto &KV : MetadataToCopy)
+ if (KV.first == Kind) {
+ KV.second = MD;
+ return;
+ }
+
+ MetadataToCopy.emplace_back(Kind, MD);
+ }
 protected:
 BasicBlock *BB;
@@ -125,7 +148,7 @@ public:
 template<typename InstTy>
 InstTy *Insert(InstTy *I, const Twine &Name = "") const {
 Inserter.InsertHelper(I, Name, BB, InsertPt);
- SetInstDebugLocation(I);
+ AddMetadataToInst(I);
 return I;
 }
@@ -182,16 +205,42 @@ public:
 }
 /// Set location information used by debugging information.
- void SetCurrentDebugLocation(DebugLoc L) { CurDbgLocation = std::move(L); }
+ void SetCurrentDebugLocation(DebugLoc L) {
+ AddOrRemoveMetadataToCopy(LLVMContext::MD_dbg, L.getAsMDNode());
+ }
+
+ /// Collect metadata with IDs \p MetadataKinds from \p Src which should be
+ /// added to all created instructions. Entries present in MetadataToCopy but
+ /// not on \p Src will be dropped from MetadataToCopy.
+ void CollectMetadataToCopy(Instruction *Src,
+ ArrayRef<unsigned> MetadataKinds) {
+ for (unsigned K : MetadataKinds)
+ AddOrRemoveMetadataToCopy(K, Src->getMetadata(K));
+ }
 /// Get location information used by debugging information.
- const DebugLoc &getCurrentDebugLocation() const { return CurDbgLocation; }
+ DebugLoc getCurrentDebugLocation() const {
+ for (auto &KV : MetadataToCopy)
+ if (KV.first == LLVMContext::MD_dbg)
+ return {cast<DILocation>(KV.second)};
+
+ return {};
+ }
 /// If this builder has a current debug location, set it on the
 /// specified instruction.
 void SetInstDebugLocation(Instruction *I) const {
- if (CurDbgLocation)
- I->setDebugLoc(CurDbgLocation);
+ for (const auto &KV : MetadataToCopy)
+ if (KV.first == LLVMContext::MD_dbg) {
+ I->setDebugLoc(DebugLoc(KV.second));
+ return;
+ }
+ }
+
+ /// Add all entries in MetadataToCopy to \p I.
+ void AddMetadataToInst(Instruction *I) const {
+ for (auto &KV : MetadataToCopy)
+ I->setMetadata(KV.first, KV.second);
 }
 /// Get the return type of the current function that we're emitting
@@ -266,11 +315,19 @@ public:
 /// Set the exception handling to be used with constrained floating point
 void setDefaultConstrainedExcept(fp::ExceptionBehavior NewExcept) {
+#ifndef NDEBUG
+ Optional<StringRef> ExceptStr = ExceptionBehaviorToStr(NewExcept);
+ assert(ExceptStr.hasValue() && "Garbage strict exception behavior!");
+#endif
 DefaultConstrainedExcept = NewExcept;
 }
 /// Set the rounding mode handling to be used with constrained floating point
 void setDefaultConstrainedRounding(RoundingMode NewRounding) {
+#ifndef NDEBUG
+ Optional<StringRef> RoundingStr = RoundingModeToStr(NewRounding);
+ assert(RoundingStr.hasValue() && "Garbage strict rounding mode!");
+#endif
 DefaultConstrainedRounding = NewRounding;
 }
@@ -293,9 +350,8 @@ public:
 }
 }
- void setConstrainedFPCallAttr(CallInst *I) {
- if (!I->hasFnAttr(Attribute::StrictFP))
- I->addAttribute(AttributeList::FunctionIndex, Attribute::StrictFP);
+ void setConstrainedFPCallAttr(CallBase *I) {
+ I->addAttribute(AttributeList::FunctionIndex, Attribute::StrictFP);
 }
 void setDefaultOperandBundles(ArrayRef<OperandBundleDef> OpBundles) {
@@ -386,8 +442,12 @@ public:
 /// filled in with the null terminated string value specified. The new global
 /// variable will be marked mergable with any others of the same contents. If
 /// Name is specified, it is the name of the global variable created.
+ /// + /// If no module is given via \p M, it is take from the insertion point basic + /// block. GlobalVariable *CreateGlobalString(StringRef Str, const Twine &Name = "", - unsigned AddressSpace = 0); + unsigned AddressSpace = 0, + Module *M = nullptr); /// Get a constant value representing either true or false. ConstantInt *getInt1(bool V) { @@ -570,12 +630,22 @@ public: NoAliasTag); } + CallInst *CreateMemTransferInst( + Intrinsic::ID IntrID, Value *Dst, MaybeAlign DstAlign, Value *Src, + MaybeAlign SrcAlign, Value *Size, bool isVolatile = false, + MDNode *TBAATag = nullptr, MDNode *TBAAStructTag = nullptr, + MDNode *ScopeTag = nullptr, MDNode *NoAliasTag = nullptr); + CallInst *CreateMemCpy(Value *Dst, MaybeAlign DstAlign, Value *Src, MaybeAlign SrcAlign, Value *Size, bool isVolatile = false, MDNode *TBAATag = nullptr, MDNode *TBAAStructTag = nullptr, MDNode *ScopeTag = nullptr, - MDNode *NoAliasTag = nullptr); + MDNode *NoAliasTag = nullptr) { + return CreateMemTransferInst(Intrinsic::memcpy, Dst, DstAlign, Src, + SrcAlign, Size, isVolatile, TBAATag, + TBAAStructTag, ScopeTag, NoAliasTag); + } CallInst *CreateMemCpyInline(Value *Dst, MaybeAlign DstAlign, Value *Src, MaybeAlign SrcAlign, Value *Size); @@ -709,11 +779,11 @@ public: /// Create a vector float max reduction intrinsic of the source /// vector. - CallInst *CreateFPMaxReduce(Value *Src, bool NoNaN = false); + CallInst *CreateFPMaxReduce(Value *Src); /// Create a vector float min reduction intrinsic of the source /// vector. - CallInst *CreateFPMinReduce(Value *Src, bool NoNaN = false); + CallInst *CreateFPMinReduce(Value *Src); /// Create a lifetime.start intrinsic. /// @@ -788,6 +858,13 @@ public: CallInst *CreateAssumption(Value *Cond, ArrayRef<OperandBundleDef> OpBundles = llvm::None); + /// Create a llvm.experimental.noalias.scope.decl intrinsic call. + Instruction *CreateNoAliasScopeDeclaration(Value *Scope); + Instruction *CreateNoAliasScopeDeclaration(MDNode *ScopeTag) { + return CreateNoAliasScopeDeclaration( + MetadataAsValue::get(Context, ScopeTag)); + } + /// Create a call to the experimental.gc.statepoint intrinsic to /// start a new statepoint sequence. CallInst *CreateGCStatepointCall(uint64_t ID, uint32_t NumPatchBytes, @@ -801,7 +878,7 @@ public: /// start a new statepoint sequence. CallInst *CreateGCStatepointCall(uint64_t ID, uint32_t NumPatchBytes, Value *ActualCallee, uint32_t Flags, - ArrayRef<Use> CallArgs, + ArrayRef<Value *> CallArgs, Optional<ArrayRef<Use>> TransitionArgs, Optional<ArrayRef<Use>> DeoptArgs, ArrayRef<Value *> GCArgs, @@ -830,7 +907,7 @@ public: InvokeInst *CreateGCStatepointInvoke( uint64_t ID, uint32_t NumPatchBytes, Value *ActualInvokee, BasicBlock *NormalDest, BasicBlock *UnwindDest, uint32_t Flags, - ArrayRef<Use> InvokeArgs, Optional<ArrayRef<Use>> TransitionArgs, + ArrayRef<Value *> InvokeArgs, Optional<ArrayRef<Use>> TransitionArgs, Optional<ArrayRef<Use>> DeoptArgs, ArrayRef<Value *> GCArgs, const Twine &Name = ""); @@ -858,6 +935,10 @@ public: Type *ResultType, const Twine &Name = ""); + /// Create a call to llvm.vscale, multiplied by \p Scaling. The type of VScale + /// will be the same type as that of \p Scaling. + Value *CreateVScale(Constant *Scaling, const Twine &Name = ""); + /// Create a call to intrinsic \p ID with 1 operand which is mangled on its /// type. 
CallInst *CreateUnaryIntrinsic(Intrinsic::ID ID, Value *V, @@ -898,6 +979,22 @@ public: return CreateBinaryIntrinsic(Intrinsic::maximum, LHS, RHS, nullptr, Name); } + /// Create a call to the experimental.vector.extract intrinsic. + CallInst *CreateExtractVector(Type *DstType, Value *SrcVec, Value *Idx, + const Twine &Name = "") { + return CreateIntrinsic(Intrinsic::experimental_vector_extract, + {DstType, SrcVec->getType()}, {SrcVec, Idx}, nullptr, + Name); + } + + /// Create a call to the experimental.vector.insert intrinsic. + CallInst *CreateInsertVector(Type *DstType, Value *SrcVec, Value *SubVec, + Value *Idx, const Twine &Name = "") { + return CreateIntrinsic(Intrinsic::experimental_vector_insert, + {DstType, SubVec->getType()}, {SrcVec, SubVec, Idx}, + nullptr, Name); + } + private: /// Create a call to a masked intrinsic with given Id. CallInst *CreateMaskedIntrinsic(Intrinsic::ID Id, ArrayRef<Value *> Ops, @@ -998,16 +1095,21 @@ public: ArrayRef<Value *> Args, ArrayRef<OperandBundleDef> OpBundles, const Twine &Name = "") { - return Insert( - InvokeInst::Create(Ty, Callee, NormalDest, UnwindDest, Args, OpBundles), - Name); + InvokeInst *II = + InvokeInst::Create(Ty, Callee, NormalDest, UnwindDest, Args, OpBundles); + if (IsFPConstrained) + setConstrainedFPCallAttr(II); + return Insert(II, Name); } InvokeInst *CreateInvoke(FunctionType *Ty, Value *Callee, BasicBlock *NormalDest, BasicBlock *UnwindDest, ArrayRef<Value *> Args = None, const Twine &Name = "") { - return Insert(InvokeInst::Create(Ty, Callee, NormalDest, UnwindDest, Args), - Name); + InvokeInst *II = + InvokeInst::Create(Ty, Callee, NormalDest, UnwindDest, Args); + if (IsFPConstrained) + setConstrainedFPCallAttr(II); + return Insert(II, Name); } InvokeInst *CreateInvoke(FunctionCallee Callee, BasicBlock *NormalDest, @@ -1938,9 +2040,13 @@ public: /// Same as CreateGlobalString, but return a pointer with "i8*" type /// instead of a pointer to array of i8. + /// + /// If no module is given via \p M, it is take from the insertion point basic + /// block. Constant *CreateGlobalStringPtr(StringRef Str, const Twine &Name = "", - unsigned AddressSpace = 0) { - GlobalVariable *GV = CreateGlobalString(Str, Name, AddressSpace); + unsigned AddressSpace = 0, + Module *M = nullptr) { + GlobalVariable *GV = CreateGlobalString(Str, Name, AddressSpace, M); Constant *Zero = ConstantInt::get(Type::getInt32Ty(Context), 0); Constant *Indices[] = {Zero, Zero}; return ConstantExpr::getInBoundsGetElementPtr(GV->getValueType(), GV, @@ -2424,6 +2530,13 @@ public: return Insert(new ShuffleVectorInst(V1, V2, Mask), Name); } + /// Create a unary shuffle. The second vector operand of the IR instruction + /// is poison. + Value *CreateShuffleVector(Value *V, ArrayRef<int> Mask, + const Twine &Name = "") { + return CreateShuffleVector(V, PoisonValue::get(V->getType()), Mask, Name); + } + Value *CreateExtractValue(Value *Agg, ArrayRef<unsigned> Idxs, const Twine &Name = "") { @@ -2488,6 +2601,10 @@ public: /// NumElts elements. Value *CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name = ""); + /// Return a vector value that contains \arg V broadcasted to \p + /// EC elements. + Value *CreateVectorSplat(ElementCount EC, Value *V, const Twine &Name = ""); + /// Return a value that has been extracted from a larger integer type. 
Value *CreateExtractInteger(const DataLayout &DL, Value *From, IntegerType *ExtractedTy, uint64_t Offset, diff --git a/llvm/include/llvm/IR/IRPrintingPasses.h b/llvm/include/llvm/IR/IRPrintingPasses.h index 3a1c489ee09f..2e62be7cd1ec 100644 --- a/llvm/include/llvm/IR/IRPrintingPasses.h +++ b/llvm/include/llvm/IR/IRPrintingPasses.h @@ -18,11 +18,12 @@ #ifndef LLVM_IR_IRPRINTINGPASSES_H #define LLVM_IR_IRPRINTINGPASSES_H -#include "llvm/ADT/StringRef.h" #include "llvm/IR/PassManager.h" #include <string> namespace llvm { +class raw_ostream; +class StringRef; /// Create and return a pass that writes the module to the specified /// \c raw_ostream. @@ -44,22 +45,6 @@ void printLLVMNameWithoutPrefix(raw_ostream &OS, StringRef Name); /// Return true if a pass is for IR printing. bool isIRPrintingPass(Pass *P); -/// isFunctionInPrintList - returns true if a function should be printed via -// debugging options like -print-after-all/-print-before-all. -// Tells if the function IR should be printed by PrinterPass. -extern bool isFunctionInPrintList(StringRef FunctionName); - -/// forcePrintModuleIR - returns true if IR printing passes should -// be printing module IR (even for local-pass printers e.g. function-pass) -// to provide more context, as enabled by debugging option -print-module-scope -// Tells if IR printer should be printing module IR -extern bool forcePrintModuleIR(); - -extern bool shouldPrintBeforePass(); -extern bool shouldPrintBeforePass(StringRef); -extern bool shouldPrintAfterPass(); -extern bool shouldPrintAfterPass(StringRef); - /// Pass for printing a Module as LLVM's text IR assembly. /// /// Note: This pass is for use with the new pass manager. Use the create...Pass @@ -75,6 +60,7 @@ public: bool ShouldPreserveUseListOrder = false); PreservedAnalyses run(Module &M, AnalysisManager<Module> &); + static bool isRequired() { return true; } }; /// Pass for printing a Function as LLVM's text IR assembly. @@ -90,8 +76,9 @@ public: PrintFunctionPass(raw_ostream &OS, const std::string &Banner = ""); PreservedAnalyses run(Function &F, AnalysisManager<Function> &); + static bool isRequired() { return true; } }; -} // End llvm namespace +} // namespace llvm #endif diff --git a/llvm/include/llvm/IR/InstrTypes.h b/llvm/include/llvm/IR/InstrTypes.h index 07af00ec9240..f42ef48de6b3 100644 --- a/llvm/include/llvm/IR/InstrTypes.h +++ b/llvm/include/llvm/IR/InstrTypes.h @@ -599,12 +599,6 @@ public: BasicBlock *InsertAtEnd ///< The block to insert the instruction into ); - /// Check whether it is valid to call getCastOpcode for these types. - static bool isCastable( - Type *SrcTy, ///< The Type from which the value should be cast. - Type *DestTy ///< The Type to which the value should be cast. - ); - /// Check whether a bitcast between these types is valid static bool isBitCastable( Type *SrcTy, ///< The Type from which the value should be cast. @@ -650,8 +644,8 @@ public: /// DataLayout argument is to determine the pointer size when examining casts /// involving Integer and Pointer types. They are no-op casts if the integer /// is the same size as the pointer. However, pointer size varies with - /// platform. - /// Determine if the described cast is a no-op cast. + /// platform. Note that a precondition of this method is that the cast is + /// legal - i.e. the instruction formed with these operands would verify. 
 static bool isNoopCast(
 Instruction::CastOps Opcode, ///< Opcode of cast
 Type *SrcTy, ///< SrcTy of cast
@@ -691,11 +685,14 @@ public:
 /// Return the destination type, as a convenience
 Type* getDestTy() const { return getType(); }
- /// This method can be used to determine if a cast from S to DstTy using
+ /// This method can be used to determine if a cast from SrcTy to DstTy using
 /// Opcode op is valid or not.
 /// @returns true iff the proposed cast is valid.
 /// Determine if a cast is valid without creating one.
- static bool castIsValid(Instruction::CastOps op, Value *S, Type *DstTy);
+ static bool castIsValid(Instruction::CastOps op, Type *SrcTy, Type *DstTy);
+ static bool castIsValid(Instruction::CastOps op, Value *S, Type *DstTy) {
+ return castIsValid(op, S->getType(), DstTy);
+ }
 /// Methods for support type inquiry through isa, cast, and dyn_cast:
 static bool classof(const Instruction *I) {
@@ -805,8 +802,8 @@ public:
 void setPredicate(Predicate P) { setSubclassData<PredicateField>(P); }
 static bool isFPPredicate(Predicate P) {
- assert(FIRST_FCMP_PREDICATE == 0 &&
- "FIRST_FCMP_PREDICATE is required to be 0");
+ static_assert(FIRST_FCMP_PREDICATE == 0,
+ "FIRST_FCMP_PREDICATE is required to be 0");
 return P <= LAST_FCMP_PREDICATE;
 }
@@ -848,20 +845,38 @@ public:
 /// Return the predicate as if the operands were swapped.
 static Predicate getSwappedPredicate(Predicate pred);
- /// For predicate of kind "is X or equal to 0" returns the predicate "is X".
- /// For predicate of kind "is X" returns the predicate "is X or equal to 0".
- /// does not support other kind of predicates.
- /// @returns the predicate that does not contains is equal to zero if
- /// it had and vice versa.
- /// Return the flipped strictness of predicate
- Predicate getFlippedStrictnessPredicate() const {
- return getFlippedStrictnessPredicate(getPredicate());
+ /// This is a static version that you can use without an instruction
+ /// available.
+ /// @returns true if the comparison predicate is strict, false otherwise.
+ static bool isStrictPredicate(Predicate predicate);
+
+ /// @returns true if the comparison predicate is strict, false otherwise.
+ /// Determine if this instruction is using a strict comparison predicate.
+ bool isStrictPredicate() const { return isStrictPredicate(getPredicate()); }
+
+ /// This is a static version that you can use without an instruction
+ /// available.
+ /// @returns true if the comparison predicate is non-strict, false otherwise.
+ static bool isNonStrictPredicate(Predicate predicate);
+
+ /// @returns true if the comparison predicate is non-strict, false otherwise.
+ /// Determine if this instruction is using a non-strict comparison predicate.
+ bool isNonStrictPredicate() const {
+ return isNonStrictPredicate(getPredicate());
+ }
+
+ /// For example, SGE -> SGT, SLE -> SLT, ULE -> ULT, UGE -> UGT.
+ /// Returns the strict version of non-strict comparisons.
+ Predicate getStrictPredicate() const {
+ return getStrictPredicate(getPredicate());
 }
 /// This is a static version that you can use without an instruction
 /// available.
- /// Return the flipped strictness of predicate
- static Predicate getFlippedStrictnessPredicate(Predicate pred);
+ /// @returns the strict version of comparison provided in \p pred.
+ /// If \p pred is not a strict comparison predicate, returns \p pred.
+ /// Returns the strict version of non-strict comparisons.
+ static Predicate getStrictPredicate(Predicate pred);
 /// For example, SGT -> SGE, SLT -> SLE, ULT -> ULE, UGT -> UGE.
 /// Returns the non-strict version of strict comparisons.
@@ -876,6 +891,21 @@ public:
 /// Returns the non-strict version of strict comparisons.
 static Predicate getNonStrictPredicate(Predicate pred);
+ /// This is a static version that you can use without an instruction
+ /// available.
+ /// Return the flipped strictness of predicate
+ static Predicate getFlippedStrictnessPredicate(Predicate pred);
+
+ /// For predicate of kind "is X or equal to 0" returns the predicate "is X".
+ /// For predicate of kind "is X" returns the predicate "is X or equal to 0".
+ /// Does not support other kinds of predicates.
+ /// @returns the predicate with the "or equal to 0" part removed if it was
+ /// present, and added if it was not.
+ /// Return the flipped strictness of predicate
+ Predicate getFlippedStrictnessPredicate() const {
+ return getFlippedStrictnessPredicate(getPredicate());
+ }
+
 /// Provide more efficient getOperand methods.
 DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
@@ -888,9 +918,19 @@ public:
 /// Determine if this CmpInst is commutative.
 bool isCommutative() const;
- /// This is just a convenience that dispatches to the subclasses.
 /// Determine if this is an equals/not equals predicate.
- bool isEquality() const;
+ /// This is a static version that you can use without an instruction
+ /// available.
+ static bool isEquality(Predicate pred);
+
+ /// Determine if this is an equals/not equals predicate.
+ bool isEquality() const { return isEquality(getPredicate()); }
+
+ /// Return true if the predicate is relational (not EQ or NE).
+ static bool isRelational(Predicate P) { return !isEquality(P); }
+
+ /// Return true if the predicate is relational (not EQ or NE).
+ bool isRelational() const { return !isEquality(); }
 /// @returns true if the comparison is signed, false otherwise.
 /// Determine if this instruction is using a signed comparison.
@@ -917,6 +957,30 @@ public:
 return getSignedPredicate(getPredicate());
 }
+ /// For example, SLT->ULT, SLE->ULE, SGT->UGT, SGE->UGE, ULT->Failed assert
+ /// @returns the unsigned version of the signed predicate pred.
+ static Predicate getUnsignedPredicate(Predicate pred);
+
+ /// For example, SLT->ULT, SLE->ULE, SGT->UGT, SGE->UGE, ULT->Failed assert
+ /// @returns the unsigned version of the predicate for this instruction (which
+ /// has to be a signed predicate).
+ /// Return the unsigned version of a predicate
+ Predicate getUnsignedPredicate() {
+ return getUnsignedPredicate(getPredicate());
+ }
+
+ /// For example, SLT->ULT, ULT->SLT, SLE->ULE, ULE->SLE, EQ->Failed assert
+ /// @returns the unsigned version of the signed predicate pred or
+ /// the signed version of the unsigned predicate pred.
+ static Predicate getFlippedSignednessPredicate(Predicate pred);
+
+ /// For example, SLT->ULT, ULT->SLT, SLE->ULE, ULE->SLE, EQ->Failed assert
+ /// @returns the unsigned version of the signed predicate pred or
+ /// the signed version of the unsigned predicate pred.
+ Predicate getFlippedSignednessPredicate() {
+ return getFlippedSignednessPredicate(getPredicate());
+ }
+
 /// This is just a convenience.
 /// Determine if this is true when both operands are the same.
bool isTrueWhenEqual() const { @@ -1062,7 +1126,7 @@ public: explicit OperandBundleDefT(const OperandBundleUse &OBU) { Tag = std::string(OBU.getTagName()); - Inputs.insert(Inputs.end(), OBU.Inputs.begin(), OBU.Inputs.end()); + llvm::append_range(Inputs, OBU.Inputs); } ArrayRef<InputTy> inputs() const { return Inputs; } @@ -1301,7 +1365,7 @@ public: /// Returns true if this CallSite passes the given Value* as an argument to /// the called function. bool hasArgument(const Value *V) const { - return llvm::any_of(args(), [V](const Value *Arg) { return Arg == V; }); + return llvm::is_contained(args(), V); } Value *getCalledOperand() const { return Op<CalledOperandOpEndIdx>(); } @@ -1393,14 +1457,18 @@ public: /// void setAttributes(AttributeList A) { Attrs = A; } - /// Determine whether this call has the given attribute. + /// Determine whether this call has the given attribute. If it does not + /// then determine if the called function has the attribute, but only if + /// the attribute is allowed for the call. bool hasFnAttr(Attribute::AttrKind Kind) const { assert(Kind != Attribute::NoBuiltin && "Use CallBase::isNoBuiltin() to check for Attribute::NoBuiltin"); return hasFnAttrImpl(Kind); } - /// Determine whether this call has the given attribute. + /// Determine whether this call has the given attribute. If it does not + /// then determine if the called function has the attribute, but only if + /// the attribute is allowed for the call. bool hasFnAttr(StringRef Kind) const { return hasFnAttrImpl(Kind); } /// adds the attribute to the list of attributes. @@ -1447,6 +1515,12 @@ public: setAttributes(PAL); } + void removeAttributes(unsigned i, const AttrBuilder &Attrs) { + AttributeList PAL = getAttributes(); + PAL = PAL.removeAttributes(getContext(), i, Attrs); + setAttributes(PAL); + } + /// Removes the attribute from the given argument void removeParamAttr(unsigned ArgNo, Attribute::AttrKind Kind) { assert(ArgNo < getNumArgOperands() && "Out of bounds"); @@ -1479,7 +1553,11 @@ public: } /// Determine whether the return value has the given attribute. - bool hasRetAttr(Attribute::AttrKind Kind) const; + bool hasRetAttr(Attribute::AttrKind Kind) const { + return hasRetAttrImpl(Kind); + } + /// Determine whether the return value has the given attribute. + bool hasRetAttr(StringRef Kind) const { return hasRetAttrImpl(Kind); } /// Determine whether the argument or parameter has the given attribute. bool paramHasAttr(unsigned ArgNo, Attribute::AttrKind Kind) const; @@ -1678,6 +1756,10 @@ public: bool onlyReadsMemory() const { return doesNotAccessMemory() || hasFnAttr(Attribute::ReadOnly); } + + /// Returns true if this function is guaranteed to return. + bool willReturn() const { return hasFnAttr(Attribute::WillReturn); } + void setOnlyReadsMemory() { addAttribute(AttributeList::FunctionIndex, Attribute::ReadOnly); } @@ -2158,6 +2240,18 @@ private: return hasFnAttrOnCalledFunction(Kind); } + + /// Determine whether the return value has the given attribute. Supports + /// Attribute::AttrKind and StringRef as \p AttrKind types. + template <typename AttrKind> bool hasRetAttrImpl(AttrKind Kind) const { + if (Attrs.hasAttribute(AttributeList::ReturnIndex, Kind)) + return true; + + // Look at the callee, if available. 
+ if (const Function *F = getCalledFunction()) + return F->getAttributes().hasAttribute(AttributeList::ReturnIndex, Kind); + return false; + } }; template <> diff --git a/llvm/include/llvm/IR/Instruction.h b/llvm/include/llvm/IR/Instruction.h index a03eac0ad40d..d2a55f89fac9 100644 --- a/llvm/include/llvm/IR/Instruction.h +++ b/llvm/include/llvm/IR/Instruction.h @@ -256,13 +256,11 @@ public: //===--------------------------------------------------------------------===// /// Return true if this instruction has any metadata attached to it. - bool hasMetadata() const { return DbgLoc || hasMetadataHashEntry(); } + bool hasMetadata() const { return DbgLoc || Value::hasMetadata(); } /// Return true if this instruction has metadata attached to it other than a /// debug location. - bool hasMetadataOtherThanDebugLoc() const { - return hasMetadataHashEntry(); - } + bool hasMetadataOtherThanDebugLoc() const { return Value::hasMetadata(); } /// Return true if this instruction has the given type of metadata attached. bool hasMetadata(unsigned KindID) const { @@ -301,8 +299,7 @@ public: /// debug location. void getAllMetadataOtherThanDebugLoc( SmallVectorImpl<std::pair<unsigned, MDNode *>> &MDs) const { - if (hasMetadataOtherThanDebugLoc()) - getAllMetadataOtherThanDebugLocImpl(MDs); + Value::getAllMetadata(MDs); } /// Fills the AAMDNodes structure with AA metadata from this instruction. @@ -343,6 +340,11 @@ public: } /// @} + /// Adds an !annotation metadata node with \p Annotation to this instruction. + /// If this instruction already has !annotation metadata, append \p Annotation + /// to the existing node. + void addAnnotationMetadata(StringRef Annotation); + /// Sets the metadata on this instruction from the AAMDNodes structure. void setAAMetadata(const AAMDNodes &N); @@ -492,21 +494,26 @@ public: /// merged DebugLoc. void applyMergedLocation(const DILocation *LocA, const DILocation *LocB); -private: - /// Return true if we have an entry in the on-the-side metadata hash. - bool hasMetadataHashEntry() const { - return Bitfield::test<HasMetadataField>(getSubclassDataFromValue()); - } + /// Updates the debug location given that the instruction has been hoisted + /// from a block to a predecessor of that block. + /// Note: it is undefined behavior to call this on an instruction not + /// currently inserted into a function. + void updateLocationAfterHoist(); + /// Drop the instruction's debug location. This does not guarantee removal + /// of the !dbg source location attachment, as it must set a line 0 location + /// with scope information attached on call instructions. To guarantee + /// removal of the !dbg attachment, use the \ref setDebugLoc() API. + /// Note: it is undefined behavior to call this on an instruction not + /// currently inserted into a function. + void dropLocation(); + +private: // These are all implemented in Metadata.cpp. MDNode *getMetadataImpl(unsigned KindID) const; MDNode *getMetadataImpl(StringRef Kind) const; void getAllMetadataImpl(SmallVectorImpl<std::pair<unsigned, MDNode *>> &) const; - void getAllMetadataOtherThanDebugLocImpl( - SmallVectorImpl<std::pair<unsigned, MDNode *>> &) const; - /// Clear all hashtable-based metadata from this instruction. - void clearMetadataHashEntries(); public: //===--------------------------------------------------------------------===// @@ -532,7 +539,7 @@ public: /// In LLVM, these are the commutative operators, plus SetEQ and SetNE, when /// applied to any type. 
/// - bool isCommutative() const { return isCommutative(getOpcode()); } + bool isCommutative() const LLVM_READONLY; static bool isCommutative(unsigned Opcode) { switch (Opcode) { case Add: case FAdd: @@ -644,19 +651,25 @@ public: bool isLifetimeStartOrEnd() const; /// Return a pointer to the next non-debug instruction in the same basic - /// block as 'this', or nullptr if no such instruction exists. - const Instruction *getNextNonDebugInstruction() const; - Instruction *getNextNonDebugInstruction() { + /// block as 'this', or nullptr if no such instruction exists. Skip any pseudo + /// operations if \c SkipPseudoOp is true. + const Instruction * + getNextNonDebugInstruction(bool SkipPseudoOp = false) const; + Instruction *getNextNonDebugInstruction(bool SkipPseudoOp = false) { return const_cast<Instruction *>( - static_cast<const Instruction *>(this)->getNextNonDebugInstruction()); + static_cast<const Instruction *>(this)->getNextNonDebugInstruction( + SkipPseudoOp)); } /// Return a pointer to the previous non-debug instruction in the same basic - /// block as 'this', or nullptr if no such instruction exists. - const Instruction *getPrevNonDebugInstruction() const; - Instruction *getPrevNonDebugInstruction() { + /// block as 'this', or nullptr if no such instruction exists. Skip any pseudo + /// operations if \c SkipPseudoOp is true. + const Instruction * + getPrevNonDebugInstruction(bool SkipPseudoOp = false) const; + Instruction *getPrevNonDebugInstruction(bool SkipPseudoOp = false) { return const_cast<Instruction *>( - static_cast<const Instruction *>(this)->getPrevNonDebugInstruction()); + static_cast<const Instruction *>(this)->getPrevNonDebugInstruction( + SkipPseudoOp)); } /// Create a copy of 'this' instruction that is identical in all ways except @@ -787,8 +800,6 @@ private: return Value::getSubclassDataFromValue(); } - void setHasMetadataHashEntry(bool V) { setSubclassData<HasMetadataField>(V); } - void setParent(BasicBlock *P); protected: diff --git a/llvm/include/llvm/IR/Instructions.h b/llvm/include/llvm/IR/Instructions.h index 0afc585dfbe5..00ecc2aa7f37 100644 --- a/llvm/include/llvm/IR/Instructions.h +++ b/llvm/include/llvm/IR/Instructions.h @@ -27,6 +27,7 @@ #include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CallingConv.h" +#include "llvm/IR/CFG.h" #include "llvm/IR/Constant.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" @@ -105,7 +106,7 @@ public: /// Get allocation size in bits. Returns None if size can't be determined, /// e.g. in case of a VLA. - Optional<uint64_t> getAllocationSizeInBits(const DataLayout &DL) const; + Optional<TypeSize> getAllocationSizeInBits(const DataLayout &DL) const; /// Return the type that is being allocated by the instruction. Type *getAllocatedType() const { return AllocatedType; } @@ -1289,6 +1290,30 @@ public: return !isEquality(P); } + /// Return true if the predicate is SGT or UGT. + /// + static bool isGT(Predicate P) { + return P == ICMP_SGT || P == ICMP_UGT; + } + + /// Return true if the predicate is SLT or ULT. + /// + static bool isLT(Predicate P) { + return P == ICMP_SLT || P == ICMP_ULT; + } + + /// Return true if the predicate is SGE or UGE. + /// + static bool isGE(Predicate P) { + return P == ICMP_SGE || P == ICMP_UGE; + } + + /// Return true if the predicate is SLE or ULE. 
+ ///
+ static bool isLE(Predicate P) {
+ return P == ICMP_SLE || P == ICMP_ULE;
+ }
+
 /// Exchange the two operands to this instruction in such a way that it does
 /// not modify the semantics of the instruction. The predicate value may be
 /// changed to retain the same result if the predicate is order dependent
@@ -1560,6 +1585,16 @@ public:
 static CallInst *Create(CallInst *CI, ArrayRef<OperandBundleDef> Bundles,
 Instruction *InsertPt = nullptr);
+ /// Create a clone of \p CI with a different set of operand bundles and
+ /// insert it before \p InsertPt.
+ ///
+ /// The returned call instruction is identical to \p CI in every way except
+ /// that the operand bundle for the new instruction is set to the operand
+ /// bundle in \p Bundle.
+ static CallInst *CreateWithReplacedBundle(CallInst *CI,
+ OperandBundleDef Bundle,
+ Instruction *InsertPt = nullptr);
+
 /// Generate the IR for a call to malloc:
 /// 1. Compute the malloc call's argument as the specified type's size,
 /// possibly multiplied by the array size if the array size is not
@@ -2035,8 +2070,9 @@ public:
 /// Examples: shufflevector <4 x n> A, <4 x n> B, <1,2,3>
 /// shufflevector <4 x n> A, <4 x n> B, <1,2,3,4,5>
 bool changesLength() const {
- unsigned NumSourceElts =
- cast<VectorType>(Op<0>()->getType())->getElementCount().Min;
+ unsigned NumSourceElts = cast<VectorType>(Op<0>()->getType())
+ ->getElementCount()
+ .getKnownMinValue();
 unsigned NumMaskElts = ShuffleMask.size();
 return NumSourceElts != NumMaskElts;
 }
@@ -2045,8 +2081,9 @@ public:
 /// elements than its source vectors.
 /// Example: shufflevector <2 x n> A, <2 x n> B, <1,2,3>
 bool increasesLength() const {
- unsigned NumSourceElts =
- cast<VectorType>(Op<0>()->getType())->getNumElements();
+ unsigned NumSourceElts = cast<VectorType>(Op<0>()->getType())
+ ->getElementCount()
+ .getKnownMinValue();
 unsigned NumMaskElts = ShuffleMask.size();
 return NumSourceElts < NumMaskElts;
 }
@@ -2232,6 +2269,10 @@ public:
 static bool isExtractSubvectorMask(const Constant *Mask, int NumSrcElts,
 int &Index) {
 assert(Mask->getType()->isVectorTy() && "Shuffle needs vector constant.");
+ // Not possible to express a shuffle mask for a scalable vector for this
+ // case.
+ if (isa<ScalableVectorType>(Mask->getType()))
+ return false;
 SmallVector<int, 16> MaskAsInts;
 getShuffleMask(Mask, MaskAsInts);
 return isExtractSubvectorMask(MaskAsInts, NumSrcElts, Index);
@@ -2239,7 +2280,13 @@ public:
 /// Return true if this shuffle mask is an extract subvector mask.
 bool isExtractSubvectorMask(int &Index) const {
- int NumSrcElts = cast<VectorType>(Op<0>()->getType())->getNumElements();
+ // Not possible to express a shuffle mask for a scalable vector for this
+ // case.
+ if (isa<ScalableVectorType>(getType()))
+ return false;
+
+ int NumSrcElts =
+ cast<FixedVectorType>(Op<0>()->getType())->getNumElements();
 return isExtractSubvectorMask(ShuffleMask, NumSrcElts, Index);
 }
@@ -2743,6 +2790,15 @@ public:
 /// non-undef value.
 bool hasConstantOrUndefValue() const;
+ /// If the PHI node is complete, meaning all of its parent's predecessors
+ /// have an incoming value in this PHI, return true; otherwise return false.
+ bool isComplete() const { + return llvm::all_of(predecessors(getParent()), + [this](const BasicBlock *Pred) { + return getBasicBlockIndex(Pred) >= 0; + }); + } + /// Methods for support type inquiry through isa, cast, and dyn_cast: static bool classof(const Instruction *I) { return I->getOpcode() == Instruction::PHI; @@ -3768,6 +3824,16 @@ public: static InvokeInst *Create(InvokeInst *II, ArrayRef<OperandBundleDef> Bundles, Instruction *InsertPt = nullptr); + /// Create a clone of \p II with a different set of operand bundles and + /// insert it before \p InsertPt. + /// + /// The returned invoke instruction is identical to \p II in every way except + /// that the operand bundle for the new instruction is set to the operand + /// bundle in \p Bundle. + static InvokeInst *CreateWithReplacedBundle(InvokeInst *II, + OperandBundleDef Bundles, + Instruction *InsertPt = nullptr); + // get*Dest - Return the destination basic blocks... BasicBlock *getNormalDest() const { return cast<BasicBlock>(Op<NormalDestOpEndIdx>()); diff --git a/llvm/include/llvm/IR/IntrinsicInst.h b/llvm/include/llvm/IR/IntrinsicInst.h index 7a8898464e66..9d68f3fdde6c 100644 --- a/llvm/include/llvm/IR/IntrinsicInst.h +++ b/llvm/include/llvm/IR/IntrinsicInst.h @@ -52,6 +52,36 @@ public: return getCalledFunction()->getIntrinsicID(); } + /// Return true if swapping the first two arguments to the intrinsic produces + /// the same result. + bool isCommutative() const { + switch (getIntrinsicID()) { + case Intrinsic::maxnum: + case Intrinsic::minnum: + case Intrinsic::maximum: + case Intrinsic::minimum: + case Intrinsic::smax: + case Intrinsic::smin: + case Intrinsic::umax: + case Intrinsic::umin: + case Intrinsic::sadd_sat: + case Intrinsic::uadd_sat: + case Intrinsic::sadd_with_overflow: + case Intrinsic::uadd_with_overflow: + case Intrinsic::smul_with_overflow: + case Intrinsic::umul_with_overflow: + case Intrinsic::smul_fix: + case Intrinsic::umul_fix: + case Intrinsic::smul_fix_sat: + case Intrinsic::umul_fix_sat: + case Intrinsic::fma: + case Intrinsic::fmuladd: + return true; + default: + return false; + } + } + // Methods for support type inquiry through isa, cast, and dyn_cast: static bool classof(const CallInst *I) { if (const Function *CF = I->getCalledFunction()) @@ -937,6 +967,51 @@ public: } }; +class PseudoProbeInst : public IntrinsicInst { +public: + static bool classof(const IntrinsicInst *I) { + return I->getIntrinsicID() == Intrinsic::pseudoprobe; + } + + static bool classof(const Value *V) { + return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V)); + } + + ConstantInt *getFuncGuid() const { + return cast<ConstantInt>(const_cast<Value *>(getArgOperand(0))); + } + + ConstantInt *getAttributes() const { + return cast<ConstantInt>(const_cast<Value *>(getArgOperand(2))); + } + + ConstantInt *getIndex() const { + return cast<ConstantInt>(const_cast<Value *>(getArgOperand(1))); + } +}; + +class NoAliasScopeDeclInst : public IntrinsicInst { +public: + static bool classof(const IntrinsicInst *I) { + return I->getIntrinsicID() == Intrinsic::experimental_noalias_scope_decl; + } + + static bool classof(const Value *V) { + return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V)); + } + + MDNode *getScopeList() const { + auto *MV = + cast<MetadataAsValue>(getOperand(Intrinsic::NoAliasScopeDeclScopeArg)); + return cast<MDNode>(MV->getMetadata()); + } + + void setScopeList(MDNode *ScopeList) { + setOperand(Intrinsic::NoAliasScopeDeclScopeArg, + MetadataAsValue::get(getContext(), ScopeList)); + } +}; + } // 
end namespace llvm #endif // LLVM_IR_INTRINSICINST_H diff --git a/llvm/include/llvm/IR/Intrinsics.h b/llvm/include/llvm/IR/Intrinsics.h index a9e6525e2f3d..f9b6c098a3f2 100644 --- a/llvm/include/llvm/IR/Intrinsics.h +++ b/llvm/include/llvm/IR/Intrinsics.h @@ -34,6 +34,9 @@ class AttributeList; /// function known by LLVM. The enum values are returned by /// Function::getIntrinsicID(). namespace Intrinsic { + // Abstraction for the arguments of the noalias intrinsics + static const int NoAliasScopeDeclScopeArg = 0; + // Intrinsic ID type. This is an opaque typedef to facilitate splitting up // the enum into target-specific enums. typedef unsigned ID; @@ -125,7 +128,8 @@ namespace Intrinsic { VecElementArgument, Subdivide2Argument, Subdivide4Argument, - VecOfBitcastsToInt + VecOfBitcastsToInt, + AMX } Kind; union { @@ -188,10 +192,8 @@ namespace Intrinsic { } static IITDescriptor getVector(unsigned Width, bool IsScalable) { - IITDescriptor Result; - Result.Kind = Vector; - Result.Vector_Width.Min = Width; - Result.Vector_Width.Scalable = IsScalable; + IITDescriptor Result = {Vector, {0}}; + Result.Vector_Width = ElementCount::get(Width, IsScalable); return Result; } }; diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index 4918ea876df6..b2bfc6e6f9e6 100644 --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -17,7 +17,9 @@ include "llvm/CodeGen/SDNodeProperties.td" // Properties we keep track of for intrinsics. //===----------------------------------------------------------------------===// -class IntrinsicProperty; +class IntrinsicProperty<bit is_default = false> { + bit IsDefault = is_default; +} // Intr*Mem - Memory properties. If no property is set, the worst case // is assumed (it may read and write any memory it can get access to and it may @@ -27,10 +29,6 @@ class IntrinsicProperty; // effects. It may be CSE'd deleted if dead, etc. def IntrNoMem : IntrinsicProperty; -// IntrNoSync - Threads executing the intrinsic will not synchronize using -// memory or other means. -def IntrNoSync : IntrinsicProperty; - // IntrReadMem - This intrinsic only reads from memory. It does not write to // memory and has no other side effects. Therefore, it cannot be moved across // potentially aliasing stores. However, it can be reordered otherwise and can @@ -81,6 +79,11 @@ class NoAlias<AttrIndex idx> : IntrinsicProperty { int ArgNo = idx.Value; } +// NoUndef - The specified argument is neither undef nor poison. +class NoUndef<AttrIndex idx> : IntrinsicProperty { + int ArgNo = idx.Value; +} + class Align<AttrIndex idx, int align> : IntrinsicProperty { int ArgNo = idx.Value; int Align = align; @@ -117,9 +120,15 @@ class ReadNone<AttrIndex idx> : IntrinsicProperty { def IntrNoReturn : IntrinsicProperty; -def IntrNoFree : IntrinsicProperty; +// IntrNoSync - Threads executing the intrinsic will not synchronize using +// memory or other means. Applied by default. +def IntrNoSync : IntrinsicProperty<1>; + +// Applied by default. +def IntrNoFree : IntrinsicProperty<1>; -def IntrWillReturn : IntrinsicProperty; +// Applied by default. +def IntrWillReturn : IntrinsicProperty<1>; // IntrCold - Calls to this intrinsic are cold. // Parallels the cold attribute on LLVM IR functions. 
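Several hunks above (the shufflevector helpers and IITDescriptor::getVector) replace the old {Min, Scalable} pair with the ElementCount helper. As an illustrative sketch, not taken from this commit, this is roughly how client code reads the lane count of a possibly scalable vector type after the change; the helper name is hypothetical.

#include "llvm/IR/DerivedTypes.h"
#include "llvm/Support/TypeSize.h"

// Known-minimum lane count of any vector type. For <4 x i32> this is 4;
// for <vscale x 4 x i32> it is also 4, and EC.isScalable() reports that
// the real count is a runtime multiple of that minimum.
unsigned knownMinElements(llvm::VectorType *VT) {
  llvm::ElementCount EC = VT->getElementCount();
  return EC.getKnownMinValue();
}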
@@ -152,7 +161,7 @@ def IntrHasSideEffects : IntrinsicProperty; class LLVMType<ValueType vt> { ValueType VT = vt; - int isAny = 0; + int isAny = false; } class LLVMQualPointerType<LLVMType elty, int addrspace> @@ -168,7 +177,7 @@ class LLVMAnyPointerType<LLVMType elty> : LLVMType<iPTRAny>{ LLVMType ElTy = elty; - let isAny = 1; + let isAny = true; } // Match the type of another intrinsic parameter. Number is an index into the @@ -217,7 +226,7 @@ class LLVMSubdivide4VectorType<int num> : LLVMMatchType<num>; class LLVMVectorOfBitcastsToInt<int num> : LLVMMatchType<num>; def llvm_void_ty : LLVMType<isVoid>; -let isAny = 1 in { +let isAny = true in { def llvm_any_ty : LLVMType<Any>; def llvm_anyint_ty : LLVMType<iAny>; def llvm_anyfloat_ty : LLVMType<fAny>; @@ -246,6 +255,8 @@ def llvm_token_ty : LLVMType<token>; // token def llvm_x86mmx_ty : LLVMType<x86mmx>; def llvm_ptrx86mmx_ty : LLVMPointerType<llvm_x86mmx_ty>; // <1 x i64>* +def llvm_x86amx_ty : LLVMType<x86amx>; + def llvm_v2i1_ty : LLVMType<v2i1>; // 2 x i1 def llvm_v4i1_ty : LLVMType<v4i1>; // 4 x i1 def llvm_v8i1_ty : LLVMType<v8i1>; // 8 x i1 @@ -253,6 +264,7 @@ def llvm_v16i1_ty : LLVMType<v16i1>; // 16 x i1 def llvm_v32i1_ty : LLVMType<v32i1>; // 32 x i1 def llvm_v64i1_ty : LLVMType<v64i1>; // 64 x i1 def llvm_v128i1_ty : LLVMType<v128i1>; // 128 x i1 +def llvm_v256i1_ty : LLVMType<v256i1>; // 256 x i1 def llvm_v512i1_ty : LLVMType<v512i1>; // 512 x i1 def llvm_v1024i1_ty : LLVMType<v1024i1>; //1024 x i1 @@ -282,6 +294,7 @@ def llvm_v8i32_ty : LLVMType<v8i32>; // 8 x i32 def llvm_v16i32_ty : LLVMType<v16i32>; // 16 x i32 def llvm_v32i32_ty : LLVMType<v32i32>; // 32 x i32 def llvm_v64i32_ty : LLVMType<v64i32>; // 64 x i32 +def llvm_v256i32_ty : LLVMType<v256i32>; //256 x i32 def llvm_v1i64_ty : LLVMType<v1i64>; // 1 x i64 def llvm_v2i64_ty : LLVMType<v2i64>; // 2 x i64 @@ -331,7 +344,8 @@ class Intrinsic<list<LLVMType> ret_types, list<LLVMType> param_types = [], list<IntrinsicProperty> intr_properties = [], string name = "", - list<SDNodeProperty> sd_properties = []> : SDPatternOperator { + list<SDNodeProperty> sd_properties = [], + bit disable_default_attributes = true> : SDPatternOperator { string LLVMName = name; string TargetPrefix = ""; // Set to a prefix for target-specific intrinsics. list<LLVMType> RetTypes = ret_types; @@ -339,9 +353,23 @@ class Intrinsic<list<LLVMType> ret_types, list<IntrinsicProperty> IntrProperties = intr_properties; let Properties = sd_properties; - bit isTarget = 0; + // Disable applying IntrinsicProperties that are marked default with + // IntrinsicProperty<1> + bit DisableDefaultAttributes = disable_default_attributes; + + bit isTarget = false; } +// Intrinisc with default attributes (disable_default_attributes = false). +class DefaultAttrsIntrinsic<list<LLVMType> ret_types, + list<LLVMType> param_types = [], + list<IntrinsicProperty> intr_properties = [], + string name = "", + list<SDNodeProperty> sd_properties = []> + : Intrinsic<ret_types, param_types, + intr_properties, name, + sd_properties, /*disable_default_attributes*/ 0> {} + /// GCCBuiltin - If this intrinsic exactly corresponds to a GCC builtin, this /// specifies the name of the builtin. This provides automatic CBE and CFE /// support. 
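DefaultAttrsIntrinsic only changes which TableGen properties are applied by default; from the C++ side the effect should show up as attributes on the declared intrinsic. A hedged sketch, not from the commit, assuming the default IntrNoSync/IntrNoFree/IntrWillReturn properties surface as the corresponding function attributes on llvm.sqrt:

#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"

using namespace llvm;

// Declares llvm.sqrt.f64 in M and checks the attributes the default
// intrinsic properties are expected to contribute.
bool sqrtHasDefaultAttrs(Module &M) {
  Function *Sqrt = Intrinsic::getDeclaration(
      &M, Intrinsic::sqrt, {Type::getDoubleTy(M.getContext())});
  return Sqrt->hasFnAttribute(Attribute::NoSync) &&
         Sqrt->hasFnAttribute(Attribute::NoFree) &&
         Sqrt->hasFnAttribute(Attribute::WillReturn);
}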
@@ -357,10 +385,10 @@ class MSBuiltin<string name> { //===--------------- Variable Argument Handling Intrinsics ----------------===// // -def int_vastart : Intrinsic<[], [llvm_ptr_ty], [], "llvm.va_start">; -def int_vacopy : Intrinsic<[], [llvm_ptr_ty, llvm_ptr_ty], [], +def int_vastart : DefaultAttrsIntrinsic<[], [llvm_ptr_ty], [], "llvm.va_start">; +def int_vacopy : DefaultAttrsIntrinsic<[], [llvm_ptr_ty, llvm_ptr_ty], [], "llvm.va_copy">; -def int_vaend : Intrinsic<[], [llvm_ptr_ty], [], "llvm.va_end">; +def int_vaend : DefaultAttrsIntrinsic<[], [llvm_ptr_ty], [], "llvm.va_end">; //===------------------- Garbage Collection Intrinsics --------------------===// // @@ -448,12 +476,12 @@ def int_objc_arc_annotation_bottomup_bbend : Intrinsic<[], //===--------------------- Code Generator Intrinsics ----------------------===// // -def int_returnaddress : Intrinsic<[llvm_ptr_ty], [llvm_i32_ty], +def int_returnaddress : DefaultAttrsIntrinsic<[llvm_ptr_ty], [llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<0>>]>; -def int_addressofreturnaddress : Intrinsic<[llvm_anyptr_ty], [], [IntrNoMem]>; -def int_frameaddress : Intrinsic<[llvm_anyptr_ty], [llvm_i32_ty], +def int_addressofreturnaddress : DefaultAttrsIntrinsic<[llvm_anyptr_ty], [], [IntrNoMem]>; +def int_frameaddress : DefaultAttrsIntrinsic<[llvm_anyptr_ty], [llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<0>>]>; -def int_sponentry : Intrinsic<[llvm_anyptr_ty], [], [IntrNoMem]>; +def int_sponentry : DefaultAttrsIntrinsic<[llvm_anyptr_ty], [], [IntrNoMem]>; def int_read_register : Intrinsic<[llvm_anyint_ty], [llvm_metadata_ty], [IntrReadMem], "llvm.read_register">; def int_write_register : Intrinsic<[], [llvm_metadata_ty, llvm_anyint_ty], @@ -464,33 +492,33 @@ def int_read_volatile_register : Intrinsic<[llvm_anyint_ty], [llvm_metadata_ty] // Gets the address of the local variable area. This is typically a copy of the // stack, frame, or base pointer depending on the type of prologue. -def int_localaddress : Intrinsic<[llvm_ptr_ty], [], [IntrNoMem]>; +def int_localaddress : DefaultAttrsIntrinsic<[llvm_ptr_ty], [], [IntrNoMem]>; // Escapes local variables to allow access from other functions. -def int_localescape : Intrinsic<[], [llvm_vararg_ty]>; +def int_localescape : DefaultAttrsIntrinsic<[], [llvm_vararg_ty]>; // Given a function and the localaddress of a parent frame, returns a pointer // to an escaped allocation indicated by the index. -def int_localrecover : Intrinsic<[llvm_ptr_ty], +def int_localrecover : DefaultAttrsIntrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_ptr_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<2>>]>; // Given the frame pointer passed into an SEH filter function, returns a // pointer to the local variable area suitable for use with llvm.localrecover. -def int_eh_recoverfp : Intrinsic<[llvm_ptr_ty], +def int_eh_recoverfp : DefaultAttrsIntrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_ptr_ty], [IntrNoMem]>; // Note: we treat stacksave/stackrestore as writemem because we don't otherwise // model their dependencies on allocas. 
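The code-generator intrinsics in this hunk are normally emitted from a frontend or instrumentation pass through IRBuilder. A minimal illustrative sketch (not part of the diff) that calls llvm.returnaddress for the current frame:

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"

using namespace llvm;

// Emits llvm.returnaddress(i32 0) at the builder's insertion point.
// The argument must be an immediate, matching ImmArg<ArgIndex<0>> above.
Value *emitReturnAddress(IRBuilder<> &B, Module &M) {
  Function *RA = Intrinsic::getDeclaration(&M, Intrinsic::returnaddress);
  return B.CreateCall(RA, {B.getInt32(0)});
}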
-def int_stacksave : Intrinsic<[llvm_ptr_ty]>, +def int_stacksave : DefaultAttrsIntrinsic<[llvm_ptr_ty]>, GCCBuiltin<"__builtin_stack_save">; -def int_stackrestore : Intrinsic<[], [llvm_ptr_ty]>, +def int_stackrestore : DefaultAttrsIntrinsic<[], [llvm_ptr_ty]>, GCCBuiltin<"__builtin_stack_restore">; -def int_get_dynamic_area_offset : Intrinsic<[llvm_anyint_ty]>; +def int_get_dynamic_area_offset : DefaultAttrsIntrinsic<[llvm_anyint_ty]>; -def int_thread_pointer : Intrinsic<[llvm_ptr_ty], [], [IntrNoMem]>, +def int_thread_pointer : DefaultAttrsIntrinsic<[llvm_ptr_ty], [], [IntrNoMem]>, GCCBuiltin<"__builtin_thread_pointer">; // IntrInaccessibleMemOrArgMemOnly is a little more pessimistic than strictly @@ -498,51 +526,59 @@ def int_thread_pointer : Intrinsic<[llvm_ptr_ty], [], [IntrNoMem]>, // from being reordered overly much with respect to nearby access to the same // memory while not impeding optimization. def int_prefetch - : Intrinsic<[], [ llvm_anyptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty ], + : DefaultAttrsIntrinsic<[], [ llvm_anyptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty ], [IntrInaccessibleMemOrArgMemOnly, IntrWillReturn, ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<2>>]>; -def int_pcmarker : Intrinsic<[], [llvm_i32_ty]>; +def int_pcmarker : DefaultAttrsIntrinsic<[], [llvm_i32_ty]>; -def int_readcyclecounter : Intrinsic<[llvm_i64_ty]>; +def int_readcyclecounter : DefaultAttrsIntrinsic<[llvm_i64_ty]>; // The assume intrinsic is marked as arbitrarily writing so that proper // control dependencies will be maintained. -def int_assume : Intrinsic<[], [llvm_i1_ty], [IntrWillReturn]>; +def int_assume : DefaultAttrsIntrinsic<[], [llvm_i1_ty], [IntrWillReturn, + NoUndef<ArgIndex<0>>]>; + +// 'llvm.experimental.noalias.scope.decl' intrinsic: Inserted at the location of +// noalias scope declaration. Makes it possible to identify that a noalias scope +// is only valid inside the body of a loop. +// +// Purpose of the different arguments: +// - arg0: id.scope: metadata representing the scope declaration. +def int_experimental_noalias_scope_decl + : DefaultAttrsIntrinsic<[], [llvm_metadata_ty], + [IntrInaccessibleMemOnly]>; // blocks LICM and some more // Stack Protector Intrinsic - The stackprotector intrinsic writes the stack // guard to the correct place on the stack frame. -def int_stackprotector : Intrinsic<[], [llvm_ptr_ty, llvm_ptrptr_ty], []>; -def int_stackguard : Intrinsic<[llvm_ptr_ty], [], []>; +def int_stackprotector : DefaultAttrsIntrinsic<[], [llvm_ptr_ty, llvm_ptrptr_ty], []>; +def int_stackguard : DefaultAttrsIntrinsic<[llvm_ptr_ty], [], []>; // A counter increment for instrumentation based profiling. def int_instrprof_increment : Intrinsic<[], [llvm_ptr_ty, llvm_i64_ty, - llvm_i32_ty, llvm_i32_ty], - []>; + llvm_i32_ty, llvm_i32_ty]>; // A counter increment with step for instrumentation based profiling. def int_instrprof_increment_step : Intrinsic<[], [llvm_ptr_ty, llvm_i64_ty, - llvm_i32_ty, llvm_i32_ty, llvm_i64_ty], - []>; + llvm_i32_ty, llvm_i32_ty, llvm_i64_ty]>; // A call to profile runtime for value profiling of target expressions // through instrumentation based profiling. 
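The llvm.experimental.noalias.scope.decl marker defined above is matched in C++ through the NoAliasScopeDeclInst wrapper added earlier in this diff. A small sketch, illustrative only, that pulls the scope list out of an arbitrary instruction:

#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Metadata.h"

using namespace llvm;

// Returns the declared noalias scope list if I is a
// llvm.experimental.noalias.scope.decl call, or nullptr otherwise.
MDNode *getDeclaredScopesOrNull(Instruction *I) {
  if (auto *Decl = dyn_cast<NoAliasScopeDeclInst>(I))
    return Decl->getScopeList();
  return nullptr;
}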
def int_instrprof_value_profile : Intrinsic<[], [llvm_ptr_ty, llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, - llvm_i32_ty], - []>; + llvm_i32_ty]>; -def int_call_preallocated_setup : Intrinsic<[llvm_token_ty], [llvm_i32_ty]>; -def int_call_preallocated_arg : Intrinsic<[llvm_ptr_ty], [llvm_token_ty, llvm_i32_ty]>; -def int_call_preallocated_teardown : Intrinsic<[], [llvm_token_ty]>; +def int_call_preallocated_setup : DefaultAttrsIntrinsic<[llvm_token_ty], [llvm_i32_ty]>; +def int_call_preallocated_arg : DefaultAttrsIntrinsic<[llvm_ptr_ty], [llvm_token_ty, llvm_i32_ty]>; +def int_call_preallocated_teardown : DefaultAttrsIntrinsic<[], [llvm_token_ty]>; //===------------------- Standard C Library Intrinsics --------------------===// // -def int_memcpy : Intrinsic<[], +def int_memcpy : DefaultAttrsIntrinsic<[], [llvm_anyptr_ty, llvm_anyptr_ty, llvm_anyint_ty, llvm_i1_ty], [IntrArgMemOnly, IntrWillReturn, @@ -556,7 +592,7 @@ def int_memcpy : Intrinsic<[], // external function. // The third argument (specifying the size) must be a constant. def int_memcpy_inline - : Intrinsic<[], + : DefaultAttrsIntrinsic<[], [llvm_anyptr_ty, llvm_anyptr_ty, llvm_anyint_ty, llvm_i1_ty], [IntrArgMemOnly, IntrWillReturn, NoCapture<ArgIndex<0>>, NoCapture<ArgIndex<1>>, @@ -564,13 +600,14 @@ def int_memcpy_inline WriteOnly<ArgIndex<0>>, ReadOnly<ArgIndex<1>>, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>]>; -def int_memmove : Intrinsic<[], +def int_memmove : DefaultAttrsIntrinsic<[], [llvm_anyptr_ty, llvm_anyptr_ty, llvm_anyint_ty, llvm_i1_ty], [IntrArgMemOnly, IntrWillReturn, NoCapture<ArgIndex<0>>, NoCapture<ArgIndex<1>>, - ReadOnly<ArgIndex<1>>, ImmArg<ArgIndex<3>>]>; -def int_memset : Intrinsic<[], + WriteOnly<ArgIndex<0>>, ReadOnly<ArgIndex<1>>, + ImmArg<ArgIndex<3>>]>; +def int_memset : DefaultAttrsIntrinsic<[], [llvm_anyptr_ty, llvm_i8_ty, llvm_anyint_ty, llvm_i1_ty], [IntrWriteMem, IntrArgMemOnly, IntrWillReturn, @@ -581,65 +618,65 @@ def int_memset : Intrinsic<[], // rounding modes and FP exception handling. let IntrProperties = [IntrNoMem, IntrSpeculatable, IntrWillReturn] in { - def int_fma : Intrinsic<[llvm_anyfloat_ty], + def int_fma : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>]>; - def int_fmuladd : Intrinsic<[llvm_anyfloat_ty], + def int_fmuladd : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>]>; // These functions do not read memory, but are sensitive to the // rounding mode. LLVM purposely does not model changes to the FP // environment so they can be treated as readnone. 
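The llvm.memcpy family above is usually reached through the IRBuilder helpers rather than by constructing the call by hand. A hedged sketch of emitting a small fixed-size copy; the 16-byte length and alignment of 1 are arbitrary values chosen for illustration:

#include "llvm/IR/IRBuilder.h"
#include "llvm/Support/Alignment.h"

using namespace llvm;

// Copies 16 bytes from Src to Dst; the builder selects the llvm.memcpy
// overload that matches the pointer and length types.
void emitSmallCopy(IRBuilder<> &B, Value *Dst, Value *Src) {
  B.CreateMemCpy(Dst, MaybeAlign(1), Src, MaybeAlign(1), /*Size=*/16);
}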
- def int_sqrt : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; - def int_powi : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, llvm_i32_ty]>; - def int_sin : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; - def int_cos : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; - def int_pow : Intrinsic<[llvm_anyfloat_ty], + def int_sqrt : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; + def int_powi : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, llvm_i32_ty]>; + def int_sin : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; + def int_cos : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; + def int_pow : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>]>; - def int_log : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; - def int_log10: Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; - def int_log2 : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; - def int_exp : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; - def int_exp2 : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; - def int_fabs : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; - def int_copysign : Intrinsic<[llvm_anyfloat_ty], + def int_log : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; + def int_log10: DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; + def int_log2 : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; + def int_exp : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; + def int_exp2 : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; + def int_fabs : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; + def int_copysign : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>]>; - def int_floor : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; - def int_ceil : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; - def int_trunc : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; - def int_rint : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; - def int_nearbyint : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; - def int_round : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; - def int_roundeven : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; - def int_canonicalize : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], + def int_floor : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; + def int_ceil : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; + def int_trunc : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; + def int_rint : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; + def int_nearbyint : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; + def int_round : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; + def int_roundeven : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; + def int_canonicalize : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>; - def int_lround : Intrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty]>; - def int_llround : Intrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty]>; - def int_lrint : Intrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty]>; - def int_llrint : Intrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty]>; + def int_lround : DefaultAttrsIntrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty]>; + def int_llround : DefaultAttrsIntrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty]>; + def int_lrint : DefaultAttrsIntrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty]>; + def 
int_llrint : DefaultAttrsIntrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty]>; } -def int_minnum : Intrinsic<[llvm_anyfloat_ty], +def int_minnum : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable, IntrWillReturn, Commutative] >; -def int_maxnum : Intrinsic<[llvm_anyfloat_ty], +def int_maxnum : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable, IntrWillReturn, Commutative] >; -def int_minimum : Intrinsic<[llvm_anyfloat_ty], +def int_minimum : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable, IntrWillReturn, Commutative] >; -def int_maximum : Intrinsic<[llvm_anyfloat_ty], +def int_maximum : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable, IntrWillReturn, Commutative] >; // Internal interface for object size checking -def int_objectsize : Intrinsic<[llvm_anyint_ty], +def int_objectsize : DefaultAttrsIntrinsic<[llvm_anyint_ty], [llvm_anyptr_ty, llvm_i1_ty, llvm_i1_ty, llvm_i1_ty], [IntrNoMem, IntrSpeculatable, IntrWillReturn, @@ -651,77 +688,77 @@ def int_objectsize : Intrinsic<[llvm_anyint_ty], // let IntrProperties = [IntrInaccessibleMemOnly, IntrWillReturn] in { - def int_flt_rounds : Intrinsic<[llvm_i32_ty], []>; + def int_flt_rounds : DefaultAttrsIntrinsic<[llvm_i32_ty], []>; } //===--------------- Constrained Floating Point Intrinsics ----------------===// // let IntrProperties = [IntrInaccessibleMemOnly, IntrWillReturn] in { - def int_experimental_constrained_fadd : Intrinsic<[ llvm_anyfloat_ty ], + def int_experimental_constrained_fadd : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ], [ LLVMMatchType<0>, LLVMMatchType<0>, llvm_metadata_ty, llvm_metadata_ty ]>; - def int_experimental_constrained_fsub : Intrinsic<[ llvm_anyfloat_ty ], + def int_experimental_constrained_fsub : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ], [ LLVMMatchType<0>, LLVMMatchType<0>, llvm_metadata_ty, llvm_metadata_ty ]>; - def int_experimental_constrained_fmul : Intrinsic<[ llvm_anyfloat_ty ], + def int_experimental_constrained_fmul : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ], [ LLVMMatchType<0>, LLVMMatchType<0>, llvm_metadata_ty, llvm_metadata_ty ]>; - def int_experimental_constrained_fdiv : Intrinsic<[ llvm_anyfloat_ty ], + def int_experimental_constrained_fdiv : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ], [ LLVMMatchType<0>, LLVMMatchType<0>, llvm_metadata_ty, llvm_metadata_ty ]>; - def int_experimental_constrained_frem : Intrinsic<[ llvm_anyfloat_ty ], + def int_experimental_constrained_frem : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ], [ LLVMMatchType<0>, LLVMMatchType<0>, llvm_metadata_ty, llvm_metadata_ty ]>; - def int_experimental_constrained_fma : Intrinsic<[ llvm_anyfloat_ty ], + def int_experimental_constrained_fma : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ], [ LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, llvm_metadata_ty, llvm_metadata_ty ]>; - def int_experimental_constrained_fmuladd : Intrinsic<[ llvm_anyfloat_ty ], + def int_experimental_constrained_fmuladd : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ], [ LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, llvm_metadata_ty, llvm_metadata_ty ]>; - def int_experimental_constrained_fptosi : Intrinsic<[ llvm_anyint_ty ], + def int_experimental_constrained_fptosi : DefaultAttrsIntrinsic<[ llvm_anyint_ty ], [ llvm_anyfloat_ty, llvm_metadata_ty ]>; - def int_experimental_constrained_fptoui : Intrinsic<[ 
llvm_anyint_ty ], + def int_experimental_constrained_fptoui : DefaultAttrsIntrinsic<[ llvm_anyint_ty ], [ llvm_anyfloat_ty, llvm_metadata_ty ]>; - def int_experimental_constrained_sitofp : Intrinsic<[ llvm_anyfloat_ty ], + def int_experimental_constrained_sitofp : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ], [ llvm_anyint_ty, llvm_metadata_ty, llvm_metadata_ty ]>; - def int_experimental_constrained_uitofp : Intrinsic<[ llvm_anyfloat_ty ], + def int_experimental_constrained_uitofp : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ], [ llvm_anyint_ty, llvm_metadata_ty, llvm_metadata_ty ]>; - def int_experimental_constrained_fptrunc : Intrinsic<[ llvm_anyfloat_ty ], + def int_experimental_constrained_fptrunc : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ], [ llvm_anyfloat_ty, llvm_metadata_ty, llvm_metadata_ty ]>; - def int_experimental_constrained_fpext : Intrinsic<[ llvm_anyfloat_ty ], + def int_experimental_constrained_fpext : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ], [ llvm_anyfloat_ty, llvm_metadata_ty ]>; @@ -729,110 +766,110 @@ let IntrProperties = [IntrInaccessibleMemOnly, IntrWillReturn] in { // versions of each of them. When strict rounding and exception control are // not required the non-constrained versions of these intrinsics should be // used. - def int_experimental_constrained_sqrt : Intrinsic<[ llvm_anyfloat_ty ], + def int_experimental_constrained_sqrt : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ], [ LLVMMatchType<0>, llvm_metadata_ty, llvm_metadata_ty ]>; - def int_experimental_constrained_powi : Intrinsic<[ llvm_anyfloat_ty ], + def int_experimental_constrained_powi : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ], [ LLVMMatchType<0>, llvm_i32_ty, llvm_metadata_ty, llvm_metadata_ty ]>; - def int_experimental_constrained_sin : Intrinsic<[ llvm_anyfloat_ty ], + def int_experimental_constrained_sin : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ], [ LLVMMatchType<0>, llvm_metadata_ty, llvm_metadata_ty ]>; - def int_experimental_constrained_cos : Intrinsic<[ llvm_anyfloat_ty ], + def int_experimental_constrained_cos : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ], [ LLVMMatchType<0>, llvm_metadata_ty, llvm_metadata_ty ]>; - def int_experimental_constrained_pow : Intrinsic<[ llvm_anyfloat_ty ], + def int_experimental_constrained_pow : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ], [ LLVMMatchType<0>, LLVMMatchType<0>, llvm_metadata_ty, llvm_metadata_ty ]>; - def int_experimental_constrained_log : Intrinsic<[ llvm_anyfloat_ty ], + def int_experimental_constrained_log : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ], [ LLVMMatchType<0>, llvm_metadata_ty, llvm_metadata_ty ]>; - def int_experimental_constrained_log10: Intrinsic<[ llvm_anyfloat_ty ], + def int_experimental_constrained_log10: DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ], [ LLVMMatchType<0>, llvm_metadata_ty, llvm_metadata_ty ]>; - def int_experimental_constrained_log2 : Intrinsic<[ llvm_anyfloat_ty ], + def int_experimental_constrained_log2 : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ], [ LLVMMatchType<0>, llvm_metadata_ty, llvm_metadata_ty ]>; - def int_experimental_constrained_exp : Intrinsic<[ llvm_anyfloat_ty ], + def int_experimental_constrained_exp : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ], [ LLVMMatchType<0>, llvm_metadata_ty, llvm_metadata_ty ]>; - def int_experimental_constrained_exp2 : Intrinsic<[ llvm_anyfloat_ty ], + def int_experimental_constrained_exp2 : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ], [ LLVMMatchType<0>, llvm_metadata_ty, llvm_metadata_ty ]>; - def int_experimental_constrained_rint : Intrinsic<[ llvm_anyfloat_ty ], + 
def int_experimental_constrained_rint : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ], [ LLVMMatchType<0>, llvm_metadata_ty, llvm_metadata_ty ]>; - def int_experimental_constrained_nearbyint : Intrinsic<[ llvm_anyfloat_ty ], + def int_experimental_constrained_nearbyint : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ], [ LLVMMatchType<0>, llvm_metadata_ty, llvm_metadata_ty ]>; - def int_experimental_constrained_lrint : Intrinsic<[ llvm_anyint_ty ], + def int_experimental_constrained_lrint : DefaultAttrsIntrinsic<[ llvm_anyint_ty ], [ llvm_anyfloat_ty, llvm_metadata_ty, llvm_metadata_ty ]>; - def int_experimental_constrained_llrint : Intrinsic<[ llvm_anyint_ty ], + def int_experimental_constrained_llrint : DefaultAttrsIntrinsic<[ llvm_anyint_ty ], [ llvm_anyfloat_ty, llvm_metadata_ty, llvm_metadata_ty ]>; - def int_experimental_constrained_maxnum : Intrinsic<[ llvm_anyfloat_ty ], + def int_experimental_constrained_maxnum : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ], [ LLVMMatchType<0>, LLVMMatchType<0>, llvm_metadata_ty ]>; - def int_experimental_constrained_minnum : Intrinsic<[ llvm_anyfloat_ty ], + def int_experimental_constrained_minnum : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ], [ LLVMMatchType<0>, LLVMMatchType<0>, llvm_metadata_ty ]>; - def int_experimental_constrained_maximum : Intrinsic<[ llvm_anyfloat_ty ], + def int_experimental_constrained_maximum : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ], [ LLVMMatchType<0>, LLVMMatchType<0>, llvm_metadata_ty ]>; - def int_experimental_constrained_minimum : Intrinsic<[ llvm_anyfloat_ty ], + def int_experimental_constrained_minimum : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ], [ LLVMMatchType<0>, LLVMMatchType<0>, llvm_metadata_ty ]>; - def int_experimental_constrained_ceil : Intrinsic<[ llvm_anyfloat_ty ], + def int_experimental_constrained_ceil : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ], [ LLVMMatchType<0>, llvm_metadata_ty ]>; - def int_experimental_constrained_floor : Intrinsic<[ llvm_anyfloat_ty ], + def int_experimental_constrained_floor : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ], [ LLVMMatchType<0>, llvm_metadata_ty ]>; - def int_experimental_constrained_lround : Intrinsic<[ llvm_anyint_ty ], + def int_experimental_constrained_lround : DefaultAttrsIntrinsic<[ llvm_anyint_ty ], [ llvm_anyfloat_ty, llvm_metadata_ty ]>; - def int_experimental_constrained_llround : Intrinsic<[ llvm_anyint_ty ], + def int_experimental_constrained_llround : DefaultAttrsIntrinsic<[ llvm_anyint_ty ], [ llvm_anyfloat_ty, llvm_metadata_ty ]>; - def int_experimental_constrained_round : Intrinsic<[ llvm_anyfloat_ty ], + def int_experimental_constrained_round : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ], [ LLVMMatchType<0>, llvm_metadata_ty ]>; - def int_experimental_constrained_roundeven : Intrinsic<[ llvm_anyfloat_ty ], + def int_experimental_constrained_roundeven : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ], [ LLVMMatchType<0>, llvm_metadata_ty ]>; - def int_experimental_constrained_trunc : Intrinsic<[ llvm_anyfloat_ty ], + def int_experimental_constrained_trunc : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ], [ LLVMMatchType<0>, llvm_metadata_ty ]>; // Constrained floating-point comparison (quiet and signaling variants). // Third operand is the predicate represented as a metadata string. 
def int_experimental_constrained_fcmp - : Intrinsic<[ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty> ], + : DefaultAttrsIntrinsic<[ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty> ], [ llvm_anyfloat_ty, LLVMMatchType<0>, llvm_metadata_ty, llvm_metadata_ty ]>; def int_experimental_constrained_fcmps - : Intrinsic<[ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty> ], + : DefaultAttrsIntrinsic<[ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty> ], [ llvm_anyfloat_ty, LLVMMatchType<0>, llvm_metadata_ty, llvm_metadata_ty ]>; } @@ -840,10 +877,10 @@ let IntrProperties = [IntrInaccessibleMemOnly, IntrWillReturn] in { //===------------------------- Expect Intrinsics --------------------------===// // -def int_expect : Intrinsic<[llvm_anyint_ty], +def int_expect : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem, IntrWillReturn]>; -def int_expect_with_probability : Intrinsic<[llvm_anyint_ty], +def int_expect_with_probability : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>, llvm_double_ty], [IntrNoMem, IntrWillReturn]>; @@ -852,19 +889,19 @@ def int_expect_with_probability : Intrinsic<[llvm_anyint_ty], // None of these intrinsics accesses memory at all. let IntrProperties = [IntrNoMem, IntrSpeculatable, IntrWillReturn] in { - def int_bswap: Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>]>; - def int_ctpop: Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>]>; - def int_bitreverse : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>]>; - def int_fshl : Intrinsic<[llvm_anyint_ty], + def int_bswap: DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>]>; + def int_ctpop: DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>]>; + def int_bitreverse : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>]>; + def int_fshl : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>]>; - def int_fshr : Intrinsic<[llvm_anyint_ty], + def int_fshr : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>]>; } let IntrProperties = [IntrNoMem, IntrSpeculatable, IntrWillReturn, ImmArg<ArgIndex<1>>] in { - def int_ctlz : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, llvm_i1_ty]>; - def int_cttz : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, llvm_i1_ty]>; + def int_ctlz : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, llvm_i1_ty]>; + def int_cttz : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, llvm_i1_ty]>; } //===------------------------ Debugger Intrinsics -------------------------===// @@ -875,19 +912,19 @@ let IntrProperties = [IntrNoMem, IntrSpeculatable, IntrWillReturn, // needed in a few places. These synthetic intrinsics have no // side-effects and just mark information about their operands. 
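The funnel-shift intrinsics in this hunk subsume rotates: llvm.fshl with both value operands equal is a rotate-left. A minimal illustrative sketch, not from the commit:

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"

using namespace llvm;

// Rotate X left by Amt bits using llvm.fshl(X, X, Amt).
Value *emitRotateLeft(IRBuilder<> &B, Module &M, Value *X, Value *Amt) {
  Function *Fshl =
      Intrinsic::getDeclaration(&M, Intrinsic::fshl, {X->getType()});
  return B.CreateCall(Fshl, {X, X, Amt});
}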
let IntrProperties = [IntrNoMem, IntrSpeculatable, IntrWillReturn] in { - def int_dbg_declare : Intrinsic<[], + def int_dbg_declare : DefaultAttrsIntrinsic<[], [llvm_metadata_ty, llvm_metadata_ty, llvm_metadata_ty]>; - def int_dbg_value : Intrinsic<[], + def int_dbg_value : DefaultAttrsIntrinsic<[], [llvm_metadata_ty, llvm_metadata_ty, llvm_metadata_ty]>; - def int_dbg_addr : Intrinsic<[], + def int_dbg_addr : DefaultAttrsIntrinsic<[], [llvm_metadata_ty, llvm_metadata_ty, llvm_metadata_ty]>; - def int_dbg_label : Intrinsic<[], + def int_dbg_label : DefaultAttrsIntrinsic<[], [llvm_metadata_ty]>; } @@ -917,10 +954,9 @@ def int_eh_unwind_init: Intrinsic<[]>, def int_eh_dwarf_cfa : Intrinsic<[llvm_ptr_ty], [llvm_i32_ty]>; -let IntrProperties = [IntrNoMem] in { - def int_eh_sjlj_lsda : Intrinsic<[llvm_ptr_ty]>; - def int_eh_sjlj_callsite : Intrinsic<[], [llvm_i32_ty]>; -} +def int_eh_sjlj_lsda : Intrinsic<[llvm_ptr_ty], [], [IntrNoMem]>; +def int_eh_sjlj_callsite : Intrinsic<[], [llvm_i32_ty], [IntrNoMem]>; + def int_eh_sjlj_functioncontext : Intrinsic<[], [llvm_ptr_ty]>; def int_eh_sjlj_setjmp : Intrinsic<[llvm_i32_ty], [llvm_ptr_ty]>; def int_eh_sjlj_longjmp : Intrinsic<[], [llvm_ptr_ty], [IntrNoReturn]>; @@ -928,15 +964,15 @@ def int_eh_sjlj_setup_dispatch : Intrinsic<[], []>; //===---------------- Generic Variable Attribute Intrinsics----------------===// // -def int_var_annotation : Intrinsic<[], +def int_var_annotation : DefaultAttrsIntrinsic<[], [llvm_ptr_ty, llvm_ptr_ty, - llvm_ptr_ty, llvm_i32_ty], + llvm_ptr_ty, llvm_i32_ty, llvm_ptr_ty], [IntrWillReturn], "llvm.var.annotation">; -def int_ptr_annotation : Intrinsic<[LLVMAnyPointerType<llvm_anyint_ty>], +def int_ptr_annotation : DefaultAttrsIntrinsic<[LLVMAnyPointerType<llvm_anyint_ty>], [LLVMMatchType<0>, llvm_ptr_ty, llvm_ptr_ty, - llvm_i32_ty], + llvm_i32_ty, llvm_ptr_ty], [IntrWillReturn], "llvm.ptr.annotation">; -def int_annotation : Intrinsic<[llvm_anyint_ty], +def int_annotation : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, llvm_ptr_ty, llvm_ptr_ty, llvm_i32_ty], [IntrWillReturn], "llvm.annotation">; @@ -944,7 +980,7 @@ def int_annotation : Intrinsic<[llvm_anyint_ty], // Annotates the current program point with metadata strings which are emitted // as CodeView debug info records. This is expensive, as it disables inlining // and is modelled as having side effects. -def int_codeview_annotation : Intrinsic<[], [llvm_metadata_ty], +def int_codeview_annotation : DefaultAttrsIntrinsic<[], [llvm_metadata_ty], [IntrInaccessibleMemOnly, IntrNoDuplicate, IntrWillReturn], "llvm.codeview.annotation">; @@ -964,99 +1000,124 @@ def int_adjust_trampoline : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty], // Expose the carry flag from add operations on two integrals. 
let IntrProperties = [IntrNoMem, IntrSpeculatable, IntrWillReturn] in { - def int_sadd_with_overflow : Intrinsic<[llvm_anyint_ty, + def int_sadd_with_overflow : DefaultAttrsIntrinsic<[llvm_anyint_ty, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>], [LLVMMatchType<0>, LLVMMatchType<0>]>; - def int_uadd_with_overflow : Intrinsic<[llvm_anyint_ty, + def int_uadd_with_overflow : DefaultAttrsIntrinsic<[llvm_anyint_ty, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>], [LLVMMatchType<0>, LLVMMatchType<0>]>; - def int_ssub_with_overflow : Intrinsic<[llvm_anyint_ty, + def int_ssub_with_overflow : DefaultAttrsIntrinsic<[llvm_anyint_ty, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>], [LLVMMatchType<0>, LLVMMatchType<0>]>; - def int_usub_with_overflow : Intrinsic<[llvm_anyint_ty, + def int_usub_with_overflow : DefaultAttrsIntrinsic<[llvm_anyint_ty, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>], [LLVMMatchType<0>, LLVMMatchType<0>]>; - def int_smul_with_overflow : Intrinsic<[llvm_anyint_ty, + def int_smul_with_overflow : DefaultAttrsIntrinsic<[llvm_anyint_ty, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>], [LLVMMatchType<0>, LLVMMatchType<0>]>; - def int_umul_with_overflow : Intrinsic<[llvm_anyint_ty, + def int_umul_with_overflow : DefaultAttrsIntrinsic<[llvm_anyint_ty, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>], [LLVMMatchType<0>, LLVMMatchType<0>]>; } //===------------------------- Saturation Arithmetic Intrinsics ---------------------===// // -def int_sadd_sat : Intrinsic<[llvm_anyint_ty], +def int_sadd_sat : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable, IntrWillReturn, Commutative]>; -def int_uadd_sat : Intrinsic<[llvm_anyint_ty], +def int_uadd_sat : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable, IntrWillReturn, Commutative]>; -def int_ssub_sat : Intrinsic<[llvm_anyint_ty], +def int_ssub_sat : DefaultAttrsIntrinsic<[llvm_anyint_ty], + [LLVMMatchType<0>, LLVMMatchType<0>], + [IntrNoMem, IntrSpeculatable, IntrWillReturn]>; +def int_usub_sat : DefaultAttrsIntrinsic<[llvm_anyint_ty], + [LLVMMatchType<0>, LLVMMatchType<0>], + [IntrNoMem, IntrSpeculatable, IntrWillReturn]>; +def int_sshl_sat : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable, IntrWillReturn]>; -def int_usub_sat : Intrinsic<[llvm_anyint_ty], +def int_ushl_sat : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable, IntrWillReturn]>; //===------------------------- Fixed Point Arithmetic Intrinsics ---------------------===// // -def int_smul_fix : Intrinsic<[llvm_anyint_ty], +def int_smul_fix : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty], [IntrNoMem, IntrSpeculatable, IntrWillReturn, Commutative, ImmArg<ArgIndex<2>>]>; -def int_umul_fix : Intrinsic<[llvm_anyint_ty], +def int_umul_fix : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty], [IntrNoMem, IntrSpeculatable, IntrWillReturn, Commutative, ImmArg<ArgIndex<2>>]>; -def int_sdiv_fix : Intrinsic<[llvm_anyint_ty], +def int_sdiv_fix : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<2>>]>; -def int_udiv_fix : Intrinsic<[llvm_anyint_ty], +def int_udiv_fix : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<2>>]>; //===------------------- Fixed Point Saturation 
Arithmetic Intrinsics ----------------===// // -def int_smul_fix_sat : Intrinsic<[llvm_anyint_ty], +def int_smul_fix_sat : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty], [IntrNoMem, IntrSpeculatable, IntrWillReturn, Commutative, ImmArg<ArgIndex<2>>]>; -def int_umul_fix_sat : Intrinsic<[llvm_anyint_ty], +def int_umul_fix_sat : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty], [IntrNoMem, IntrSpeculatable, IntrWillReturn, Commutative, ImmArg<ArgIndex<2>>]>; -def int_sdiv_fix_sat : Intrinsic<[llvm_anyint_ty], +def int_sdiv_fix_sat : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<2>>]>; -def int_udiv_fix_sat : Intrinsic<[llvm_anyint_ty], +def int_udiv_fix_sat : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<2>>]>; +//===------------------ Integer Min/Max/Abs Intrinsics --------------------===// +// +def int_abs : DefaultAttrsIntrinsic< + [llvm_anyint_ty], [LLVMMatchType<0>, llvm_i1_ty], + [IntrNoMem, IntrSpeculatable, IntrWillReturn, ImmArg<ArgIndex<1>>]>; + +def int_smax : DefaultAttrsIntrinsic< + [llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>], + [IntrNoMem, IntrSpeculatable, IntrWillReturn]>; +def int_smin : DefaultAttrsIntrinsic< + [llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>], + [IntrNoMem, IntrSpeculatable, IntrWillReturn]>; +def int_umax : DefaultAttrsIntrinsic< + [llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>], + [IntrNoMem, IntrSpeculatable, IntrWillReturn]>; +def int_umin : DefaultAttrsIntrinsic< + [llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>], + [IntrNoMem, IntrSpeculatable, IntrWillReturn]>; + //===------------------------- Memory Use Markers -------------------------===// // -def int_lifetime_start : Intrinsic<[], +def int_lifetime_start : DefaultAttrsIntrinsic<[], [llvm_i64_ty, llvm_anyptr_ty], [IntrArgMemOnly, IntrWillReturn, NoCapture<ArgIndex<1>>, ImmArg<ArgIndex<0>>]>; -def int_lifetime_end : Intrinsic<[], +def int_lifetime_end : DefaultAttrsIntrinsic<[], [llvm_i64_ty, llvm_anyptr_ty], [IntrArgMemOnly, IntrWillReturn, NoCapture<ArgIndex<1>>, ImmArg<ArgIndex<0>>]>; -def int_invariant_start : Intrinsic<[llvm_descriptor_ty], +def int_invariant_start : DefaultAttrsIntrinsic<[llvm_descriptor_ty], [llvm_i64_ty, llvm_anyptr_ty], [IntrArgMemOnly, IntrWillReturn, NoCapture<ArgIndex<1>>, ImmArg<ArgIndex<0>>]>; -def int_invariant_end : Intrinsic<[], +def int_invariant_end : DefaultAttrsIntrinsic<[], [llvm_descriptor_ty, llvm_i64_ty, llvm_anyptr_ty], [IntrArgMemOnly, IntrWillReturn, @@ -1075,26 +1136,26 @@ def int_invariant_end : Intrinsic<[], // it would remove barrier. // Note that it is still experimental, which means that its semantics // might change in the future. 
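The integer min/max/abs intrinsics added in this hunk are new in this import; from C++ they can be emitted with the generic intrinsic helpers on IRBuilder. A hedged sketch, illustrative only:

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"

using namespace llvm;

// max(LHS, RHS) via llvm.smax, and |V| via llvm.abs with the
// "poison on INT_MIN" flag set to false (the i1 immediate required by
// ImmArg<ArgIndex<1>> in the definition above).
Value *emitSMax(IRBuilder<> &B, Value *LHS, Value *RHS) {
  return B.CreateBinaryIntrinsic(Intrinsic::smax, LHS, RHS);
}
Value *emitAbs(IRBuilder<> &B, Value *V) {
  return B.CreateIntrinsic(Intrinsic::abs, {V->getType()}, {V, B.getFalse()});
}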
-def int_launder_invariant_group : Intrinsic<[llvm_anyptr_ty], +def int_launder_invariant_group : DefaultAttrsIntrinsic<[llvm_anyptr_ty], [LLVMMatchType<0>], [IntrInaccessibleMemOnly, IntrSpeculatable, IntrWillReturn]>; -def int_strip_invariant_group : Intrinsic<[llvm_anyptr_ty], +def int_strip_invariant_group : DefaultAttrsIntrinsic<[llvm_anyptr_ty], [LLVMMatchType<0>], [IntrSpeculatable, IntrNoMem, IntrWillReturn]>; //===------------------------ Stackmap Intrinsics -------------------------===// // -def int_experimental_stackmap : Intrinsic<[], +def int_experimental_stackmap : DefaultAttrsIntrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_vararg_ty], [Throws]>; -def int_experimental_patchpoint_void : Intrinsic<[], +def int_experimental_patchpoint_void : DefaultAttrsIntrinsic<[], [llvm_i64_ty, llvm_i32_ty, llvm_ptr_ty, llvm_i32_ty, llvm_vararg_ty], [Throws]>; -def int_experimental_patchpoint_i64 : Intrinsic<[llvm_i64_ty], +def int_experimental_patchpoint_i64 : DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty, llvm_ptr_ty, llvm_i32_ty, llvm_vararg_ty], @@ -1139,6 +1200,23 @@ def int_coro_id_retcon_once : Intrinsic<[llvm_token_ty], llvm_ptr_ty, llvm_ptr_ty, llvm_ptr_ty], []>; def int_coro_alloc : Intrinsic<[llvm_i1_ty], [llvm_token_ty], []>; +def int_coro_id_async : Intrinsic<[llvm_token_ty], + [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_ptr_ty], + []>; +def int_coro_async_context_alloc : Intrinsic<[llvm_ptr_ty], + [llvm_ptr_ty, llvm_ptr_ty], + []>; +def int_coro_async_context_dealloc : Intrinsic<[], + [llvm_ptr_ty], + []>; +def int_coro_async_resume : Intrinsic<[llvm_ptr_ty], + [], + []>; +def int_coro_suspend_async : Intrinsic<[llvm_ptr_ty, llvm_ptr_ty, llvm_ptr_ty], + [llvm_ptr_ty, llvm_ptr_ty, llvm_vararg_ty], + []>; +def int_coro_prepare_async : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty], + [IntrNoMem]>; def int_coro_begin : Intrinsic<[llvm_ptr_ty], [llvm_token_ty, llvm_ptr_ty], [WriteOnly<ArgIndex<1>>]>; @@ -1147,6 +1225,8 @@ def int_coro_free : Intrinsic<[llvm_ptr_ty], [llvm_token_ty, llvm_ptr_ty], ReadOnly<ArgIndex<1>>, NoCapture<ArgIndex<1>>]>; def int_coro_end : Intrinsic<[llvm_i1_ty], [llvm_ptr_ty, llvm_i1_ty], []>; +def int_coro_end_async + : Intrinsic<[llvm_i1_ty], [llvm_ptr_ty, llvm_i1_ty, llvm_vararg_ty], []>; def int_coro_frame : Intrinsic<[llvm_ptr_ty], [], [IntrNoMem]>; def int_coro_noop : Intrinsic<[llvm_ptr_ty], [], [IntrNoMem]>; @@ -1190,32 +1270,47 @@ def int_trap : Intrinsic<[], [], [IntrNoReturn, IntrCold]>, GCCBuiltin<"__builtin_trap">; def int_debugtrap : Intrinsic<[]>, GCCBuiltin<"__builtin_debugtrap">; +def int_ubsantrap : Intrinsic<[], [llvm_i8_ty], + [IntrNoReturn, IntrCold, ImmArg<ArgIndex<0>>]>; // Support for dynamic deoptimization (or de-specialization) def int_experimental_deoptimize : Intrinsic<[llvm_any_ty], [llvm_vararg_ty], [Throws]>; // Support for speculative runtime guards -def int_experimental_guard : Intrinsic<[], [llvm_i1_ty, llvm_vararg_ty], +def int_experimental_guard : DefaultAttrsIntrinsic<[], [llvm_i1_ty, llvm_vararg_ty], [Throws]>; // Supports widenable conditions for guards represented as explicit branches. 
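llvm.ubsantrap, added in this hunk, takes an 8-bit immediate encoding the failed-check kind. A small sketch, illustrative only; the kind value is whatever the caller's sanitizer runtime expects:

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"

using namespace llvm;

// Emits llvm.ubsantrap(i8 Kind); the operand must be an immediate,
// matching ImmArg<ArgIndex<0>> in the definition above.
CallInst *emitUbsanTrap(IRBuilder<> &B, Module &M, uint8_t Kind) {
  Function *Trap = Intrinsic::getDeclaration(&M, Intrinsic::ubsantrap);
  return B.CreateCall(Trap, {B.getInt8(Kind)});
}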
-def int_experimental_widenable_condition : Intrinsic<[llvm_i1_ty], [], +def int_experimental_widenable_condition : DefaultAttrsIntrinsic<[llvm_i1_ty], [], [IntrInaccessibleMemOnly, IntrWillReturn, IntrSpeculatable]>; // NOP: calls/invokes to this intrinsic are removed by codegen -def int_donothing : Intrinsic<[], [], [IntrNoMem, IntrWillReturn]>; +def int_donothing : DefaultAttrsIntrinsic<[], [], [IntrNoMem, IntrWillReturn]>; // This instruction has no actual effect, though it is treated by the optimizer // has having opaque side effects. This may be inserted into loops to ensure // that they are not removed even if they turn out to be empty, for languages // which specify that infinite loops must be preserved. -def int_sideeffect : Intrinsic<[], [], [IntrInaccessibleMemOnly, IntrWillReturn]>; +def int_sideeffect : DefaultAttrsIntrinsic<[], [], [IntrInaccessibleMemOnly, IntrWillReturn]>; + +// The pseudoprobe intrinsic works as a place holder to the block it probes. +// Like the sideeffect intrinsic defined above, this intrinsic is treated by the +// optimizer as having opaque side effects so that it won't be get rid of or moved +// out of the block it probes. +def int_pseudoprobe : Intrinsic<[], [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty], + [IntrInaccessibleMemOnly, IntrWillReturn]>; // Intrinsics to support half precision floating point format let IntrProperties = [IntrNoMem, IntrWillReturn] in { -def int_convert_to_fp16 : Intrinsic<[llvm_i16_ty], [llvm_anyfloat_ty]>; -def int_convert_from_fp16 : Intrinsic<[llvm_anyfloat_ty], [llvm_i16_ty]>; +def int_convert_to_fp16 : DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_anyfloat_ty]>; +def int_convert_from_fp16 : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [llvm_i16_ty]>; +} + +// Saturating floating point to integer intrinsics +let IntrProperties = [IntrNoMem, IntrSpeculatable, IntrWillReturn] in { +def int_fptoui_sat : DefaultAttrsIntrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty]>; +def int_fptosi_sat : DefaultAttrsIntrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty]>; } // Clear cache intrinsic, default to ignore (ie. emit nothing) @@ -1224,144 +1319,147 @@ def int_clear_cache : Intrinsic<[], [llvm_ptr_ty, llvm_ptr_ty], [], "llvm.clear_cache">; // Intrinsic to detect whether its argument is a constant. -def int_is_constant : Intrinsic<[llvm_i1_ty], [llvm_any_ty], +def int_is_constant : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_any_ty], [IntrNoMem, IntrWillReturn, IntrConvergent], "llvm.is.constant">; // Intrinsic to mask out bits of a pointer. 
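The llvm.pseudoprobe intrinsic described above pairs with the PseudoProbeInst wrapper added earlier in this diff. A small sketch, illustrative only, that reads a probe's function GUID and index:

#include "llvm/IR/IntrinsicInst.h"
#include <utility>

using namespace llvm;

// Returns {guid, index} for a pseudo probe, or {0, 0} if I is not one.
std::pair<uint64_t, uint64_t> probeGuidAndIndex(const Instruction *I) {
  if (const auto *Probe = dyn_cast<PseudoProbeInst>(I))
    return {Probe->getFuncGuid()->getZExtValue(),
            Probe->getIndex()->getZExtValue()};
  return {0, 0};
}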
-def int_ptrmask: Intrinsic<[llvm_anyptr_ty], [LLVMMatchType<0>, llvm_anyint_ty], +def int_ptrmask: DefaultAttrsIntrinsic<[llvm_anyptr_ty], [LLVMMatchType<0>, llvm_anyint_ty], [IntrNoMem, IntrSpeculatable, IntrWillReturn]>; //===---------------- Vector Predication Intrinsics --------------===// -// Binary operators -let IntrProperties = [IntrNoMem, IntrNoSync, IntrWillReturn] in { - def int_vp_add : Intrinsic<[ llvm_anyvector_ty ], +// Speculatable Binary operators +let IntrProperties = [IntrSpeculatable, IntrNoMem, IntrNoSync, IntrWillReturn] in { + def int_vp_add : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], [ LLVMMatchType<0>, LLVMMatchType<0>, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_i32_ty]>; - def int_vp_sub : Intrinsic<[ llvm_anyvector_ty ], + def int_vp_sub : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], [ LLVMMatchType<0>, LLVMMatchType<0>, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_i32_ty]>; - def int_vp_mul : Intrinsic<[ llvm_anyvector_ty ], + def int_vp_mul : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], [ LLVMMatchType<0>, LLVMMatchType<0>, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_i32_ty]>; - def int_vp_sdiv : Intrinsic<[ llvm_anyvector_ty ], + def int_vp_ashr : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], [ LLVMMatchType<0>, LLVMMatchType<0>, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_i32_ty]>; - def int_vp_udiv : Intrinsic<[ llvm_anyvector_ty ], + def int_vp_lshr : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], [ LLVMMatchType<0>, LLVMMatchType<0>, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_i32_ty]>; - def int_vp_srem : Intrinsic<[ llvm_anyvector_ty ], - [ LLVMMatchType<0>, - LLVMMatchType<0>, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; - def int_vp_urem : Intrinsic<[ llvm_anyvector_ty ], - [ LLVMMatchType<0>, - LLVMMatchType<0>, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; - def int_vp_ashr : Intrinsic<[ llvm_anyvector_ty ], - [ LLVMMatchType<0>, - LLVMMatchType<0>, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; - def int_vp_lshr : Intrinsic<[ llvm_anyvector_ty ], - [ LLVMMatchType<0>, - LLVMMatchType<0>, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty]>; - def int_vp_shl : Intrinsic<[ llvm_anyvector_ty ], + def int_vp_shl : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], [ LLVMMatchType<0>, LLVMMatchType<0>, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_i32_ty]>; - def int_vp_or : Intrinsic<[ llvm_anyvector_ty ], + def int_vp_or : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], [ LLVMMatchType<0>, LLVMMatchType<0>, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_i32_ty]>; - def int_vp_and : Intrinsic<[ llvm_anyvector_ty ], + def int_vp_and : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], [ LLVMMatchType<0>, LLVMMatchType<0>, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_i32_ty]>; - def int_vp_xor : Intrinsic<[ llvm_anyvector_ty ], + def int_vp_xor : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], [ LLVMMatchType<0>, LLVMMatchType<0>, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_i32_ty]>; +} +// Non-speculatable binary operators. 
+let IntrProperties = [IntrNoMem, IntrNoSync, IntrWillReturn] in { + def int_vp_sdiv : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], + [ LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; + def int_vp_udiv : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], + [ LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; + def int_vp_srem : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], + [ LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; + def int_vp_urem : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], + [ LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; } def int_get_active_lane_mask: - Intrinsic<[llvm_anyvector_ty], + DefaultAttrsIntrinsic<[llvm_anyvector_ty], [llvm_anyint_ty, LLVMMatchType<1>], [IntrNoMem, IntrNoSync, IntrWillReturn]>; //===-------------------------- Masked Intrinsics -------------------------===// // -def int_masked_store : Intrinsic<[], [llvm_anyvector_ty, - LLVMAnyPointerType<LLVMMatchType<0>>, - llvm_i32_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>], - [IntrArgMemOnly, IntrWillReturn, ImmArg<ArgIndex<2>>]>; - -def int_masked_load : Intrinsic<[llvm_anyvector_ty], - [LLVMAnyPointerType<LLVMMatchType<0>>, llvm_i32_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMMatchType<0>], - [IntrReadMem, IntrArgMemOnly, IntrWillReturn, - ImmArg<ArgIndex<1>>]>; - -def int_masked_gather: Intrinsic<[llvm_anyvector_ty], - [LLVMVectorOfAnyPointersToElt<0>, llvm_i32_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - LLVMMatchType<0>], - [IntrReadMem, IntrWillReturn, - ImmArg<ArgIndex<1>>]>; - -def int_masked_scatter: Intrinsic<[], - [llvm_anyvector_ty, - LLVMVectorOfAnyPointersToElt<0>, llvm_i32_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>], - [IntrWillReturn, ImmArg<ArgIndex<2>>]>; - -def int_masked_expandload: Intrinsic<[llvm_anyvector_ty], - [LLVMPointerToElt<0>, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - LLVMMatchType<0>], - [IntrReadMem, IntrWillReturn]>; - -def int_masked_compressstore: Intrinsic<[], - [llvm_anyvector_ty, - LLVMPointerToElt<0>, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>], - [IntrArgMemOnly, IntrWillReturn]>; +def int_masked_load: + DefaultAttrsIntrinsic<[llvm_anyvector_ty], + [LLVMAnyPointerType<LLVMMatchType<0>>, llvm_i32_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMMatchType<0>], + [IntrReadMem, IntrArgMemOnly, IntrWillReturn, ImmArg<ArgIndex<1>>]>; + +def int_masked_store: + DefaultAttrsIntrinsic<[], + [llvm_anyvector_ty, LLVMAnyPointerType<LLVMMatchType<0>>, + llvm_i32_ty, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>], + [IntrWriteMem, IntrArgMemOnly, IntrWillReturn, + ImmArg<ArgIndex<2>>]>; + +def int_masked_gather: + DefaultAttrsIntrinsic<[llvm_anyvector_ty], + [LLVMVectorOfAnyPointersToElt<0>, llvm_i32_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMMatchType<0>], + [IntrReadMem, IntrWillReturn, ImmArg<ArgIndex<1>>]>; + +def int_masked_scatter: + DefaultAttrsIntrinsic<[], + [llvm_anyvector_ty, LLVMVectorOfAnyPointersToElt<0>, llvm_i32_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>], + [IntrWriteMem, IntrWillReturn, ImmArg<ArgIndex<2>>]>; + +def int_masked_expandload: + DefaultAttrsIntrinsic<[llvm_anyvector_ty], + [LLVMPointerToElt<0>, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + LLVMMatchType<0>], + [IntrReadMem, IntrWillReturn]>; + +def int_masked_compressstore: + DefaultAttrsIntrinsic<[], + [llvm_anyvector_ty, LLVMPointerToElt<0>, + 
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>], + [IntrWriteMem, IntrArgMemOnly, IntrWillReturn]>; // Test whether a pointer is associated with a type metadata identifier. -def int_type_test : Intrinsic<[llvm_i1_ty], [llvm_ptr_ty, llvm_metadata_ty], +def int_type_test : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_ptr_ty, llvm_metadata_ty], [IntrNoMem, IntrWillReturn]>; // Safely loads a function pointer from a virtual table pointer using type metadata. -def int_type_checked_load : Intrinsic<[llvm_ptr_ty, llvm_i1_ty], +def int_type_checked_load : DefaultAttrsIntrinsic<[llvm_ptr_ty, llvm_i1_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_metadata_ty], [IntrNoMem, IntrWillReturn]>; // Create a branch funnel that implements an indirect call to a limited set of // callees. This needs to be a musttail call. -def int_icall_branch_funnel : Intrinsic<[], [llvm_vararg_ty], []>; +def int_icall_branch_funnel : DefaultAttrsIntrinsic<[], [llvm_vararg_ty], []>; -def int_load_relative: Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_anyint_ty], +def int_load_relative: DefaultAttrsIntrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_anyint_ty], [IntrReadMem, IntrArgMemOnly]>; def int_hwasan_check_memaccess : @@ -1413,54 +1511,55 @@ def int_memset_element_unordered_atomic //===------------------------ Reduction Intrinsics ------------------------===// // -let IntrProperties = [IntrNoMem, IntrWillReturn] in { - def int_experimental_vector_reduce_v2_fadd : Intrinsic<[llvm_anyfloat_ty], - [LLVMMatchType<0>, - llvm_anyvector_ty]>; - def int_experimental_vector_reduce_v2_fmul : Intrinsic<[llvm_anyfloat_ty], - [LLVMMatchType<0>, - llvm_anyvector_ty]>; - def int_experimental_vector_reduce_add : Intrinsic<[LLVMVectorElementType<0>], - [llvm_anyvector_ty]>; - def int_experimental_vector_reduce_mul : Intrinsic<[LLVMVectorElementType<0>], - [llvm_anyvector_ty]>; - def int_experimental_vector_reduce_and : Intrinsic<[LLVMVectorElementType<0>], - [llvm_anyvector_ty]>; - def int_experimental_vector_reduce_or : Intrinsic<[LLVMVectorElementType<0>], - [llvm_anyvector_ty]>; - def int_experimental_vector_reduce_xor : Intrinsic<[LLVMVectorElementType<0>], - [llvm_anyvector_ty]>; - def int_experimental_vector_reduce_smax : Intrinsic<[LLVMVectorElementType<0>], - [llvm_anyvector_ty]>; - def int_experimental_vector_reduce_smin : Intrinsic<[LLVMVectorElementType<0>], - [llvm_anyvector_ty]>; - def int_experimental_vector_reduce_umax : Intrinsic<[LLVMVectorElementType<0>], - [llvm_anyvector_ty]>; - def int_experimental_vector_reduce_umin : Intrinsic<[LLVMVectorElementType<0>], - [llvm_anyvector_ty]>; - def int_experimental_vector_reduce_fmax : Intrinsic<[LLVMVectorElementType<0>], - [llvm_anyvector_ty]>; - def int_experimental_vector_reduce_fmin : Intrinsic<[LLVMVectorElementType<0>], - [llvm_anyvector_ty]>; +let IntrProperties = [IntrNoMem] in { + + def int_vector_reduce_fadd : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], + [LLVMVectorElementType<0>, + llvm_anyvector_ty]>; + def int_vector_reduce_fmul : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], + [LLVMVectorElementType<0>, + llvm_anyvector_ty]>; + def int_vector_reduce_add : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], + [llvm_anyvector_ty]>; + def int_vector_reduce_mul : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], + [llvm_anyvector_ty]>; + def int_vector_reduce_and : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], + [llvm_anyvector_ty]>; + def int_vector_reduce_or : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], + [llvm_anyvector_ty]>; + def int_vector_reduce_xor : 
DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], + [llvm_anyvector_ty]>; + def int_vector_reduce_smax : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], + [llvm_anyvector_ty]>; + def int_vector_reduce_smin : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], + [llvm_anyvector_ty]>; + def int_vector_reduce_umax : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], + [llvm_anyvector_ty]>; + def int_vector_reduce_umin : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], + [llvm_anyvector_ty]>; + def int_vector_reduce_fmax : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], + [llvm_anyvector_ty]>; + def int_vector_reduce_fmin : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], + [llvm_anyvector_ty]>; } //===----- Matrix intrinsics ---------------------------------------------===// def int_matrix_transpose - : Intrinsic<[llvm_anyvector_ty], + : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty], [ IntrNoSync, IntrWillReturn, IntrNoMem, IntrSpeculatable, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<2>>]>; def int_matrix_multiply - : Intrinsic<[llvm_anyvector_ty], + : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty, llvm_anyvector_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoSync, IntrWillReturn, IntrNoMem, IntrSpeculatable, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>]>; def int_matrix_column_major_load - : Intrinsic<[llvm_anyvector_ty], + : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMPointerToElt<0>, llvm_i64_ty, llvm_i1_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoSync, IntrWillReturn, IntrArgMemOnly, IntrReadMem, @@ -1468,7 +1567,7 @@ def int_matrix_column_major_load ImmArg<ArgIndex<4>>]>; def int_matrix_column_major_store - : Intrinsic<[], + : DefaultAttrsIntrinsic<[], [llvm_anyvector_ty, LLVMPointerToElt<0>, llvm_i64_ty, llvm_i1_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoSync, IntrWillReturn, IntrArgMemOnly, IntrWriteMem, @@ -1480,18 +1579,23 @@ def int_matrix_column_major_store // Specify that the value given is the number of iterations that the next loop // will execute. def int_set_loop_iterations : - Intrinsic<[], [llvm_anyint_ty], [IntrNoDuplicate]>; + DefaultAttrsIntrinsic<[], [llvm_anyint_ty], [IntrNoDuplicate]>; + +// Same as the above, but produces a value (the same as the input operand) to +// be fed into the loop. +def int_start_loop_iterations : + DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [IntrNoDuplicate]>; // Specify that the value given is the number of iterations that the next loop // will execute. Also test that the given count is not zero, allowing it to // control entry to a 'while' loop. def int_test_set_loop_iterations : - Intrinsic<[llvm_i1_ty], [llvm_anyint_ty], [IntrNoDuplicate]>; + DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_anyint_ty], [IntrNoDuplicate]>; // Decrement loop counter by the given argument. Return false if the loop // should exit. def int_loop_decrement : - Intrinsic<[llvm_i1_ty], [llvm_anyint_ty], [IntrNoDuplicate]>; + DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_anyint_ty], [IntrNoDuplicate]>; // Decrement the first operand (the loop counter) by the second operand (the // maximum number of elements processed in an iteration). Return the remaining @@ -1501,27 +1605,27 @@ def int_loop_decrement : // it's scevable, so it's the backend's responsibility to handle cases where it // may be optimised. 
def int_loop_decrement_reg : - Intrinsic<[llvm_anyint_ty], + DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoDuplicate]>; //===----- Intrinsics that are used to provide predicate information -----===// -def int_ssa_copy : Intrinsic<[llvm_any_ty], [LLVMMatchType<0>], +def int_ssa_copy : DefaultAttrsIntrinsic<[llvm_any_ty], [LLVMMatchType<0>], [IntrNoMem, Returned<ArgIndex<0>>]>; //===------- Intrinsics that are used to preserve debug information -------===// -def int_preserve_array_access_index : Intrinsic<[llvm_anyptr_ty], +def int_preserve_array_access_index : DefaultAttrsIntrinsic<[llvm_anyptr_ty], [llvm_anyptr_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<2>>]>; -def int_preserve_union_access_index : Intrinsic<[llvm_anyptr_ty], +def int_preserve_union_access_index : DefaultAttrsIntrinsic<[llvm_anyptr_ty], [llvm_anyptr_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>; -def int_preserve_struct_access_index : Intrinsic<[llvm_anyptr_ty], +def int_preserve_struct_access_index : DefaultAttrsIntrinsic<[llvm_anyptr_ty], [llvm_anyptr_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem, @@ -1529,7 +1633,16 @@ def int_preserve_struct_access_index : Intrinsic<[llvm_anyptr_ty], ImmArg<ArgIndex<2>>]>; //===---------- Intrinsics to query properties of scalable vectors --------===// -def int_vscale : Intrinsic<[llvm_anyint_ty], [], [IntrNoMem]>; +def int_vscale : DefaultAttrsIntrinsic<[llvm_anyint_ty], [], [IntrNoMem]>; + +//===---------- Intrinsics to perform subvector insertion/extraction ------===// +def int_experimental_vector_insert : DefaultAttrsIntrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, llvm_anyvector_ty, llvm_i64_ty], + [IntrNoMem, ImmArg<ArgIndex<2>>]>; + +def int_experimental_vector_extract : DefaultAttrsIntrinsic<[llvm_anyvector_ty], + [llvm_anyvector_ty, llvm_i64_ty], + [IntrNoMem, ImmArg<ArgIndex<1>>]>; //===----------------------------------------------------------------------===// @@ -1550,3 +1663,4 @@ include "llvm/IR/IntrinsicsBPF.td" include "llvm/IR/IntrinsicsSystemZ.td" include "llvm/IR/IntrinsicsWebAssembly.td" include "llvm/IR/IntrinsicsRISCV.td" +include "llvm/IR/IntrinsicsVE.td" diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td index 3f71f644f9a1..da3085171b19 100644 --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -12,47 +12,58 @@ let TargetPrefix = "aarch64" in { -def int_aarch64_ldxr : Intrinsic<[llvm_i64_ty], [llvm_anyptr_ty]>; -def int_aarch64_ldaxr : Intrinsic<[llvm_i64_ty], [llvm_anyptr_ty]>; -def int_aarch64_stxr : Intrinsic<[llvm_i32_ty], [llvm_i64_ty, llvm_anyptr_ty]>; -def int_aarch64_stlxr : Intrinsic<[llvm_i32_ty], [llvm_i64_ty, llvm_anyptr_ty]>; - -def int_aarch64_ldxp : Intrinsic<[llvm_i64_ty, llvm_i64_ty], [llvm_ptr_ty]>; -def int_aarch64_ldaxp : Intrinsic<[llvm_i64_ty, llvm_i64_ty], [llvm_ptr_ty]>; +def int_aarch64_ldxr : Intrinsic<[llvm_i64_ty], [llvm_anyptr_ty], + [IntrNoFree, IntrWillReturn]>; +def int_aarch64_ldaxr : Intrinsic<[llvm_i64_ty], [llvm_anyptr_ty], + [IntrNoFree, IntrWillReturn]>; +def int_aarch64_stxr : Intrinsic<[llvm_i32_ty], [llvm_i64_ty, llvm_anyptr_ty], + [IntrNoFree, IntrWillReturn]>; +def int_aarch64_stlxr : Intrinsic<[llvm_i32_ty], [llvm_i64_ty, llvm_anyptr_ty], + [IntrNoFree, IntrWillReturn]>; + +def int_aarch64_ldxp : Intrinsic<[llvm_i64_ty, llvm_i64_ty], [llvm_ptr_ty], + [IntrNoFree, IntrWillReturn]>; +def int_aarch64_ldaxp : Intrinsic<[llvm_i64_ty, llvm_i64_ty], 
[llvm_ptr_ty], + [IntrNoFree, IntrWillReturn]>; def int_aarch64_stxp : Intrinsic<[llvm_i32_ty], - [llvm_i64_ty, llvm_i64_ty, llvm_ptr_ty]>; + [llvm_i64_ty, llvm_i64_ty, llvm_ptr_ty], + [IntrNoFree, IntrWillReturn]>; def int_aarch64_stlxp : Intrinsic<[llvm_i32_ty], - [llvm_i64_ty, llvm_i64_ty, llvm_ptr_ty]>; + [llvm_i64_ty, llvm_i64_ty, llvm_ptr_ty], + [IntrNoFree, IntrWillReturn]>; def int_aarch64_clrex : Intrinsic<[]>; -def int_aarch64_sdiv : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, +def int_aarch64_sdiv : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>; -def int_aarch64_udiv : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, +def int_aarch64_udiv : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>; -def int_aarch64_fjcvtzs : Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>; +def int_aarch64_fjcvtzs : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>; -def int_aarch64_cls: Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>; -def int_aarch64_cls64: Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem]>; +def int_aarch64_cls: DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>; +def int_aarch64_cls64: DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem]>; //===----------------------------------------------------------------------===// // HINT -def int_aarch64_hint : Intrinsic<[], [llvm_i32_ty]>; +def int_aarch64_hint : DefaultAttrsIntrinsic<[], [llvm_i32_ty]>; //===----------------------------------------------------------------------===// // Data Barrier Instructions -def int_aarch64_dmb : GCCBuiltin<"__builtin_arm_dmb">, MSBuiltin<"__dmb">, Intrinsic<[], [llvm_i32_ty]>; -def int_aarch64_dsb : GCCBuiltin<"__builtin_arm_dsb">, MSBuiltin<"__dsb">, Intrinsic<[], [llvm_i32_ty]>; -def int_aarch64_isb : GCCBuiltin<"__builtin_arm_isb">, MSBuiltin<"__isb">, Intrinsic<[], [llvm_i32_ty]>; +def int_aarch64_dmb : GCCBuiltin<"__builtin_arm_dmb">, MSBuiltin<"__dmb">, + Intrinsic<[], [llvm_i32_ty], [IntrNoFree, IntrWillReturn]>; +def int_aarch64_dsb : GCCBuiltin<"__builtin_arm_dsb">, MSBuiltin<"__dsb">, + Intrinsic<[], [llvm_i32_ty], [IntrNoFree, IntrWillReturn]>; +def int_aarch64_isb : GCCBuiltin<"__builtin_arm_isb">, MSBuiltin<"__isb">, + Intrinsic<[], [llvm_i32_ty], [IntrNoFree, IntrWillReturn]>; // A space-consuming intrinsic primarily for testing block and jump table // placements. The first argument is the number of bytes this "instruction" // takes up, the second and return value are essentially chains, used to force // ordering during ISel. -def int_aarch64_space : Intrinsic<[llvm_i64_ty], [llvm_i32_ty, llvm_i64_ty], []>; +def int_aarch64_space : DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i32_ty, llvm_i64_ty], []>; } @@ -61,129 +72,133 @@ def int_aarch64_space : Intrinsic<[llvm_i64_ty], [llvm_i32_ty, llvm_i64_ty], []> let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.". 
class AdvSIMD_2Scalar_Float_Intrinsic - : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>], + : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>; class AdvSIMD_FPToIntRounding_Intrinsic - : Intrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty], [IntrNoMem]>; + : DefaultAttrsIntrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty], [IntrNoMem]>; class AdvSIMD_1IntArg_Intrinsic - : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [IntrNoMem]>; + : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [IntrNoMem]>; class AdvSIMD_1FloatArg_Intrinsic - : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>; + : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>; class AdvSIMD_1VectorArg_Intrinsic - : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>], [IntrNoMem]>; + : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>], [IntrNoMem]>; class AdvSIMD_1VectorArg_Expand_Intrinsic - : Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>; + : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>; class AdvSIMD_1VectorArg_Long_Intrinsic - : Intrinsic<[llvm_anyvector_ty], [LLVMTruncatedType<0>], [IntrNoMem]>; + : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMTruncatedType<0>], [IntrNoMem]>; class AdvSIMD_1IntArg_Narrow_Intrinsic - : Intrinsic<[llvm_anyint_ty], [llvm_anyint_ty], [IntrNoMem]>; + : DefaultAttrsIntrinsic<[llvm_anyint_ty], [llvm_anyint_ty], [IntrNoMem]>; class AdvSIMD_1VectorArg_Narrow_Intrinsic - : Intrinsic<[llvm_anyint_ty], [LLVMExtendedType<0>], [IntrNoMem]>; + : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMExtendedType<0>], [IntrNoMem]>; class AdvSIMD_1VectorArg_Int_Across_Intrinsic - : Intrinsic<[llvm_anyint_ty], [llvm_anyvector_ty], [IntrNoMem]>; + : DefaultAttrsIntrinsic<[llvm_anyint_ty], [llvm_anyvector_ty], [IntrNoMem]>; class AdvSIMD_1VectorArg_Float_Across_Intrinsic - : Intrinsic<[llvm_anyfloat_ty], [llvm_anyvector_ty], [IntrNoMem]>; + : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [llvm_anyvector_ty], [IntrNoMem]>; class AdvSIMD_2IntArg_Intrinsic - : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>], + : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>; class AdvSIMD_2FloatArg_Intrinsic - : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>], + : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>; class AdvSIMD_2VectorArg_Intrinsic - : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMMatchType<0>], + : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>; class AdvSIMD_2VectorArg_Compare_Intrinsic - : Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty, LLVMMatchType<1>], + : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty, LLVMMatchType<1>], [IntrNoMem]>; class AdvSIMD_2Arg_FloatCompare_Intrinsic - : Intrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty, LLVMMatchType<1>], + : DefaultAttrsIntrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty, LLVMMatchType<1>], [IntrNoMem]>; class AdvSIMD_2VectorArg_Long_Intrinsic - : Intrinsic<[llvm_anyvector_ty], + : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMTruncatedType<0>, LLVMTruncatedType<0>], [IntrNoMem]>; class AdvSIMD_2VectorArg_Wide_Intrinsic - : Intrinsic<[llvm_anyvector_ty], + : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMTruncatedType<0>], [IntrNoMem]>; class AdvSIMD_2VectorArg_Narrow_Intrinsic - : 
Intrinsic<[llvm_anyvector_ty], + : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMExtendedType<0>, LLVMExtendedType<0>], [IntrNoMem]>; class AdvSIMD_2Arg_Scalar_Narrow_Intrinsic - : Intrinsic<[llvm_anyint_ty], + : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMExtendedType<0>, llvm_i32_ty], [IntrNoMem]>; class AdvSIMD_2VectorArg_Scalar_Expand_BySize_Intrinsic - : Intrinsic<[llvm_anyvector_ty], + : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrNoMem]>; class AdvSIMD_2VectorArg_Scalar_Wide_BySize_Intrinsic - : Intrinsic<[llvm_anyvector_ty], + : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMTruncatedType<0>], [IntrNoMem]>; class AdvSIMD_2VectorArg_Scalar_Wide_Intrinsic - : Intrinsic<[llvm_anyvector_ty], + : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMTruncatedType<0>, llvm_i32_ty], [IntrNoMem]>; class AdvSIMD_2VectorArg_Tied_Narrow_Intrinsic - : Intrinsic<[llvm_anyvector_ty], + : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMHalfElementsVectorType<0>, llvm_anyvector_ty], [IntrNoMem]>; class AdvSIMD_2VectorArg_Lane_Intrinsic - : Intrinsic<[llvm_anyint_ty], + : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, llvm_anyint_ty, llvm_i32_ty], [IntrNoMem]>; class AdvSIMD_3VectorArg_Intrinsic - : Intrinsic<[llvm_anyvector_ty], + : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>; class AdvSIMD_3VectorArg_Scalar_Intrinsic - : Intrinsic<[llvm_anyvector_ty], + : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty], [IntrNoMem]>; class AdvSIMD_3VectorArg_Tied_Narrow_Intrinsic - : Intrinsic<[llvm_anyvector_ty], + : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMHalfElementsVectorType<0>, llvm_anyvector_ty, LLVMMatchType<1>], [IntrNoMem]>; class AdvSIMD_3VectorArg_Scalar_Tied_Narrow_Intrinsic - : Intrinsic<[llvm_anyvector_ty], + : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMHalfElementsVectorType<0>, llvm_anyvector_ty, llvm_i32_ty], [IntrNoMem]>; class AdvSIMD_CvtFxToFP_Intrinsic - : Intrinsic<[llvm_anyfloat_ty], [llvm_anyint_ty, llvm_i32_ty], + : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [llvm_anyint_ty, llvm_i32_ty], [IntrNoMem]>; class AdvSIMD_CvtFPToFx_Intrinsic - : Intrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty, llvm_i32_ty], + : DefaultAttrsIntrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty, llvm_i32_ty], [IntrNoMem]>; class AdvSIMD_1Arg_Intrinsic - : Intrinsic<[llvm_any_ty], [LLVMMatchType<0>], [IntrNoMem]>; + : DefaultAttrsIntrinsic<[llvm_any_ty], [LLVMMatchType<0>], [IntrNoMem]>; class AdvSIMD_Dot_Intrinsic - : Intrinsic<[llvm_anyvector_ty], + : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<1>], [IntrNoMem]>; class AdvSIMD_FP16FML_Intrinsic - : Intrinsic<[llvm_anyvector_ty], + : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<1>], [IntrNoMem]>; class AdvSIMD_MatMul_Intrinsic - : Intrinsic<[llvm_anyvector_ty], + : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<1>], [IntrNoMem]>; class AdvSIMD_FML_Intrinsic - : Intrinsic<[llvm_anyvector_ty], + : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<1>], [IntrNoMem]>; + class AdvSIMD_BF16FML_Intrinsic + : DefaultAttrsIntrinsic<[llvm_v4f32_ty], + [llvm_v4f32_ty, llvm_v8bf16_ty, llvm_v8bf16_ty], + [IntrNoMem]>; } // Arithmetic ops @@ -241,7 +256,7 @@ let TargetPrefix = "aarch64", IntrProperties = [IntrNoMem] in { // 64-bit 
polynomial multiply really returns an i128, which is not legal. Fake // it with a v16i8. def int_aarch64_neon_pmull64 : - Intrinsic<[llvm_v16i8_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_v16i8_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>; // Vector Extending Multiply def int_aarch64_neon_fmulx : AdvSIMD_2FloatArg_Intrinsic { @@ -251,7 +266,7 @@ let TargetPrefix = "aarch64", IntrProperties = [IntrNoMem] in { // Vector Saturating Doubling Long Multiply def int_aarch64_neon_sqdmull : AdvSIMD_2VectorArg_Long_Intrinsic; def int_aarch64_neon_sqdmulls_scalar - : Intrinsic<[llvm_i64_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; + : DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; // Vector Halving Subtract def int_aarch64_neon_shsub : AdvSIMD_2VectorArg_Intrinsic; @@ -421,9 +436,9 @@ let TargetPrefix = "aarch64", IntrProperties = [IntrNoMem] in { // Vector Conversions Between Half-Precision and Single-Precision. def int_aarch64_neon_vcvtfp2hf - : Intrinsic<[llvm_v4i16_ty], [llvm_v4f32_ty], [IntrNoMem]>; + : DefaultAttrsIntrinsic<[llvm_v4i16_ty], [llvm_v4f32_ty], [IntrNoMem]>; def int_aarch64_neon_vcvthf2fp - : Intrinsic<[llvm_v4f32_ty], [llvm_v4i16_ty], [IntrNoMem]>; + : DefaultAttrsIntrinsic<[llvm_v4f32_ty], [llvm_v4i16_ty], [IntrNoMem]>; // Vector Conversions Between Floating-point and Fixed-point. def int_aarch64_neon_vcvtfp2fxs : AdvSIMD_CvtFPToFx_Intrinsic; @@ -453,7 +468,7 @@ let TargetPrefix = "aarch64", IntrProperties = [IntrNoMem] in { def int_aarch64_neon_fcvtxn : AdvSIMD_1VectorArg_Expand_Intrinsic; // Scalar FP Inexact Narrowing - def int_aarch64_sisd_fcvtxn : Intrinsic<[llvm_float_ty], [llvm_double_ty], + def int_aarch64_sisd_fcvtxn : DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem]>; // v8.2-A Dot Product @@ -466,18 +481,21 @@ let TargetPrefix = "aarch64", IntrProperties = [IntrNoMem] in { def int_aarch64_neon_usmmla : AdvSIMD_MatMul_Intrinsic; def int_aarch64_neon_usdot : AdvSIMD_Dot_Intrinsic; def int_aarch64_neon_bfdot : AdvSIMD_Dot_Intrinsic; - def int_aarch64_neon_bfmmla : AdvSIMD_MatMul_Intrinsic; - def int_aarch64_neon_bfmlalb : AdvSIMD_FML_Intrinsic; - def int_aarch64_neon_bfmlalt : AdvSIMD_FML_Intrinsic; + def int_aarch64_neon_bfmmla + : DefaultAttrsIntrinsic<[llvm_v4f32_ty], + [llvm_v4f32_ty, llvm_v8bf16_ty, llvm_v8bf16_ty], + [IntrNoMem]>; + def int_aarch64_neon_bfmlalb : AdvSIMD_BF16FML_Intrinsic; + def int_aarch64_neon_bfmlalt : AdvSIMD_BF16FML_Intrinsic; // v8.6-A Bfloat Intrinsics def int_aarch64_neon_bfcvt - : Intrinsic<[llvm_bfloat_ty], [llvm_float_ty], [IntrNoMem]>; + : DefaultAttrsIntrinsic<[llvm_bfloat_ty], [llvm_float_ty], [IntrNoMem]>; def int_aarch64_neon_bfcvtn - : Intrinsic<[llvm_v8bf16_ty], [llvm_v4f32_ty], [IntrNoMem]>; + : DefaultAttrsIntrinsic<[llvm_v8bf16_ty], [llvm_v4f32_ty], [IntrNoMem]>; def int_aarch64_neon_bfcvtn2 - : Intrinsic<[llvm_v8bf16_ty], + : DefaultAttrsIntrinsic<[llvm_v8bf16_ty], [llvm_v8bf16_ty, llvm_v4f32_ty], [IntrNoMem]>; @@ -490,11 +508,16 @@ let TargetPrefix = "aarch64", IntrProperties = [IntrNoMem] in { // v8.3-A Floating-point complex add def int_aarch64_neon_vcadd_rot90 : AdvSIMD_2VectorArg_Intrinsic; def int_aarch64_neon_vcadd_rot270 : AdvSIMD_2VectorArg_Intrinsic; + + def int_aarch64_neon_vcmla_rot0 : AdvSIMD_3VectorArg_Intrinsic; + def int_aarch64_neon_vcmla_rot90 : AdvSIMD_3VectorArg_Intrinsic; + def int_aarch64_neon_vcmla_rot180 : AdvSIMD_3VectorArg_Intrinsic; + def int_aarch64_neon_vcmla_rot270 : AdvSIMD_3VectorArg_Intrinsic; } let 
TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.". class AdvSIMD_2Vector2Index_Intrinsic - : Intrinsic<[llvm_anyvector_ty], + : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty, llvm_i64_ty, LLVMMatchType<0>, llvm_i64_ty], [IntrNoMem]>; } @@ -504,68 +527,68 @@ def int_aarch64_neon_vcopy_lane: AdvSIMD_2Vector2Index_Intrinsic; let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.". class AdvSIMD_1Vec_Load_Intrinsic - : Intrinsic<[llvm_anyvector_ty], [LLVMAnyPointerType<LLVMMatchType<0>>], + : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMAnyPointerType<LLVMMatchType<0>>], [IntrReadMem, IntrArgMemOnly]>; class AdvSIMD_1Vec_Store_Lane_Intrinsic - : Intrinsic<[], [llvm_anyvector_ty, llvm_i64_ty, llvm_anyptr_ty], + : DefaultAttrsIntrinsic<[], [llvm_anyvector_ty, llvm_i64_ty, llvm_anyptr_ty], [IntrArgMemOnly, NoCapture<ArgIndex<2>>]>; class AdvSIMD_2Vec_Load_Intrinsic - : Intrinsic<[LLVMMatchType<0>, llvm_anyvector_ty], + : DefaultAttrsIntrinsic<[LLVMMatchType<0>, llvm_anyvector_ty], [LLVMAnyPointerType<LLVMMatchType<0>>], [IntrReadMem, IntrArgMemOnly]>; class AdvSIMD_2Vec_Load_Lane_Intrinsic - : Intrinsic<[LLVMMatchType<0>, LLVMMatchType<0>], + : DefaultAttrsIntrinsic<[LLVMMatchType<0>, LLVMMatchType<0>], [LLVMMatchType<0>, llvm_anyvector_ty, llvm_i64_ty, llvm_anyptr_ty], [IntrReadMem, IntrArgMemOnly]>; class AdvSIMD_2Vec_Store_Intrinsic - : Intrinsic<[], [llvm_anyvector_ty, LLVMMatchType<0>, + : DefaultAttrsIntrinsic<[], [llvm_anyvector_ty, LLVMMatchType<0>, LLVMAnyPointerType<LLVMMatchType<0>>], [IntrArgMemOnly, NoCapture<ArgIndex<2>>]>; class AdvSIMD_2Vec_Store_Lane_Intrinsic - : Intrinsic<[], [llvm_anyvector_ty, LLVMMatchType<0>, + : DefaultAttrsIntrinsic<[], [llvm_anyvector_ty, LLVMMatchType<0>, llvm_i64_ty, llvm_anyptr_ty], [IntrArgMemOnly, NoCapture<ArgIndex<3>>]>; class AdvSIMD_3Vec_Load_Intrinsic - : Intrinsic<[LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty], + : DefaultAttrsIntrinsic<[LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty], [LLVMAnyPointerType<LLVMMatchType<0>>], [IntrReadMem, IntrArgMemOnly]>; class AdvSIMD_3Vec_Load_Lane_Intrinsic - : Intrinsic<[LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], + : DefaultAttrsIntrinsic<[LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty, llvm_i64_ty, llvm_anyptr_ty], [IntrReadMem, IntrArgMemOnly]>; class AdvSIMD_3Vec_Store_Intrinsic - : Intrinsic<[], [llvm_anyvector_ty, LLVMMatchType<0>, + : DefaultAttrsIntrinsic<[], [llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMAnyPointerType<LLVMMatchType<0>>], [IntrArgMemOnly, NoCapture<ArgIndex<3>>]>; class AdvSIMD_3Vec_Store_Lane_Intrinsic - : Intrinsic<[], [llvm_anyvector_ty, + : DefaultAttrsIntrinsic<[], [llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, llvm_i64_ty, llvm_anyptr_ty], [IntrArgMemOnly, NoCapture<ArgIndex<4>>]>; class AdvSIMD_4Vec_Load_Intrinsic - : Intrinsic<[LLVMMatchType<0>, LLVMMatchType<0>, + : DefaultAttrsIntrinsic<[LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty], [LLVMAnyPointerType<LLVMMatchType<0>>], [IntrReadMem, IntrArgMemOnly]>; class AdvSIMD_4Vec_Load_Lane_Intrinsic - : Intrinsic<[LLVMMatchType<0>, LLVMMatchType<0>, + : DefaultAttrsIntrinsic<[LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty, llvm_i64_ty, llvm_anyptr_ty], [IntrReadMem, IntrArgMemOnly]>; class AdvSIMD_4Vec_Store_Intrinsic - : 
Intrinsic<[], [llvm_anyvector_ty, LLVMMatchType<0>, + : DefaultAttrsIntrinsic<[], [llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, LLVMAnyPointerType<LLVMMatchType<0>>], [IntrArgMemOnly, NoCapture<ArgIndex<4>>]>; class AdvSIMD_4Vec_Store_Lane_Intrinsic - : Intrinsic<[], [llvm_anyvector_ty, LLVMMatchType<0>, + : DefaultAttrsIntrinsic<[], [llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, llvm_i64_ty, llvm_anyptr_ty], [IntrArgMemOnly, NoCapture<ArgIndex<5>>]>; @@ -603,38 +626,38 @@ def int_aarch64_neon_st4lane : AdvSIMD_4Vec_Store_Lane_Intrinsic; let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.". class AdvSIMD_Tbl1_Intrinsic - : Intrinsic<[llvm_anyvector_ty], [llvm_v16i8_ty, LLVMMatchType<0>], + : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [llvm_v16i8_ty, LLVMMatchType<0>], [IntrNoMem]>; class AdvSIMD_Tbl2_Intrinsic - : Intrinsic<[llvm_anyvector_ty], + : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [llvm_v16i8_ty, llvm_v16i8_ty, LLVMMatchType<0>], [IntrNoMem]>; class AdvSIMD_Tbl3_Intrinsic - : Intrinsic<[llvm_anyvector_ty], + : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty, LLVMMatchType<0>], [IntrNoMem]>; class AdvSIMD_Tbl4_Intrinsic - : Intrinsic<[llvm_anyvector_ty], + : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty, LLVMMatchType<0>], [IntrNoMem]>; class AdvSIMD_Tbx1_Intrinsic - : Intrinsic<[llvm_anyvector_ty], + : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, llvm_v16i8_ty, LLVMMatchType<0>], [IntrNoMem]>; class AdvSIMD_Tbx2_Intrinsic - : Intrinsic<[llvm_anyvector_ty], + : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, llvm_v16i8_ty, llvm_v16i8_ty, LLVMMatchType<0>], [IntrNoMem]>; class AdvSIMD_Tbx3_Intrinsic - : Intrinsic<[llvm_anyvector_ty], + : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty, LLVMMatchType<0>], [IntrNoMem]>; class AdvSIMD_Tbx4_Intrinsic - : Intrinsic<[llvm_anyvector_ty], + : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty, LLVMMatchType<0>], [IntrNoMem]>; @@ -651,7 +674,7 @@ def int_aarch64_neon_tbx4 : AdvSIMD_Tbx4_Intrinsic; let TargetPrefix = "aarch64" in { class FPCR_Get_Intrinsic - : Intrinsic<[llvm_i64_ty], [], [IntrNoMem, IntrHasSideEffects]>; + : DefaultAttrsIntrinsic<[llvm_i64_ty], [], [IntrNoMem, IntrHasSideEffects]>; } // FPCR @@ -659,34 +682,34 @@ def int_aarch64_get_fpcr : FPCR_Get_Intrinsic; let TargetPrefix = "aarch64" in { class Crypto_AES_DataKey_Intrinsic - : Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; + : DefaultAttrsIntrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; class Crypto_AES_Data_Intrinsic - : Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty], [IntrNoMem]>; + : DefaultAttrsIntrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty], [IntrNoMem]>; // SHA intrinsic taking 5 words of the hash (v4i32, i32) and 4 of the schedule // (v4i32). class Crypto_SHA_5Hash4Schedule_Intrinsic - : Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty, llvm_v4i32_ty], + : DefaultAttrsIntrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty, llvm_v4i32_ty], [IntrNoMem]>; // SHA intrinsic taking 5 words of the hash (v4i32, i32) and 4 of the schedule // (v4i32). 
class Crypto_SHA_1Hash_Intrinsic - : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>; + : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>; // SHA intrinsic taking 8 words of the schedule class Crypto_SHA_8Schedule_Intrinsic - : Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; + : DefaultAttrsIntrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; // SHA intrinsic taking 12 words of the schedule class Crypto_SHA_12Schedule_Intrinsic - : Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty], + : DefaultAttrsIntrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; // SHA intrinsic taking 8 words of the hash and 4 of the schedule. class Crypto_SHA_8Hash4Schedule_Intrinsic - : Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty], + : DefaultAttrsIntrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; } @@ -716,84 +739,96 @@ def int_aarch64_crypto_sha256su1 : Crypto_SHA_12Schedule_Intrinsic; let TargetPrefix = "aarch64" in { -def int_aarch64_crc32b : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], +def int_aarch64_crc32b : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; -def int_aarch64_crc32cb : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], +def int_aarch64_crc32cb : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; -def int_aarch64_crc32h : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], +def int_aarch64_crc32h : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; -def int_aarch64_crc32ch : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], +def int_aarch64_crc32ch : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; -def int_aarch64_crc32w : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], +def int_aarch64_crc32w : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; -def int_aarch64_crc32cw : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], +def int_aarch64_crc32cw : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; -def int_aarch64_crc32x : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i64_ty], +def int_aarch64_crc32x : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i64_ty], [IntrNoMem]>; -def int_aarch64_crc32cx : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i64_ty], +def int_aarch64_crc32cx : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i64_ty], [IntrNoMem]>; } //===----------------------------------------------------------------------===// // Memory Tagging Extensions (MTE) Intrinsics let TargetPrefix = "aarch64" in { -def int_aarch64_irg : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_i64_ty], +def int_aarch64_irg : DefaultAttrsIntrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_i64_ty], [IntrNoMem, IntrHasSideEffects]>; -def int_aarch64_addg : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_i64_ty], +def int_aarch64_addg : DefaultAttrsIntrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_i64_ty], [IntrNoMem]>; -def int_aarch64_gmi : Intrinsic<[llvm_i64_ty], [llvm_ptr_ty, llvm_i64_ty], +def int_aarch64_gmi : DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_ptr_ty, llvm_i64_ty], [IntrNoMem]>; -def int_aarch64_ldg : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_ptr_ty], +def int_aarch64_ldg : DefaultAttrsIntrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_ptr_ty], [IntrReadMem]>; -def int_aarch64_stg : Intrinsic<[], [llvm_ptr_ty, llvm_ptr_ty], +def 
int_aarch64_stg : DefaultAttrsIntrinsic<[], [llvm_ptr_ty, llvm_ptr_ty], [IntrWriteMem]>; -def int_aarch64_subp : Intrinsic<[llvm_i64_ty], [llvm_ptr_ty, llvm_ptr_ty], +def int_aarch64_subp : DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_ptr_ty, llvm_ptr_ty], [IntrNoMem]>; // The following are codegen-only intrinsics for stack instrumentation. // Generate a randomly tagged stack base pointer. -def int_aarch64_irg_sp : Intrinsic<[llvm_ptr_ty], [llvm_i64_ty], +def int_aarch64_irg_sp : DefaultAttrsIntrinsic<[llvm_ptr_ty], [llvm_i64_ty], [IntrNoMem, IntrHasSideEffects]>; // Transfer pointer tag with offset. // ptr1 = tagp(ptr0, baseptr, tag_offset) returns a pointer where // * address is the address in ptr0 // * tag is a function of (tag in baseptr, tag_offset). +// ** Beware, this is not the same function as implemented by the ADDG instruction! +// Backend optimizations may change tag_offset; the only guarantee is that calls +// to tagp with the same pair of (baseptr, tag_offset) will produce pointers +// with the same tag value, assuming the set of excluded tags has not changed. // Address bits in baseptr and tag bits in ptr0 are ignored. // When offset between ptr0 and baseptr is a compile time constant, this can be emitted as // ADDG ptr1, baseptr, (ptr0 - baseptr), tag_offset // It is intended that ptr0 is an alloca address, and baseptr is the direct output of llvm.aarch64.irg.sp. -def int_aarch64_tagp : Intrinsic<[llvm_anyptr_ty], [LLVMMatchType<0>, llvm_ptr_ty, llvm_i64_ty], +def int_aarch64_tagp : DefaultAttrsIntrinsic<[llvm_anyptr_ty], [LLVMMatchType<0>, llvm_ptr_ty, llvm_i64_ty], [IntrNoMem, ImmArg<ArgIndex<2>>]>; // Update allocation tags for the memory range to match the tag in the pointer argument. -def int_aarch64_settag : Intrinsic<[], [llvm_ptr_ty, llvm_i64_ty], +def int_aarch64_settag : DefaultAttrsIntrinsic<[], [llvm_ptr_ty, llvm_i64_ty], [IntrWriteMem, IntrArgMemOnly, NoCapture<ArgIndex<0>>, WriteOnly<ArgIndex<0>>]>; // Update allocation tags for the memory range to match the tag in the pointer argument, // and set memory contents to zero. -def int_aarch64_settag_zero : Intrinsic<[], [llvm_ptr_ty, llvm_i64_ty], +def int_aarch64_settag_zero : DefaultAttrsIntrinsic<[], [llvm_ptr_ty, llvm_i64_ty], [IntrWriteMem, IntrArgMemOnly, NoCapture<ArgIndex<0>>, WriteOnly<ArgIndex<0>>]>; // Update allocation tags for 16-aligned, 16-sized memory region, and store a pair of 8-byte values. 
-def int_aarch64_stgp : Intrinsic<[], [llvm_ptr_ty, llvm_i64_ty, llvm_i64_ty], +def int_aarch64_stgp : DefaultAttrsIntrinsic<[], [llvm_ptr_ty, llvm_i64_ty, llvm_i64_ty], [IntrWriteMem, IntrArgMemOnly, NoCapture<ArgIndex<0>>, WriteOnly<ArgIndex<0>>]>; } // Transactional Memory Extension (TME) Intrinsics let TargetPrefix = "aarch64" in { def int_aarch64_tstart : GCCBuiltin<"__builtin_arm_tstart">, - Intrinsic<[llvm_i64_ty]>; + Intrinsic<[llvm_i64_ty], [], [IntrWillReturn]>; -def int_aarch64_tcommit : GCCBuiltin<"__builtin_arm_tcommit">, Intrinsic<[]>; +def int_aarch64_tcommit : GCCBuiltin<"__builtin_arm_tcommit">, Intrinsic<[], [], [IntrWillReturn]>; def int_aarch64_tcancel : GCCBuiltin<"__builtin_arm_tcancel">, - Intrinsic<[], [llvm_i64_ty], [ImmArg<ArgIndex<0>>]>; + Intrinsic<[], [llvm_i64_ty], [IntrWillReturn, ImmArg<ArgIndex<0>>]>; def int_aarch64_ttest : GCCBuiltin<"__builtin_arm_ttest">, Intrinsic<[llvm_i64_ty], [], - [IntrNoMem, IntrHasSideEffects]>; + [IntrNoMem, IntrHasSideEffects, IntrWillReturn]>; + +// Armv8.7-A load/store 64-byte intrinsics +defvar data512 = !listsplat(llvm_i64_ty, 8); +def int_aarch64_ld64b: Intrinsic<data512, [llvm_ptr_ty]>; +def int_aarch64_st64b: Intrinsic<[], !listconcat([llvm_ptr_ty], data512)>; +def int_aarch64_st64bv: Intrinsic<[llvm_i64_ty], !listconcat([llvm_ptr_ty], data512)>; +def int_aarch64_st64bv0: Intrinsic<[llvm_i64_ty], !listconcat([llvm_ptr_ty], data512)>; + } def llvm_nxv2i1_ty : LLVMType<nxv2i1>; @@ -811,88 +846,88 @@ def llvm_nxv2f64_ty : LLVMType<nxv2f64>; let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.". class AdvSIMD_SVE_Create_2Vector_Tuple - : Intrinsic<[llvm_anyvector_ty], + : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty, LLVMMatchType<1>], [IntrReadMem]>; class AdvSIMD_SVE_Create_3Vector_Tuple - : Intrinsic<[llvm_anyvector_ty], + : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty, LLVMMatchType<1>, LLVMMatchType<1>], [IntrReadMem]>; class AdvSIMD_SVE_Create_4Vector_Tuple - : Intrinsic<[llvm_anyvector_ty], + : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty, LLVMMatchType<1>, LLVMMatchType<1>, LLVMMatchType<1>], [IntrReadMem]>; class AdvSIMD_SVE_Set_Vector_Tuple - : Intrinsic<[llvm_anyvector_ty], + : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, llvm_i32_ty, llvm_anyvector_ty], [IntrReadMem, ImmArg<ArgIndex<1>>]>; class AdvSIMD_SVE_Get_Vector_Tuple - : Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty, llvm_i32_ty], + : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty, llvm_i32_ty], [IntrReadMem, IntrArgMemOnly, ImmArg<ArgIndex<1>>]>; class AdvSIMD_ManyVec_PredLoad_Intrinsic - : Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty, LLVMPointerToElt<0>], + : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty, LLVMPointerToElt<0>], [IntrReadMem, IntrArgMemOnly]>; class AdvSIMD_1Vec_PredLoad_Intrinsic - : Intrinsic<[llvm_anyvector_ty], + : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMPointerToElt<0>], [IntrReadMem, IntrArgMemOnly]>; class AdvSIMD_1Vec_PredStore_Intrinsic - : Intrinsic<[], + : DefaultAttrsIntrinsic<[], [llvm_anyvector_ty, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMPointerToElt<0>], [IntrArgMemOnly, NoCapture<ArgIndex<2>>]>; class AdvSIMD_2Vec_PredStore_Intrinsic - : Intrinsic<[], + : DefaultAttrsIntrinsic<[], [llvm_anyvector_ty, LLVMMatchType<0>, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMPointerToElt<0>], [IntrArgMemOnly, NoCapture<ArgIndex<3>>]>; class 
AdvSIMD_3Vec_PredStore_Intrinsic - : Intrinsic<[], + : DefaultAttrsIntrinsic<[], [llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMPointerToElt<0>], [IntrArgMemOnly, NoCapture<ArgIndex<4>>]>; class AdvSIMD_4Vec_PredStore_Intrinsic - : Intrinsic<[], + : DefaultAttrsIntrinsic<[], [llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMPointerToElt<0>], [IntrArgMemOnly, NoCapture<ArgIndex<5>>]>; class AdvSIMD_SVE_Index_Intrinsic - : Intrinsic<[llvm_anyvector_ty], + : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMVectorElementType<0>, LLVMVectorElementType<0>], [IntrNoMem]>; class AdvSIMD_Merged1VectorArg_Intrinsic - : Intrinsic<[llvm_anyvector_ty], + : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMMatchType<0>], [IntrNoMem]>; class AdvSIMD_2VectorArgIndexed_Intrinsic - : Intrinsic<[llvm_anyvector_ty], + : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<2>>]>; class AdvSIMD_3VectorArgIndexed_Intrinsic - : Intrinsic<[llvm_anyvector_ty], + : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, @@ -900,20 +935,20 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.". [IntrNoMem, ImmArg<ArgIndex<3>>]>; class AdvSIMD_Pred1VectorArg_Intrinsic - : Intrinsic<[llvm_anyvector_ty], + : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMMatchType<0>], [IntrNoMem]>; class AdvSIMD_Pred2VectorArg_Intrinsic - : Intrinsic<[llvm_anyvector_ty], + : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>; class AdvSIMD_Pred3VectorArg_Intrinsic - : Intrinsic<[llvm_anyvector_ty], + : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMMatchType<0>, LLVMMatchType<0>, @@ -921,77 +956,77 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.". 
[IntrNoMem]>; class AdvSIMD_SVE_Compare_Intrinsic - : Intrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>], + : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>], [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyvector_ty, LLVMMatchType<0>], [IntrNoMem]>; class AdvSIMD_SVE_CompareWide_Intrinsic - : Intrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>], + : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>], [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyvector_ty, llvm_nxv2i64_ty], [IntrNoMem]>; class AdvSIMD_SVE_Saturating_Intrinsic - : Intrinsic<[llvm_anyvector_ty], + : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>], [IntrNoMem]>; class AdvSIMD_SVE_SaturatingWithPattern_Intrinsic - : Intrinsic<[llvm_anyvector_ty], + : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<2>>]>; class AdvSIMD_SVE_Saturating_N_Intrinsic<LLVMType T> - : Intrinsic<[T], + : DefaultAttrsIntrinsic<[T], [T, llvm_anyvector_ty], [IntrNoMem]>; class AdvSIMD_SVE_SaturatingWithPattern_N_Intrinsic<LLVMType T> - : Intrinsic<[T], + : DefaultAttrsIntrinsic<[T], [T, llvm_i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<2>>]>; class AdvSIMD_SVE_CNT_Intrinsic - : Intrinsic<[LLVMVectorOfBitcastsToInt<0>], + : DefaultAttrsIntrinsic<[LLVMVectorOfBitcastsToInt<0>], [LLVMVectorOfBitcastsToInt<0>, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyvector_ty], [IntrNoMem]>; class AdvSIMD_SVE_ReduceWithInit_Intrinsic - : Intrinsic<[LLVMVectorElementType<0>], + : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMVectorElementType<0>, llvm_anyvector_ty], [IntrNoMem]>; class AdvSIMD_SVE_ShiftByImm_Intrinsic - : Intrinsic<[llvm_anyvector_ty], + : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMMatchType<0>, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<2>>]>; class AdvSIMD_SVE_ShiftWide_Intrinsic - : Intrinsic<[llvm_anyvector_ty], + : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMMatchType<0>, llvm_nxv2i64_ty], [IntrNoMem]>; class AdvSIMD_SVE_Unpack_Intrinsic - : Intrinsic<[llvm_anyvector_ty], + : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMSubdivide2VectorType<0>], [IntrNoMem]>; class AdvSIMD_SVE_CADD_Intrinsic - : Intrinsic<[llvm_anyvector_ty], + : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMMatchType<0>, LLVMMatchType<0>, @@ -999,7 +1034,7 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.". [IntrNoMem, ImmArg<ArgIndex<3>>]>; class AdvSIMD_SVE_CMLA_Intrinsic - : Intrinsic<[llvm_anyvector_ty], + : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMMatchType<0>, LLVMMatchType<0>, @@ -1008,7 +1043,7 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.". [IntrNoMem, ImmArg<ArgIndex<4>>]>; class AdvSIMD_SVE_CMLA_LANE_Intrinsic - : Intrinsic<[llvm_anyvector_ty], + : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, @@ -1017,96 +1052,96 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.". 
[IntrNoMem, ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>]>; class AdvSIMD_SVE_DUP_Intrinsic - : Intrinsic<[llvm_anyvector_ty], + : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMVectorElementType<0>], [IntrNoMem]>; class AdvSIMD_SVE_DUP_Unpred_Intrinsic - : Intrinsic<[llvm_anyvector_ty], [LLVMVectorElementType<0>], + : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMVectorElementType<0>], [IntrNoMem]>; class AdvSIMD_SVE_DUPQ_Intrinsic - : Intrinsic<[llvm_anyvector_ty], + : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, llvm_i64_ty], [IntrNoMem]>; class AdvSIMD_SVE_EXPA_Intrinsic - : Intrinsic<[llvm_anyvector_ty], + : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMVectorOfBitcastsToInt<0>], [IntrNoMem]>; class AdvSIMD_SVE_FCVT_Intrinsic - : Intrinsic<[llvm_anyvector_ty], + : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyvector_ty], [IntrNoMem]>; class AdvSIMD_SVE_FCVTZS_Intrinsic - : Intrinsic<[llvm_anyvector_ty], + : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMVectorOfBitcastsToInt<0>, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyvector_ty], [IntrNoMem]>; class AdvSIMD_SVE_INSR_Intrinsic - : Intrinsic<[llvm_anyvector_ty], + : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMVectorElementType<0>], [IntrNoMem]>; class AdvSIMD_SVE_PTRUE_Intrinsic - : Intrinsic<[llvm_anyvector_ty], + : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<0>>]>; class AdvSIMD_SVE_PUNPKHI_Intrinsic - : Intrinsic<[LLVMHalfElementsVectorType<0>], + : DefaultAttrsIntrinsic<[LLVMHalfElementsVectorType<0>], [llvm_anyvector_ty], [IntrNoMem]>; class AdvSIMD_SVE_SCALE_Intrinsic - : Intrinsic<[llvm_anyvector_ty], + : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMMatchType<0>, LLVMVectorOfBitcastsToInt<0>], [IntrNoMem]>; class AdvSIMD_SVE_SCVTF_Intrinsic - : Intrinsic<[llvm_anyvector_ty], + : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyvector_ty], [IntrNoMem]>; class AdvSIMD_SVE_TSMUL_Intrinsic - : Intrinsic<[llvm_anyvector_ty], + : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMVectorOfBitcastsToInt<0>], [IntrNoMem]>; class AdvSIMD_SVE_CNTB_Intrinsic - : Intrinsic<[llvm_i64_ty], + : DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<0>>]>; class AdvSIMD_SVE_CNTP_Intrinsic - : Intrinsic<[llvm_i64_ty], + : DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_anyvector_ty, LLVMMatchType<0>], [IntrNoMem]>; class AdvSIMD_SVE_DOT_Intrinsic - : Intrinsic<[llvm_anyvector_ty], + : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMSubdivide4VectorType<0>, LLVMSubdivide4VectorType<0>], [IntrNoMem]>; class AdvSIMD_SVE_DOT_Indexed_Intrinsic - : Intrinsic<[llvm_anyvector_ty], + : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMSubdivide4VectorType<0>, LLVMSubdivide4VectorType<0>, @@ -1114,65 +1149,65 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.". 
[IntrNoMem, ImmArg<ArgIndex<3>>]>; class AdvSIMD_SVE_PTEST_Intrinsic - : Intrinsic<[llvm_i1_ty], + : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_anyvector_ty, LLVMMatchType<0>], [IntrNoMem]>; class AdvSIMD_SVE_TBL_Intrinsic - : Intrinsic<[llvm_anyvector_ty], + : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMVectorOfBitcastsToInt<0>], [IntrNoMem]>; class AdvSIMD_SVE2_TBX_Intrinsic - : Intrinsic<[llvm_anyvector_ty], + : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMVectorOfBitcastsToInt<0>], [IntrNoMem]>; class SVE2_1VectorArg_Long_Intrinsic - : Intrinsic<[llvm_anyvector_ty], + : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMSubdivide2VectorType<0>, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>; class SVE2_2VectorArg_Long_Intrinsic - : Intrinsic<[llvm_anyvector_ty], + : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMSubdivide2VectorType<0>, LLVMSubdivide2VectorType<0>], [IntrNoMem]>; class SVE2_2VectorArgIndexed_Long_Intrinsic - : Intrinsic<[llvm_anyvector_ty], + : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMSubdivide2VectorType<0>, LLVMSubdivide2VectorType<0>, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<2>>]>; class SVE2_2VectorArg_Wide_Intrinsic - : Intrinsic<[llvm_anyvector_ty], + : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMSubdivide2VectorType<0>], [IntrNoMem]>; class SVE2_2VectorArg_Pred_Long_Intrinsic - : Intrinsic<[llvm_anyvector_ty], + : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMMatchType<0>, LLVMSubdivide2VectorType<0>], [IntrNoMem]>; class SVE2_3VectorArg_Long_Intrinsic - : Intrinsic<[llvm_anyvector_ty], + : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMSubdivide2VectorType<0>, LLVMSubdivide2VectorType<0>], [IntrNoMem]>; class SVE2_3VectorArgIndexed_Long_Intrinsic - : Intrinsic<[llvm_anyvector_ty], + : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMSubdivide2VectorType<0>, LLVMSubdivide2VectorType<0>, @@ -1180,45 +1215,45 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.". 
[IntrNoMem, ImmArg<ArgIndex<3>>]>; class SVE2_1VectorArg_Narrowing_Intrinsic - : Intrinsic<[LLVMSubdivide2VectorType<0>], + : DefaultAttrsIntrinsic<[LLVMSubdivide2VectorType<0>], [llvm_anyvector_ty], [IntrNoMem]>; class SVE2_Merged1VectorArg_Narrowing_Intrinsic - : Intrinsic<[LLVMSubdivide2VectorType<0>], + : DefaultAttrsIntrinsic<[LLVMSubdivide2VectorType<0>], [LLVMSubdivide2VectorType<0>, llvm_anyvector_ty], [IntrNoMem]>; class SVE2_2VectorArg_Narrowing_Intrinsic - : Intrinsic< + : DefaultAttrsIntrinsic< [LLVMSubdivide2VectorType<0>], [llvm_anyvector_ty, LLVMMatchType<0>], [IntrNoMem]>; class SVE2_Merged2VectorArg_Narrowing_Intrinsic - : Intrinsic< + : DefaultAttrsIntrinsic< [LLVMSubdivide2VectorType<0>], [LLVMSubdivide2VectorType<0>, llvm_anyvector_ty, LLVMMatchType<0>], [IntrNoMem]>; class SVE2_1VectorArg_Imm_Narrowing_Intrinsic - : Intrinsic<[LLVMSubdivide2VectorType<0>], + : DefaultAttrsIntrinsic<[LLVMSubdivide2VectorType<0>], [llvm_anyvector_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>; class SVE2_2VectorArg_Imm_Narrowing_Intrinsic - : Intrinsic<[LLVMSubdivide2VectorType<0>], + : DefaultAttrsIntrinsic<[LLVMSubdivide2VectorType<0>], [LLVMSubdivide2VectorType<0>, llvm_anyvector_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<2>>]>; class SVE2_CONFLICT_DETECT_Intrinsic - : Intrinsic<[llvm_anyvector_ty], + : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMAnyPointerType<llvm_any_ty>, LLVMMatchType<1>]>; class SVE2_3VectorArg_Indexed_Intrinsic - : Intrinsic<[llvm_anyvector_ty], + : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMSubdivide2VectorType<0>, LLVMSubdivide2VectorType<0>, @@ -1226,7 +1261,7 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.". [IntrNoMem, ImmArg<ArgIndex<3>>]>; class AdvSIMD_SVE_CDOT_LANE_Intrinsic - : Intrinsic<[llvm_anyvector_ty], + : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMSubdivide4VectorType<0>, LLVMSubdivide4VectorType<0>, @@ -1243,7 +1278,7 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.". // This class of intrinsics is not intended to be useful within LLVM IR but // is instead here to support some of the more rigid parts of the ACLE. class Builtin_SVCVT<string name, LLVMType OUT, LLVMType PRED, LLVMType IN> - : Intrinsic<[OUT], [OUT, PRED, IN], [IntrNoMem]>; + : DefaultAttrsIntrinsic<[OUT], [OUT, PRED, IN], [IntrNoMem]>; } //===----------------------------------------------------------------------===// @@ -1252,24 +1287,24 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.". let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.". 
class AdvSIMD_SVE_Reduce_Intrinsic - : Intrinsic<[LLVMVectorElementType<0>], + : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyvector_ty], [IntrNoMem]>; class AdvSIMD_SVE_SADDV_Reduce_Intrinsic - : Intrinsic<[llvm_i64_ty], + : DefaultAttrsIntrinsic<[llvm_i64_ty], [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyvector_ty], [IntrNoMem]>; class AdvSIMD_SVE_WHILE_Intrinsic - : Intrinsic<[llvm_anyvector_ty], + : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [llvm_anyint_ty, LLVMMatchType<1>], [IntrNoMem]>; class AdvSIMD_GatherLoad_SV_64b_Offsets_Intrinsic - : Intrinsic<[llvm_anyvector_ty], + : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMPointerToElt<0>, @@ -1278,7 +1313,7 @@ class AdvSIMD_GatherLoad_SV_64b_Offsets_Intrinsic [IntrReadMem, IntrArgMemOnly]>; class AdvSIMD_GatherLoad_SV_32b_Offsets_Intrinsic - : Intrinsic<[llvm_anyvector_ty], + : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMPointerToElt<0>, @@ -1287,16 +1322,16 @@ class AdvSIMD_GatherLoad_SV_32b_Offsets_Intrinsic [IntrReadMem, IntrArgMemOnly]>; class AdvSIMD_GatherLoad_VS_Intrinsic - : Intrinsic<[llvm_anyvector_ty], + : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyvector_ty, llvm_i64_ty ], - [IntrReadMem, IntrArgMemOnly]>; + [IntrReadMem]>; class AdvSIMD_ScatterStore_SV_64b_Offsets_Intrinsic - : Intrinsic<[], + : DefaultAttrsIntrinsic<[], [ llvm_anyvector_ty, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, @@ -1306,7 +1341,7 @@ class AdvSIMD_ScatterStore_SV_64b_Offsets_Intrinsic [IntrWriteMem, IntrArgMemOnly]>; class AdvSIMD_ScatterStore_SV_32b_Offsets_Intrinsic - : Intrinsic<[], + : DefaultAttrsIntrinsic<[], [ llvm_anyvector_ty, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, @@ -1316,17 +1351,17 @@ class AdvSIMD_ScatterStore_SV_32b_Offsets_Intrinsic [IntrWriteMem, IntrArgMemOnly]>; class AdvSIMD_ScatterStore_VS_Intrinsic - : Intrinsic<[], + : DefaultAttrsIntrinsic<[], [ llvm_anyvector_ty, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyvector_ty, llvm_i64_ty ], - [IntrWriteMem, IntrArgMemOnly]>; + [IntrWriteMem]>; class SVE_gather_prf_SV - : Intrinsic<[], + : DefaultAttrsIntrinsic<[], [ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, // Predicate llvm_ptr_ty, // Base address @@ -1336,7 +1371,7 @@ class SVE_gather_prf_SV [IntrInaccessibleMemOrArgMemOnly, NoCapture<ArgIndex<1>>, ImmArg<ArgIndex<3>>]>; class SVE_gather_prf_VS - : Intrinsic<[], + : DefaultAttrsIntrinsic<[], [ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, // Predicate llvm_anyvector_ty, // Base addresses @@ -1346,17 +1381,17 @@ class SVE_gather_prf_VS [IntrInaccessibleMemOrArgMemOnly, ImmArg<ArgIndex<3>>]>; class SVE_MatMul_Intrinsic - : Intrinsic<[llvm_anyvector_ty], + : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMSubdivide4VectorType<0>, LLVMSubdivide4VectorType<0>], [IntrNoMem]>; class SVE_4Vec_BF16 - : Intrinsic<[llvm_nxv4f32_ty], + : DefaultAttrsIntrinsic<[llvm_nxv4f32_ty], [llvm_nxv4f32_ty, llvm_nxv8bf16_ty, llvm_nxv8bf16_ty], [IntrNoMem]>; class SVE_4Vec_BF16_Indexed - : Intrinsic<[llvm_nxv4f32_ty], + : DefaultAttrsIntrinsic<[llvm_nxv4f32_ty], [llvm_nxv4f32_ty, llvm_nxv8bf16_ty, llvm_nxv8bf16_ty, llvm_i64_ty], [IntrNoMem, ImmArg<ArgIndex<3>>]>; @@ -1408,7 +1443,7 @@ def int_aarch64_sve_stnt1 : AdvSIMD_1Vec_PredStore_Intrinsic; // def int_aarch64_sve_prf - : Intrinsic<[], [llvm_anyvector_ty, llvm_ptr_ty, llvm_i32_ty], + : 
DefaultAttrsIntrinsic<[], [llvm_anyvector_ty, llvm_ptr_ty, llvm_i32_ty], [IntrArgMemOnly, ImmArg<ArgIndex<2>>]>; // Scalar + 32-bit scaled offset vector, zero extend, packed and @@ -1572,10 +1607,10 @@ def int_aarch64_sve_cntp : AdvSIMD_SVE_CNTP_Intrinsic; // FFR manipulation // -def int_aarch64_sve_rdffr : GCCBuiltin<"__builtin_sve_svrdffr">, Intrinsic<[llvm_nxv16i1_ty], []>; -def int_aarch64_sve_rdffr_z : GCCBuiltin<"__builtin_sve_svrdffr_z">, Intrinsic<[llvm_nxv16i1_ty], [llvm_nxv16i1_ty]>; -def int_aarch64_sve_setffr : GCCBuiltin<"__builtin_sve_svsetffr">, Intrinsic<[], []>; -def int_aarch64_sve_wrffr : GCCBuiltin<"__builtin_sve_svwrffr">, Intrinsic<[], [llvm_nxv16i1_ty]>; +def int_aarch64_sve_rdffr : GCCBuiltin<"__builtin_sve_svrdffr">, DefaultAttrsIntrinsic<[llvm_nxv16i1_ty], []>; +def int_aarch64_sve_rdffr_z : GCCBuiltin<"__builtin_sve_svrdffr_z">, DefaultAttrsIntrinsic<[llvm_nxv16i1_ty], [llvm_nxv16i1_ty]>; +def int_aarch64_sve_setffr : GCCBuiltin<"__builtin_sve_svsetffr">, DefaultAttrsIntrinsic<[], []>; +def int_aarch64_sve_wrffr : GCCBuiltin<"__builtin_sve_svwrffr">, DefaultAttrsIntrinsic<[], [llvm_nxv16i1_ty]>; // // Saturating scalar arithmetic @@ -1888,11 +1923,11 @@ def int_aarch64_sve_ptest_last : AdvSIMD_SVE_PTEST_Intrinsic; // Reinterpreting data // -def int_aarch64_sve_convert_from_svbool : Intrinsic<[llvm_anyvector_ty], +def int_aarch64_sve_convert_from_svbool : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [llvm_nxv16i1_ty], [IntrNoMem]>; -def int_aarch64_sve_convert_to_svbool : Intrinsic<[llvm_nxv16i1_ty], +def int_aarch64_sve_convert_to_svbool : DefaultAttrsIntrinsic<[llvm_nxv16i1_ty], [llvm_anyvector_ty], [IntrNoMem]>; @@ -2307,31 +2342,31 @@ def int_aarch64_sve_xar : AdvSIMD_2VectorArgIndexed_Intrinsic; // def int_aarch64_sve_aesd : GCCBuiltin<"__builtin_sve_svaesd_u8">, - Intrinsic<[llvm_nxv16i8_ty], + DefaultAttrsIntrinsic<[llvm_nxv16i8_ty], [llvm_nxv16i8_ty, llvm_nxv16i8_ty], [IntrNoMem]>; def int_aarch64_sve_aesimc : GCCBuiltin<"__builtin_sve_svaesimc_u8">, - Intrinsic<[llvm_nxv16i8_ty], + DefaultAttrsIntrinsic<[llvm_nxv16i8_ty], [llvm_nxv16i8_ty], [IntrNoMem]>; def int_aarch64_sve_aese : GCCBuiltin<"__builtin_sve_svaese_u8">, - Intrinsic<[llvm_nxv16i8_ty], + DefaultAttrsIntrinsic<[llvm_nxv16i8_ty], [llvm_nxv16i8_ty, llvm_nxv16i8_ty], [IntrNoMem]>; def int_aarch64_sve_aesmc : GCCBuiltin<"__builtin_sve_svaesmc_u8">, - Intrinsic<[llvm_nxv16i8_ty], + DefaultAttrsIntrinsic<[llvm_nxv16i8_ty], [llvm_nxv16i8_ty], [IntrNoMem]>; def int_aarch64_sve_rax1 : GCCBuiltin<"__builtin_sve_svrax1_u64">, - Intrinsic<[llvm_nxv2i64_ty], + DefaultAttrsIntrinsic<[llvm_nxv2i64_ty], [llvm_nxv2i64_ty, llvm_nxv2i64_ty], [IntrNoMem]>; def int_aarch64_sve_sm4e : GCCBuiltin<"__builtin_sve_svsm4e_u32">, - Intrinsic<[llvm_nxv4i32_ty], + DefaultAttrsIntrinsic<[llvm_nxv4i32_ty], [llvm_nxv4i32_ty, llvm_nxv4i32_ty], [IntrNoMem]>; def int_aarch64_sve_sm4ekey : GCCBuiltin<"__builtin_sve_svsm4ekey_u32">, - Intrinsic<[llvm_nxv4i32_ty], + DefaultAttrsIntrinsic<[llvm_nxv4i32_ty], [llvm_nxv4i32_ty, llvm_nxv4i32_ty], [IntrNoMem]>; // diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index 01380afae006..ac2291f9d43b 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -18,7 +18,7 @@ class AMDGPUReadPreloadRegisterIntrinsicNamed<string name> // Used to tag image and resource intrinsics with information used to generate // mem operands. 
-class AMDGPURsrcIntrinsic<int rsrcarg, bit isimage = 0> { +class AMDGPURsrcIntrinsic<int rsrcarg, bit isimage = false> { int RsrcArg = rsrcarg; bit IsImage = isimage; } @@ -182,6 +182,8 @@ def int_amdgcn_init_exec : Intrinsic<[], // Set EXEC according to a thread count packed in an SGPR input: // thread_count = (input >> bitoffset) & 0x7f; // This is always moved to the beginning of the basic block. +// Note: only inreg arguments to the parent function are valid as +// inputs to this intrinsic, computed values cannot be used. def int_amdgcn_init_exec_from_input : Intrinsic<[], [llvm_i32_ty, // 32-bit SGPR input llvm_i32_ty], // bit offset of the thread count @@ -255,7 +257,17 @@ def int_amdgcn_log_clamp : Intrinsic< def int_amdgcn_fmul_legacy : GCCBuiltin<"__builtin_amdgcn_fmul_legacy">, Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], - [IntrNoMem, IntrSpeculatable, IntrWillReturn] + [IntrNoMem, IntrSpeculatable, IntrWillReturn, Commutative] +>; + +// Fused single-precision multiply-add with legacy behaviour for the multiply, +// which is that +/- 0.0 * anything (even NaN or infinity) is +0.0. This is +// intended for use on subtargets that have the v_fma_legacy_f32 and/or +// v_fmac_legacy_f32 instructions. (Note that v_fma_legacy_f16 is unrelated and +// has a completely different kind of legacy behaviour.) +def int_amdgcn_fma_legacy : + Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], + [IntrNoMem, IntrSpeculatable, IntrWillReturn, Commutative] >; def int_amdgcn_rcp : Intrinsic< @@ -397,11 +409,10 @@ class AMDGPUAtomicIncIntrin : Intrinsic<[llvm_anyint_ty], def int_amdgcn_atomic_inc : AMDGPUAtomicIncIntrin; def int_amdgcn_atomic_dec : AMDGPUAtomicIncIntrin; -class AMDGPULDSF32Intrin<string clang_builtin> : - GCCBuiltin<clang_builtin>, - Intrinsic<[llvm_float_ty], - [LLVMQualPointerType<llvm_float_ty, 3>, - llvm_float_ty, +class AMDGPULDSIntrin : + Intrinsic<[llvm_any_ty], + [LLVMQualPointerType<LLVMMatchType<0>, 3>, + LLVMMatchType<0>, llvm_i32_ty, // ordering llvm_i32_ty, // scope llvm_i1_ty], // isVolatile @@ -446,9 +457,9 @@ def int_amdgcn_ds_ordered_swap : AMDGPUDSOrderedIntrinsic; def int_amdgcn_ds_append : AMDGPUDSAppendConsumedIntrinsic; def int_amdgcn_ds_consume : AMDGPUDSAppendConsumedIntrinsic; -def int_amdgcn_ds_fadd : AMDGPULDSF32Intrin<"__builtin_amdgcn_ds_faddf">; -def int_amdgcn_ds_fmin : AMDGPULDSF32Intrin<"__builtin_amdgcn_ds_fminf">; -def int_amdgcn_ds_fmax : AMDGPULDSF32Intrin<"__builtin_amdgcn_ds_fmaxf">; +def int_amdgcn_ds_fadd : AMDGPULDSIntrin; +def int_amdgcn_ds_fmin : AMDGPULDSIntrin; +def int_amdgcn_ds_fmax : AMDGPULDSIntrin; } // TargetPrefix = "amdgcn" @@ -545,7 +556,7 @@ class AMDGPUSampleVariant<string ucmod, string lcmod, list<AMDGPUArg> extra_addr // {offset} {bias} {z-compare} list<AMDGPUArg> ExtraAddrArgs = extra_addr; - bit Gradients = 0; + bit Gradients = false; // Name of the {lod} or {clamp} argument that is appended to the coordinates, // if any. 
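Many of the surrounding hunks simply replace TableGen's numeric bit values with the true/false literals and switch string/logic tests to !not and !empty. A minimal standalone sketch of those constructs, with invented record and field names (not taken from these files), which llvm-tblgen can process on its own:

  // bits_sketch.td -- invented names; run with: llvm-tblgen bits_sketch.td
  class Profile<bit sample, string lodclamp> {
    bit IsSample    = sample;                    // true/false literals instead of 1/0
    bit NeedsDmask  = !not(sample);              // !not inverts a bit
    bit HasLodClamp = !not(!empty(lodclamp));    // !empty replaces !eq(str, "")
  }
  def SampleClamp : Profile<true,  "clamp">;     // IsSample=1, NeedsDmask=0, HasLodClamp=1
  def PlainLoad   : Profile<false, "">;          // IsSample=0, NeedsDmask=1, HasLodClamp=0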
@@ -585,7 +596,7 @@ defset list<AMDGPUSampleVariant> AMDGPUSampleVariants = { defm AMDGPUSample : AMDGPUSampleHelper_Compare<"_LZ", "_lz", []>; } - let Gradients = 1 in { + let Gradients = true in { defm AMDGPUSample : AMDGPUSampleHelper_Clamp<"_D", "_d", []>; defm AMDGPUSample : AMDGPUSampleHelper_Clamp<"_CD", "_cd", []>; } @@ -600,12 +611,12 @@ class AMDGPUDimProfile<string opmod, string OpMod = opmod; // the corresponding instruction is named IMAGE_OpMod // These are intended to be overwritten by subclasses - bit IsSample = 0; - bit IsAtomic = 0; + bit IsSample = false; + bit IsAtomic = false; list<LLVMType> RetTypes = []; list<AMDGPUArg> DataArgs = []; list<AMDGPUArg> ExtraAddrArgs = []; - bit Gradients = 0; + bit Gradients = false; string LodClampMip = ""; int NumRetAndDataAnyTypes = @@ -616,7 +627,7 @@ class AMDGPUDimProfile<string opmod, arglistconcat<[ExtraAddrArgs, !if(Gradients, dim.GradientArgs, []), !listconcat(!if(IsSample, dim.CoordSliceArgs, dim.CoordSliceIntArgs), - !if(!eq(LodClampMip, ""), + !if(!empty(LodClampMip), []<AMDGPUArg>, [AMDGPUArg<LLVMMatchType<0>, LodClampMip>]))], NumRetAndDataAnyTypes>.ret; @@ -646,7 +657,7 @@ class AMDGPUDimProfileCopy<AMDGPUDimProfile base> : AMDGPUDimProfile<base.OpMod, class AMDGPUDimSampleProfile<string opmod, AMDGPUDimProps dim, AMDGPUSampleVariant sample> : AMDGPUDimProfile<opmod, dim> { - let IsSample = 1; + let IsSample = true; let RetTypes = [llvm_any_ty]; let ExtraAddrArgs = sample.ExtraAddrArgs; let Gradients = sample.Gradients; @@ -657,7 +668,7 @@ class AMDGPUDimNoSampleProfile<string opmod, AMDGPUDimProps dim, list<LLVMType> retty, list<AMDGPUArg> dataargs, - bit Mip = 0> : AMDGPUDimProfile<opmod, dim> { + bit Mip = false> : AMDGPUDimProfile<opmod, dim> { let RetTypes = retty; let DataArgs = dataargs; let LodClampMip = !if(Mip, "mip", ""); @@ -668,7 +679,7 @@ class AMDGPUDimAtomicProfile<string opmod, list<AMDGPUArg> dataargs> : AMDGPUDimProfile<opmod, dim> { let RetTypes = [llvm_anyint_ty]; let DataArgs = dataargs; - let IsAtomic = 1; + let IsAtomic = true; } class AMDGPUDimGetResInfoProfile<AMDGPUDimProps dim> : AMDGPUDimProfile<"GET_RESINFO", dim> { @@ -681,13 +692,23 @@ class AMDGPUDimGetResInfoProfile<AMDGPUDimProps dim> : AMDGPUDimProfile<"GET_RES // Helper class for figuring out image intrinsic argument indexes. 
class AMDGPUImageDimIntrinsicEval<AMDGPUDimProfile P_> { int NumDataArgs = !size(P_.DataArgs); - int NumDmaskArgs = !if(P_.IsAtomic, 0, 1); + int NumDmaskArgs = !not(P_.IsAtomic); + int NumExtraAddrArgs = !size(P_.ExtraAddrArgs); int NumVAddrArgs = !size(P_.AddrArgs); + int NumGradientArgs = !if(P_.Gradients, !size(P_.Dim.GradientArgs), 0); + int NumCoordArgs = !if(P_.IsSample, !size(P_.Dim.CoordSliceArgs), !size(P_.Dim.CoordSliceIntArgs)); int NumRSrcArgs = 1; int NumSampArgs = !if(P_.IsSample, 2, 0); int DmaskArgIndex = NumDataArgs; - int UnormArgIndex = !add(NumDataArgs, NumDmaskArgs, NumVAddrArgs, NumRSrcArgs, 1); - int TexFailCtrlArgIndex = !add(NumDataArgs, NumDmaskArgs, NumVAddrArgs, NumRSrcArgs, NumSampArgs); + int VAddrArgIndex = !add(DmaskArgIndex, NumDmaskArgs); + int GradientArgIndex = !add(VAddrArgIndex, NumExtraAddrArgs); + int CoordArgIndex = !add(GradientArgIndex, NumGradientArgs); + int LodArgIndex = !add(VAddrArgIndex, NumVAddrArgs, -1); + int MipArgIndex = LodArgIndex; + int RsrcArgIndex = !add(VAddrArgIndex, NumVAddrArgs); + int SampArgIndex = !add(RsrcArgIndex, NumRSrcArgs); + int UnormArgIndex = !add(SampArgIndex, 1); + int TexFailCtrlArgIndex = !add(SampArgIndex, NumSampArgs); int CachePolicyArgIndex = !add(TexFailCtrlArgIndex, 1); } @@ -738,7 +759,7 @@ defset list<AMDGPUImageDimIntrinsic> AMDGPUImageDimIntrinsics = { list<AMDGPUArg> dataargs, list<IntrinsicProperty> props, list<SDNodeProperty> sdnodeprops, - bit Mip = 0> { + bit Mip = false> { foreach dim = AMDGPUDims.NoMsaa in { def !strconcat(NAME, "_", dim.Name) : AMDGPUImageDimIntrinsic< @@ -752,7 +773,7 @@ defset list<AMDGPUImageDimIntrinsic> AMDGPUImageDimIntrinsics = { list<AMDGPUArg> dataargs, list<IntrinsicProperty> props, list<SDNodeProperty> sdnodeprops, - bit Mip = 0> { + bit Mip = false> { foreach dim = AMDGPUDims.All in { def !strconcat(NAME, "_", dim.Name) : AMDGPUImageDimIntrinsic< @@ -787,7 +808,7 @@ defset list<AMDGPUImageDimIntrinsic> AMDGPUImageDimIntrinsics = { ////////////////////////////////////////////////////////////////////////// multiclass AMDGPUImageDimSampleDims<string opmod, AMDGPUSampleVariant sample, - bit NoMem = 0> { + bit NoMem = false> { foreach dim = AMDGPUDims.NoMsaa in { def !strconcat(NAME, "_", dim.Name) : AMDGPUImageDimIntrinsic< AMDGPUDimSampleProfile<opmod, dim, sample>, @@ -973,9 +994,9 @@ class AMDGPUStructBufferStore<LLVMType data_ty = llvm_any_ty> : Intrinsic < def int_amdgcn_struct_buffer_store_format : AMDGPUStructBufferStore; def int_amdgcn_struct_buffer_store : AMDGPUStructBufferStore; -class AMDGPURawBufferAtomic<LLVMType data_ty = llvm_any_ty> : Intrinsic < - [data_ty], - [LLVMMatchType<0>, // vdata(VGPR) +class AMDGPURawBufferAtomic<LLVMType data_ty = llvm_any_ty, bit NoRtn = false> : Intrinsic < + !if(NoRtn, [], [data_ty]), + [!if(NoRtn, data_ty, LLVMMatchType<0>), // vdata(VGPR) llvm_v4i32_ty, // rsrc(SGPR) llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling) llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling) @@ -1005,9 +1026,12 @@ def int_amdgcn_raw_buffer_atomic_cmpswap : Intrinsic< [ImmArg<ArgIndex<5>>, IntrWillReturn], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<2, 0>; -class AMDGPUStructBufferAtomic<LLVMType data_ty = llvm_any_ty> : Intrinsic < - [data_ty], - [LLVMMatchType<0>, // vdata(VGPR) +// gfx908 intrinsic +def int_amdgcn_raw_buffer_atomic_fadd : AMDGPURawBufferAtomic<llvm_anyfloat_ty>; + +class AMDGPUStructBufferAtomic<LLVMType data_ty = llvm_any_ty, bit NoRtn = false> : Intrinsic < + !if(NoRtn, 
[], [data_ty]), + [!if(NoRtn, data_ty, LLVMMatchType<0>), // vdata(VGPR) llvm_v4i32_ty, // rsrc(SGPR) llvm_i32_ty, // vindex(VGPR) llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling) @@ -1039,6 +1063,10 @@ def int_amdgcn_struct_buffer_atomic_cmpswap : Intrinsic< [ImmArg<ArgIndex<6>>, IntrWillReturn], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<2, 0>; +// gfx908 intrinsic +def int_amdgcn_struct_buffer_atomic_fadd : AMDGPUStructBufferAtomic<llvm_anyfloat_ty>; + + // Obsolescent tbuffer intrinsics. def int_amdgcn_tbuffer_load : Intrinsic < [llvm_any_ty], // overloaded for types f32/i32, v2f32/v2i32, v4f32/v4i32 @@ -1168,6 +1196,19 @@ def int_amdgcn_buffer_atomic_cmpswap : Intrinsic< AMDGPURsrcIntrinsic<2, 0>; def int_amdgcn_buffer_atomic_csub : AMDGPUBufferAtomic; + +class AMDGPUBufferAtomicFP : Intrinsic < + [llvm_anyfloat_ty], + [LLVMMatchType<0>, // vdata(VGPR) + llvm_v4i32_ty, // rsrc(SGPR) + llvm_i32_ty, // vindex(VGPR) + llvm_i32_ty, // offset(SGPR/VGPR/imm) + llvm_i1_ty], // slc(imm) + [ImmArg<ArgIndex<4>>, IntrWillReturn], "", [SDNPMemOperand]>, + AMDGPURsrcIntrinsic<1, 0>; + +// Legacy form of the intrinsic. raw and struct forms should be preferred. +def int_amdgcn_buffer_atomic_fadd : AMDGPUBufferAtomicFP; } // defset AMDGPUBufferIntrinsics // Uses that do not set the done bit should set IntrWriteMem on the @@ -1248,7 +1289,7 @@ def int_amdgcn_s_getreg : def int_amdgcn_s_setreg : GCCBuiltin<"__builtin_amdgcn_s_setreg">, Intrinsic<[], [llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, IntrHasSideEffects, ImmArg<ArgIndex<0>>] + [IntrNoMem, IntrHasSideEffects, IntrWillReturn, ImmArg<ArgIndex<0>>] >; // int_amdgcn_s_getpc is provided to allow a specific style of position @@ -1291,6 +1332,7 @@ def int_amdgcn_interp_p2 : // See int_amdgcn_v_interp_p1 for why this is IntrNoMem. // __builtin_amdgcn_interp_p1_f16 <i>, <attr_chan>, <attr>, <high>, <m0> +// high selects whether high or low 16-bits are loaded from LDS def int_amdgcn_interp_p1_f16 : GCCBuiltin<"__builtin_amdgcn_interp_p1_f16">, Intrinsic<[llvm_float_ty], @@ -1299,6 +1341,7 @@ def int_amdgcn_interp_p1_f16 : ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>]>; // __builtin_amdgcn_interp_p2_f16 <p1>, <j>, <attr_chan>, <attr>, <high>, <m0> +// high selects whether high or low 16-bits are loaded from LDS def int_amdgcn_interp_p2_f16 : GCCBuiltin<"__builtin_amdgcn_interp_p2_f16">, Intrinsic<[llvm_half_ty], @@ -1538,6 +1581,10 @@ def int_amdgcn_wqm_vote : Intrinsic<[llvm_i1_ty], // FIXME: Should this be IntrNoMem, IntrHasSideEffects, or IntrWillReturn? def int_amdgcn_kill : Intrinsic<[], [llvm_i1_ty], []>; +def int_amdgcn_endpgm : GCCBuiltin<"__builtin_amdgcn_endpgm">, + Intrinsic<[], [], [IntrNoReturn, IntrCold, IntrNoMem, IntrHasSideEffects] +>; + // Copies the active channels of the source value to the destination value, // with the guarantee that the source value is computed as if the entire // program were executed in Whole Wavefront Mode, i.e. 
with all channels @@ -1667,10 +1714,19 @@ class AMDGPUGlobalAtomicRtn<LLVMType vt> : Intrinsic < [vt], [llvm_anyptr_ty, // vaddr vt], // vdata(VGPR) - [IntrArgMemOnly, NoCapture<ArgIndex<0>>], "", [SDNPMemOperand]>; + [IntrArgMemOnly, IntrWillReturn, NoCapture<ArgIndex<0>>], "", + [SDNPMemOperand]>; def int_amdgcn_global_atomic_csub : AMDGPUGlobalAtomicRtn<llvm_i32_ty>; +// uint4 llvm.amdgcn.image.bvh.intersect.ray <node_ptr>, <ray_extent>, <ray_origin>, +// <ray_dir>, <ray_inv_dir>, <texture_descr> +def int_amdgcn_image_bvh_intersect_ray : + Intrinsic<[llvm_v4i32_ty], + [llvm_anyint_ty, llvm_float_ty, llvm_v4f32_ty, llvm_anyvector_ty, + LLVMMatchType<1>, llvm_v4i32_ty], + [IntrReadMem, IntrWillReturn]>; + //===----------------------------------------------------------------------===// // Deep learning intrinsics. //===----------------------------------------------------------------------===// @@ -1786,25 +1842,7 @@ def int_amdgcn_udot8 : // gfx908 intrinsics // ===----------------------------------------------------------------------===// -class AMDGPUBufferAtomicNoRtn : Intrinsic < - [], - [llvm_anyfloat_ty, // vdata(VGPR) - llvm_v4i32_ty, // rsrc(SGPR) - llvm_i32_ty, // vindex(VGPR) - llvm_i32_ty, // offset(SGPR/VGPR/imm) - llvm_i1_ty], // slc(imm) - [ImmArg<ArgIndex<4>>, IntrWillReturn], "", [SDNPMemOperand]>, - AMDGPURsrcIntrinsic<1, 0>; - -class AMDGPUGlobalAtomicNoRtn : Intrinsic < - [], - [llvm_anyptr_ty, // vaddr - llvm_anyfloat_ty], // vdata(VGPR) - [IntrArgMemOnly, IntrWillReturn, NoCapture<ArgIndex<0>>], "", - [SDNPMemOperand]>; - -def int_amdgcn_buffer_atomic_fadd : AMDGPUBufferAtomicNoRtn; -def int_amdgcn_global_atomic_fadd : AMDGPUGlobalAtomicNoRtn; +def int_amdgcn_global_atomic_fadd : AMDGPUGlobalAtomicRtn<llvm_anyfloat_ty>; // llvm.amdgcn.mfma.f32.* vdst, srcA, srcB, srcC, cbsz, abid, blgp def int_amdgcn_mfma_f32_32x32x1f32 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_32x32x1f32">, diff --git a/llvm/include/llvm/IR/IntrinsicsARM.td b/llvm/include/llvm/IR/IntrinsicsARM.td index df74e446b965..0eb27cc34462 100644 --- a/llvm/include/llvm/IR/IntrinsicsARM.td +++ b/llvm/include/llvm/IR/IntrinsicsARM.td @@ -791,14 +791,17 @@ def int_arm_neon_vcvtbfp2bf : Intrinsic<[llvm_bfloat_ty], [llvm_float_ty], [IntrNoMem]>; def int_arm_neon_bfdot : Neon_Dot_Intrinsic; -def int_arm_neon_bfmmla : Neon_MatMul_Intrinsic; - -class Neon_FML_Intrinsic - : Intrinsic<[llvm_anyvector_ty], - [LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<1>], - [IntrNoMem]>; -def int_arm_neon_bfmlalb : Neon_FML_Intrinsic; -def int_arm_neon_bfmlalt : Neon_FML_Intrinsic; +def int_arm_neon_bfmmla + : Intrinsic<[llvm_v4f32_ty], + [llvm_v4f32_ty, llvm_v8bf16_ty, llvm_v8bf16_ty], + [IntrNoMem]>; + +class Neon_BF16FML_Intrinsic + : Intrinsic<[llvm_v4f32_ty], + [llvm_v4f32_ty, llvm_v8bf16_ty, llvm_v8bf16_ty], + [IntrNoMem]>; +def int_arm_neon_bfmlalb : Neon_BF16FML_Intrinsic; +def int_arm_neon_bfmlalt : Neon_BF16FML_Intrinsic; def int_arm_cls: Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>; def int_arm_cls64: Intrinsic<[llvm_i32_ty], [llvm_i64_ty], [IntrNoMem]>; @@ -814,9 +817,7 @@ def int_arm_neon_vcadd_rot90 : Neon_2Arg_Intrinsic; def int_arm_neon_vcadd_rot270 : Neon_2Arg_Intrinsic; // GNU eabi mcount -def int_arm_gnu_eabi_mcount : Intrinsic<[], - [], - [IntrReadMem, IntrWriteMem]>; +def int_arm_gnu_eabi_mcount : Intrinsic<[], [], []>; def int_arm_mve_pred_i2v : Intrinsic< [llvm_anyvector_ty], [llvm_i32_ty], [IntrNoMem]>; @@ -921,7 +922,7 @@ multiclass MVEPredicatedM<list<LLVMType> rets, list<LLVMType> params, 
list<IntrinsicProperty> props = [IntrNoMem]> { def "": Intrinsic<rets, params, props>; def _predicated: Intrinsic<rets, params # [pred, - !if(!eq(!cast<string>(rets[0]), "llvm_anyvector_ty"), + !if(!eq(rets[0], llvm_anyvector_ty), LLVMMatchType<0>, rets[0])], props>; } diff --git a/llvm/include/llvm/IR/IntrinsicsBPF.td b/llvm/include/llvm/IR/IntrinsicsBPF.td index c4d35b2a0a88..4b4dd94b1599 100644 --- a/llvm/include/llvm/IR/IntrinsicsBPF.td +++ b/llvm/include/llvm/IR/IntrinsicsBPF.td @@ -24,6 +24,14 @@ let TargetPrefix = "bpf" in { // All intrinsics start with "llvm.bpf." Intrinsic<[llvm_i32_ty], [llvm_anyptr_ty, llvm_i64_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>; def int_bpf_btf_type_id : GCCBuiltin<"__builtin_bpf_btf_type_id">, - Intrinsic<[llvm_i32_ty], [llvm_any_ty, llvm_any_ty, llvm_i64_ty], + Intrinsic<[llvm_i64_ty], [llvm_i32_ty, llvm_i64_ty], [IntrNoMem]>; + def int_bpf_preserve_type_info : GCCBuiltin<"__builtin_bpf_preserve_type_info">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i64_ty], + [IntrNoMem]>; + def int_bpf_preserve_enum_value : GCCBuiltin<"__builtin_bpf_preserve_enum_value">, + Intrinsic<[llvm_i64_ty], [llvm_i32_ty, llvm_ptr_ty, llvm_i64_ty], + [IntrNoMem]>; + def int_bpf_passthrough : GCCBuiltin<"__builtin_bpf_passthrough">, + Intrinsic<[llvm_any_ty], [llvm_i32_ty, llvm_any_ty], [IntrNoMem]>; } diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td index 61293418ec41..2ab48cfc4bb7 100644 --- a/llvm/include/llvm/IR/IntrinsicsNVVM.td +++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td @@ -37,11 +37,6 @@ def llvm_anyi64ptr_ty : LLVMAnyPointerType<llvm_i64_ty>; // (space)i64* // MISC // -// Helper class for construction of n-element list<LLVMtype> [t,t,...,t] -class RepLLVMType<int N, LLVMType T> { - list<LLVMType> ret = !if(N, !listconcat(RepLLVMType<!add(N,-1), T>.ret, [T]), []); -} - // Helper class that represents a 'fragment' of an NVPTX *MMA instruction. // Geom: m<M>n<N>k<K>. E.g. m8n32k16 // Frag: [abcd] @@ -54,40 +49,40 @@ class WMMA_REGS<string Geom, string Frag, string PtxEltType> { string ft = frag#":"#ptx_elt_type; list<LLVMType> regs = !cond( // mma.sync.m8n8k4 uses smaller a/b fragments than wmma fp ops - !eq(gft,"m8n8k4:a:f16") : RepLLVMType<2, llvm_v2f16_ty>.ret, - !eq(gft,"m8n8k4:b:f16") : RepLLVMType<2, llvm_v2f16_ty>.ret, + !eq(gft,"m8n8k4:a:f16") : !listsplat(llvm_v2f16_ty, 2), + !eq(gft,"m8n8k4:b:f16") : !listsplat(llvm_v2f16_ty, 2), // fp16 -> fp16/fp32 @ m16n16k16/m8n32k16/m32n8k16 // All currently supported geometries use the same fragment format, // so we only need to consider {fragment, type}. 
- !eq(ft,"a:f16") : RepLLVMType<8, llvm_v2f16_ty>.ret, - !eq(ft,"b:f16") : RepLLVMType<8, llvm_v2f16_ty>.ret, - !eq(ft,"c:f16") : RepLLVMType<4, llvm_v2f16_ty>.ret, - !eq(ft,"d:f16") : RepLLVMType<4, llvm_v2f16_ty>.ret, - !eq(ft,"c:f32") : RepLLVMType<8, llvm_float_ty>.ret, - !eq(ft,"d:f32") : RepLLVMType<8, llvm_float_ty>.ret, + !eq(ft,"a:f16") : !listsplat(llvm_v2f16_ty, 8), + !eq(ft,"b:f16") : !listsplat(llvm_v2f16_ty, 8), + !eq(ft,"c:f16") : !listsplat(llvm_v2f16_ty, 4), + !eq(ft,"d:f16") : !listsplat(llvm_v2f16_ty, 4), + !eq(ft,"c:f32") : !listsplat(llvm_float_ty, 8), + !eq(ft,"d:f32") : !listsplat(llvm_float_ty, 8), // u8/s8 -> s32 @ m16n16k16/m8n32k16/m32n8k16 - !eq(gft,"m16n16k16:a:u8") : RepLLVMType<2, llvm_i32_ty>.ret, - !eq(gft,"m16n16k16:a:s8") : RepLLVMType<2, llvm_i32_ty>.ret, - !eq(gft,"m16n16k16:b:u8") : RepLLVMType<2, llvm_i32_ty>.ret, - !eq(gft,"m16n16k16:b:s8") : RepLLVMType<2, llvm_i32_ty>.ret, - !eq(gft,"m16n16k16:c:s32") : RepLLVMType<8, llvm_i32_ty>.ret, - !eq(gft,"m16n16k16:d:s32") : RepLLVMType<8, llvm_i32_ty>.ret, + !eq(gft,"m16n16k16:a:u8") : !listsplat(llvm_i32_ty, 2), + !eq(gft,"m16n16k16:a:s8") : !listsplat(llvm_i32_ty, 2), + !eq(gft,"m16n16k16:b:u8") : !listsplat(llvm_i32_ty, 2), + !eq(gft,"m16n16k16:b:s8") : !listsplat(llvm_i32_ty, 2), + !eq(gft,"m16n16k16:c:s32") : !listsplat(llvm_i32_ty, 8), + !eq(gft,"m16n16k16:d:s32") : !listsplat(llvm_i32_ty, 8), !eq(gft,"m8n32k16:a:u8") : [llvm_i32_ty], !eq(gft,"m8n32k16:a:s8") : [llvm_i32_ty], - !eq(gft,"m8n32k16:b:u8") : RepLLVMType<4, llvm_i32_ty>.ret, - !eq(gft,"m8n32k16:b:s8") : RepLLVMType<4, llvm_i32_ty>.ret, - !eq(gft,"m8n32k16:c:s32") : RepLLVMType<8, llvm_i32_ty>.ret, - !eq(gft,"m8n32k16:d:s32") : RepLLVMType<8, llvm_i32_ty>.ret, + !eq(gft,"m8n32k16:b:u8") : !listsplat(llvm_i32_ty, 4), + !eq(gft,"m8n32k16:b:s8") : !listsplat(llvm_i32_ty, 4), + !eq(gft,"m8n32k16:c:s32") : !listsplat(llvm_i32_ty, 8), + !eq(gft,"m8n32k16:d:s32") : !listsplat(llvm_i32_ty, 8), - !eq(gft,"m32n8k16:a:u8") : RepLLVMType<4, llvm_i32_ty>.ret, - !eq(gft,"m32n8k16:a:s8") : RepLLVMType<4, llvm_i32_ty>.ret, + !eq(gft,"m32n8k16:a:u8") : !listsplat(llvm_i32_ty, 4), + !eq(gft,"m32n8k16:a:s8") : !listsplat(llvm_i32_ty, 4), !eq(gft,"m32n8k16:b:u8") : [llvm_i32_ty], !eq(gft,"m32n8k16:b:s8") : [llvm_i32_ty], - !eq(gft,"m32n8k16:c:s32") : RepLLVMType<8, llvm_i32_ty>.ret, - !eq(gft,"m32n8k16:d:s32") : RepLLVMType<8, llvm_i32_ty>.ret, + !eq(gft,"m32n8k16:c:s32") : !listsplat(llvm_i32_ty, 8), + !eq(gft,"m32n8k16:d:s32") : !listsplat(llvm_i32_ty, 8), // u4/s4/b1 -> s32 @ m8n8k32 (u4/s4), m8n8k128(b1) !eq(gft,"m8n8k128:a:b1") : [llvm_i32_ty], @@ -96,10 +91,10 @@ class WMMA_REGS<string Geom, string Frag, string PtxEltType> { !eq(gft,"m8n8k128:b:b1") : [llvm_i32_ty], !eq(gft,"m8n8k32:b:u4") : [llvm_i32_ty], !eq(gft,"m8n8k32:b:s4") : [llvm_i32_ty], - !eq(gft,"m8n8k128:c:s32") : RepLLVMType<2, llvm_i32_ty>.ret, - !eq(gft,"m8n8k128:d:s32") : RepLLVMType<2, llvm_i32_ty>.ret, - !eq(gft,"m8n8k32:c:s32") : RepLLVMType<2, llvm_i32_ty>.ret, - !eq(gft,"m8n8k32:d:s32") : RepLLVMType<2, llvm_i32_ty>.ret, + !eq(gft,"m8n8k128:c:s32") : !listsplat(llvm_i32_ty, 2), + !eq(gft,"m8n8k128:d:s32") : !listsplat(llvm_i32_ty, 2), + !eq(gft,"m8n8k32:c:s32") : !listsplat(llvm_i32_ty, 2), + !eq(gft,"m8n8k32:d:s32") : !listsplat(llvm_i32_ty, 2), ); } @@ -133,7 +128,7 @@ class MMA_SIGNATURE<WMMA_REGS A, WMMA_REGS B, WMMA_REGS C, WMMA_REGS D> { !eq(A.ptx_elt_type, "u4") : [A], !eq(A.ptx_elt_type, "b1") : [A], // the rest are FP ops identified by accumulator & result type. 
- 1: [D, C] + true: [D, C] ); string ret = !foldl("", id_frags, a, b, !strconcat(a, ".", b.ptx_elt_type)); } @@ -230,19 +225,17 @@ class NVVM_MMA_OPS<int _ = 0> { ldst_bit_ab_ops, ldst_subint_cd_ops); // Separate A/B/C fragments (loads) from D (stores). - list<WMMA_REGS> all_ld_ops = !foldl([]<WMMA_REGS>, all_ldst_ops, a, b, - !listconcat(a, !if(!eq(b.frag,"d"), [],[b]))); - list<WMMA_REGS> all_st_ops = !foldl([]<WMMA_REGS>, all_ldst_ops, a, b, - !listconcat(a, !if(!eq(b.frag,"d"), [b],[]))); + list<WMMA_REGS> all_ld_ops = !filter(op, all_ldst_ops, !ne(op.frag, "d")); + list<WMMA_REGS> all_st_ops = !filter(op, all_ldst_ops, !eq(op.frag, "d")); } def NVVM_MMA_OPS : NVVM_MMA_OPS; -// Returns [1] if this combination of layout/satf is supported, [] otherwise. +// Returns true if this combination of layout/satf is supported; false otherwise. // MMA ops must provide all parameters. Loads and stores -- only frags and layout_a. // The class is used to prevent generation of records for the unsupported variants. // E.g. -// foreach _ = NVVM_MMA_SUPPORTED<...>.ret in = +// if NVVM_MMA_SUPPORTED<...>.ret then // def : FOO<>; // The record will only be defined for supported ops. // class NVVM_MMA_SUPPORTED<list<WMMA_REGS> frags, string layout_a, string layout_b="-", int satf=-1> { @@ -268,20 +261,20 @@ class NVVM_MMA_SUPPORTED<list<WMMA_REGS> frags, string layout_a, string layout_b # !if(!eq(!size(frags), 4), frags[2].ptx_elt_type # frags[3].ptx_elt_type, "?"); - list<int> ret = !cond( + bit ret = !cond( // Sub-int MMA only supports fixed A/B layout. // b1 does not support .satf. - !eq(mma#":"#satf, "b1:row:col:0") : [1], + !eq(mma#":"#satf, "b1:row:col:0") : true, // mma.m8n8k4 has no .satf modifier. !and(!eq(frags[0].geom, "m8n8k4"), - !ne(satf, 0)): [], + !ne(satf, 0)): false, // mma.m8n8k4 has no C=f32 D=f16 variant. - !eq(gcd, "m8n8k4:f32f16"): [], - !eq(mma, "s4:row:col") : [1], - !eq(mma, "u4:row:col") : [1], - !eq(mma, "s4:row:col") : [1], - !eq(mma, "u4:row:col") : [1], + !eq(gcd, "m8n8k4:f32f16"): false, + !eq(mma, "s4:row:col") : true, + !eq(mma, "u4:row:col") : true, + !eq(mma, "s4:row:col") : true, + !eq(mma, "u4:row:col") : true, // Sub-int load/stores have fixed layout for A and B. !and(!eq(layout_b, "-"), // It's a Load or Store op !or(!eq(ld, "b1:a:row"), @@ -295,13 +288,13 @@ class NVVM_MMA_SUPPORTED<list<WMMA_REGS> frags, string layout_a, string layout_b !eq(ld, "u4:a:row"), !eq(ld, "u4:b:col"), !eq(ldf, "u4:c"), - !eq(ldf, "u4:d"))) : [1], + !eq(ldf, "u4:d"))) : true, // All other sub-int ops are not supported. - !eq(t, "b1") : [], - !eq(t, "s4") : [], - !eq(t, "u4") : [], + !eq(t, "b1") : false, + !eq(t, "s4") : false, + !eq(t, "u4") : false, // All other (non sub-int) are OK. - 1: [1] + true: true ); } @@ -314,8 +307,8 @@ class SHFL_INFO<bit sync, string mode, string type, bit return_pred> { string Name = "int_nvvm_shfl_" # Suffix; string Builtin = "__nvvm_shfl_" # Suffix; string IntrName = "llvm.nvvm.shfl." # !subst("_",".", Suffix); - list<int> withGccBuiltin = !if(return_pred, [], [1]); - list<int> withoutGccBuiltin = !if(return_pred, [1], []); + bit withGccBuiltin = !not(return_pred); + bit withoutGccBuiltin = return_pred; LLVMType OpType = !cond( !eq(type,"i32"): llvm_i32_ty, !eq(type,"f32"): llvm_float_ty); @@ -4005,18 +3998,18 @@ def int_nvvm_read_ptx_sreg_warpsize : PTXReadSRegIntrinsic_r32<"warpsize">; // SHUFFLE // // Generate intrinsics for all variants of shfl instruction. 
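Three TableGen features carry most of the NVVM changes above: !listsplat replaces the recursive RepLLVMType helper, !filter splits a list by a predicate, and the statement-level `if ... then` replaces the `foreach _ = [1] in` trick for conditionally defining records. A rough standalone sketch with invented names, runnable by itself with llvm-tblgen:

  // list_sketch.td -- invented names; run with: llvm-tblgen list_sketch.td
  class Frag<string f> { string frag = f; }
  def A : Frag<"a">;
  def D : Frag<"d">;

  class Regs<list<int> r> { list<int> regs = r; }
  def EightCopies : Regs<!listsplat(7, 8)>;      // !listsplat(v, n) -> [7,7,7,7,7,7,7,7]

  class Split {
    // !filter keeps the elements for which the predicate holds.
    list<Frag> loads  = !filter(op, [A, D], !ne(op.frag, "d"));   // [A]
    list<Frag> stores = !filter(op, [A, D], !eq(op.frag, "d"));   // [D]
  }
  def S : Split;

  // `if <bit> then` guards record generation inside a foreach.
  foreach f = [A, D] in
    if !eq(f.frag, "a") then
      def "only_" # f.frag : Frag<f.frag>;       // defines only_a and skips D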
-foreach sync = [0, 1] in { +foreach sync = [false, true] in { foreach mode = ["up", "down", "bfly", "idx"] in { foreach type = ["i32", "f32"] in { - foreach return_pred = [0, 1] in { + foreach return_pred = [false, true] in { foreach i = [SHFL_INFO<sync, mode, type, return_pred>] in { - foreach _ = i.withGccBuiltin in { + if i.withGccBuiltin then { def i.Name : GCCBuiltin<i.Builtin>, Intrinsic<i.RetTy, i.ArgsTy, [IntrInaccessibleMemOnly, IntrConvergent], i.IntrName>; } - foreach _ = i.withoutGccBuiltin in { + if i.withoutGccBuiltin then { def i.Name : Intrinsic<i.RetTy, i.ArgsTy, [IntrInaccessibleMemOnly, IntrConvergent], i.IntrName>; } @@ -4127,11 +4120,11 @@ class NVVM_WMMA_ST<WMMA_REGS Frag, string Layout, int WithStride> foreach layout = ["row", "col"] in { foreach stride = [0, 1] in { foreach frag = NVVM_MMA_OPS.all_ld_ops in - foreach _ = NVVM_MMA_SUPPORTED<[frag], layout>.ret in + if NVVM_MMA_SUPPORTED<[frag], layout>.ret then def WMMA_NAME_LDST<"load", frag, layout, stride>.record : NVVM_WMMA_LD<frag, layout, stride>; foreach frag = NVVM_MMA_OPS.all_st_ops in - foreach _ = NVVM_MMA_SUPPORTED<[frag], layout>.ret in + if NVVM_MMA_SUPPORTED<[frag], layout>.ret then def WMMA_NAME_LDST<"store", frag, layout, stride>.record : NVVM_WMMA_ST<frag, layout, stride>; } @@ -4150,7 +4143,7 @@ foreach layout_a = ["row", "col"] in { foreach layout_b = ["row", "col"] in { foreach satf = [0, 1] in { foreach op = NVVM_MMA_OPS.all_mma_ops in { - foreach _ = NVVM_MMA_SUPPORTED<op, layout_a, layout_b, satf>.ret in { + if NVVM_MMA_SUPPORTED<op, layout_a, layout_b, satf>.ret then { def WMMA_NAME_MMA<layout_a, layout_b, satf, op[0], op[1], op[2], op[3]>.record : NVVM_WMMA_MMA<layout_a, layout_b, satf, diff --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td index 614a29049686..075b6252d9a5 100644 --- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td +++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td @@ -18,10 +18,12 @@ let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.". // dcba/dcbf/dcbi/dcbst/dcbt/dcbz/dcbzl(PPC970) instructions. def int_ppc_dcba : Intrinsic<[], [llvm_ptr_ty], []>; - def int_ppc_dcbf : GCCBuiltin<"__builtin_dcbf">, - Intrinsic<[], [llvm_ptr_ty], []>; - def int_ppc_dcbfl : Intrinsic<[], [llvm_ptr_ty], []>; - def int_ppc_dcbflp: Intrinsic<[], [llvm_ptr_ty], []>; + def int_ppc_dcbf : GCCBuiltin<"__builtin_dcbf">, + Intrinsic<[], [llvm_ptr_ty], [IntrArgMemOnly]>; + def int_ppc_dcbfl : Intrinsic<[], [llvm_ptr_ty], [IntrArgMemOnly]>; + def int_ppc_dcbflp : Intrinsic<[], [llvm_ptr_ty], [IntrArgMemOnly]>; + def int_ppc_dcbfps : Intrinsic<[], [llvm_ptr_ty], [IntrArgMemOnly]>; + def int_ppc_dcbstps : Intrinsic<[], [llvm_ptr_ty], [IntrArgMemOnly]>; def int_ppc_dcbi : Intrinsic<[], [llvm_ptr_ty], []>; def int_ppc_dcbst : Intrinsic<[], [llvm_ptr_ty], []>; def int_ppc_dcbt : Intrinsic<[], [llvm_ptr_ty], @@ -47,6 +49,13 @@ let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.". 
// eieio instruction def int_ppc_eieio : Intrinsic<[],[],[]>; + // Get content from current FPSCR register + def int_ppc_readflm : GCCBuiltin<"__builtin_readflm">, + Intrinsic<[llvm_double_ty], [], [IntrNoMem]>; + // Set FPSCR register, and return previous content + def int_ppc_setflm : GCCBuiltin<"__builtin_setflm">, + Intrinsic<[llvm_double_ty], [llvm_double_ty], []>; + // Intrinsics for [double]word extended forms of divide instructions def int_ppc_divwe : GCCBuiltin<"__builtin_divwe">, Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], @@ -61,6 +70,14 @@ let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.". Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>; + // Generate a random number + def int_ppc_darn : GCCBuiltin<"__builtin_darn">, + Intrinsic<[llvm_i64_ty], [], [IntrNoMem]>; + def int_ppc_darnraw : GCCBuiltin<"__builtin_darn_raw">, + Intrinsic<[llvm_i64_ty], [], [IntrNoMem]>; + def int_ppc_darn32 : GCCBuiltin<"__builtin_darn_32">, + Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>; + // Bit permute doubleword def int_ppc_bpermd : GCCBuiltin<"__builtin_bpermd">, Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], @@ -135,6 +152,28 @@ let TargetPrefix = "ppc" in { // All PPC intrinsics start with "llvm.ppc.". } //===----------------------------------------------------------------------===// +// PowerPC MMA Intrinsic Multi Class Definitions. +// + +multiclass PowerPC_MMA_ACC_Intrinsic<list<LLVMType> args> { + def NAME: Intrinsic<[llvm_v512i1_ty], args, [IntrNoMem]>; + def pp : Intrinsic<[llvm_v512i1_ty], !listconcat([llvm_v512i1_ty], args), + [IntrNoMem]>; + def pn : Intrinsic<[llvm_v512i1_ty], !listconcat([llvm_v512i1_ty], args), + [IntrNoMem]>; + def np : Intrinsic<[llvm_v512i1_ty], !listconcat([llvm_v512i1_ty], args), + [IntrNoMem]>; + def nn : Intrinsic<[llvm_v512i1_ty], !listconcat([llvm_v512i1_ty], args), + [IntrNoMem]>; +} + +multiclass PowerPC_MMA_ACC_PP_Intrinsic<list<LLVMType> args> { + def NAME: Intrinsic<[llvm_v512i1_ty], args, [IntrNoMem]>; + def pp : Intrinsic<[llvm_v512i1_ty], !listconcat([llvm_v512i1_ty], args), + [IntrNoMem]>; +} + +//===----------------------------------------------------------------------===// // PowerPC Altivec Intrinsic Class Definitions. // @@ -186,6 +225,13 @@ class PowerPC_Vec_QQQ_Intrinsic<string GCCIntSuffix> [llvm_v1i128_ty], [llvm_v1i128_ty, llvm_v1i128_ty], [IntrNoMem]>; +/// PowerPC_Vec_QDD_Intrinsic - A PowerPC intrinsic that takes two v2i64 +/// vectors and returns one v1i128. These intrinsics have no side effects. +class PowerPC_Vec_QDD_Intrinsic<string GCCIntSuffix> + : PowerPC_Vec_Intrinsic<GCCIntSuffix, + [llvm_v1i128_ty], [llvm_v2i64_ty, llvm_v2i64_ty], + [IntrNoMem]>; + //===----------------------------------------------------------------------===// // PowerPC VSX Intrinsic Class Definitions. // @@ -239,9 +285,9 @@ let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.". // VSCR access. def int_ppc_altivec_mfvscr : GCCBuiltin<"__builtin_altivec_mfvscr">, - Intrinsic<[llvm_v8i16_ty], [], [IntrReadMem]>; + Intrinsic<[llvm_v8i16_ty], [], [IntrNoMem, IntrHasSideEffects]>; def int_ppc_altivec_mtvscr : GCCBuiltin<"__builtin_altivec_mtvscr">, - Intrinsic<[], [llvm_v4i32_ty], []>; + Intrinsic<[], [llvm_v4i32_ty], [IntrNoMem, IntrHasSideEffects]>; // Loads. These don't map directly to GCC builtins because they represent the @@ -347,6 +393,28 @@ let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.". 
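The new PowerPC MMA multiclasses above rely on the `def NAME` idiom: the def written as NAME takes the defm instantiation name itself, while suffixed defs such as pp or nn get that name prepended. A small standalone sketch of the naming pattern, using invented names and runnable with llvm-tblgen:

  // mma_names_sketch.td -- invented names; run with: llvm-tblgen mma_names_sketch.td
  class Op<list<string> a> { list<string> args = a; }

  multiclass AccVariants<list<string> args> {
    def NAME : Op<args>;                          // record named exactly like the defm
    def pp   : Op<!listconcat(["acc"], args)>;    // record <defm-name>pp
    def nn   : Op<!listconcat(["acc"], args)>;    // record <defm-name>nn
  }

  // Expands to the records my_ger, my_gerpp and my_gernn.
  defm my_ger : AccVariants<["a", "b"]>;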
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; + def int_ppc_altivec_vcmpequq : GCCBuiltin<"__builtin_altivec_vcmpequq">, + Intrinsic<[llvm_v1i128_ty], [llvm_v1i128_ty, llvm_v1i128_ty], + [IntrNoMem]>; + def int_ppc_altivec_vcmpgtsq : GCCBuiltin<"__builtin_altivec_vcmpgtsq">, + Intrinsic<[llvm_v1i128_ty], [llvm_v1i128_ty, llvm_v1i128_ty], + [IntrNoMem]>; + def int_ppc_altivec_vcmpgtuq : GCCBuiltin<"__builtin_altivec_vcmpgtuq">, + Intrinsic<[llvm_v1i128_ty], [llvm_v1i128_ty, llvm_v1i128_ty], + [IntrNoMem]>; + def int_ppc_altivec_vcmpequq_p : GCCBuiltin<"__builtin_altivec_vcmpequq_p">, + Intrinsic<[llvm_i32_ty], + [llvm_i32_ty,llvm_v1i128_ty,llvm_v1i128_ty], + [IntrNoMem]>; + def int_ppc_altivec_vcmpgtsq_p : GCCBuiltin<"__builtin_altivec_vcmpgtsq_p">, + Intrinsic<[llvm_i32_ty], + [llvm_i32_ty,llvm_v1i128_ty,llvm_v1i128_ty], + [IntrNoMem]>; + def int_ppc_altivec_vcmpgtuq_p : GCCBuiltin<"__builtin_altivec_vcmpgtuq_p">, + Intrinsic<[llvm_i32_ty], + [llvm_i32_ty,llvm_v1i128_ty,llvm_v1i128_ty], + [IntrNoMem]>; + // Predicate Comparisons. The first operand specifies interpretation of CR6. def int_ppc_altivec_vcmpbfp_p : GCCBuiltin<"__builtin_altivec_vcmpbfp_p">, Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v4f32_ty,llvm_v4f32_ty], @@ -429,6 +497,56 @@ let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.". def int_ppc_altivec_vprtybq : GCCBuiltin<"__builtin_altivec_vprtybq">, Intrinsic<[llvm_v1i128_ty],[llvm_v1i128_ty],[IntrNoMem]>; + // P10 Vector Extract with Mask + def int_ppc_altivec_vextractbm : GCCBuiltin<"__builtin_altivec_vextractbm">, + Intrinsic<[llvm_i32_ty], [llvm_v16i8_ty], [IntrNoMem]>; + def int_ppc_altivec_vextracthm : GCCBuiltin<"__builtin_altivec_vextracthm">, + Intrinsic<[llvm_i32_ty], [llvm_v8i16_ty], [IntrNoMem]>; + def int_ppc_altivec_vextractwm : GCCBuiltin<"__builtin_altivec_vextractwm">, + Intrinsic<[llvm_i32_ty], [llvm_v4i32_ty], [IntrNoMem]>; + def int_ppc_altivec_vextractdm : GCCBuiltin<"__builtin_altivec_vextractdm">, + Intrinsic<[llvm_i32_ty], [llvm_v2i64_ty], [IntrNoMem]>; + def int_ppc_altivec_vextractqm : GCCBuiltin<"__builtin_altivec_vextractqm">, + Intrinsic<[llvm_i32_ty], [llvm_v1i128_ty], [IntrNoMem]>; + + // P10 Vector Expand with Mask + def int_ppc_altivec_vexpandbm : GCCBuiltin<"__builtin_altivec_vexpandbm">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty], [IntrNoMem]>; + def int_ppc_altivec_vexpandhm : GCCBuiltin<"__builtin_altivec_vexpandhm">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty], [IntrNoMem]>; + def int_ppc_altivec_vexpandwm : GCCBuiltin<"__builtin_altivec_vexpandwm">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty], [IntrNoMem]>; + def int_ppc_altivec_vexpanddm : GCCBuiltin<"__builtin_altivec_vexpanddm">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty], [IntrNoMem]>; + def int_ppc_altivec_vexpandqm : GCCBuiltin<"__builtin_altivec_vexpandqm">, + Intrinsic<[llvm_v1i128_ty], [llvm_v1i128_ty], [IntrNoMem]>; + + // P10 Vector Count with Mask intrinsics. 
+ def int_ppc_altivec_vcntmbb : GCCBuiltin<"__builtin_altivec_vcntmbb">, + Intrinsic<[llvm_i64_ty], [llvm_v16i8_ty, llvm_i32_ty], + [IntrNoMem, ImmArg<ArgIndex<1>>]>; + def int_ppc_altivec_vcntmbh : GCCBuiltin<"__builtin_altivec_vcntmbh">, + Intrinsic<[llvm_i64_ty], [llvm_v8i16_ty, llvm_i32_ty], + [IntrNoMem, ImmArg<ArgIndex<1>>]>; + def int_ppc_altivec_vcntmbw : GCCBuiltin<"__builtin_altivec_vcntmbw">, + Intrinsic<[llvm_i64_ty], [llvm_v4i32_ty, llvm_i32_ty], + [IntrNoMem, ImmArg<ArgIndex<1>>]>; + def int_ppc_altivec_vcntmbd : GCCBuiltin<"__builtin_altivec_vcntmbd">, + Intrinsic<[llvm_i64_ty], [llvm_v2i64_ty, llvm_i32_ty], + [IntrNoMem, ImmArg<ArgIndex<1>>]>; + + // P10 Move to VSR with Mask Intrinsics. + def int_ppc_altivec_mtvsrbm : GCCBuiltin<"__builtin_altivec_mtvsrbm">, + Intrinsic<[llvm_v16i8_ty], [llvm_i64_ty], [IntrNoMem]>; + def int_ppc_altivec_mtvsrhm : GCCBuiltin<"__builtin_altivec_mtvsrhm">, + Intrinsic<[llvm_v8i16_ty], [llvm_i64_ty], [IntrNoMem]>; + def int_ppc_altivec_mtvsrwm : GCCBuiltin<"__builtin_altivec_mtvsrwm">, + Intrinsic<[llvm_v4i32_ty], [llvm_i64_ty], [IntrNoMem]>; + def int_ppc_altivec_mtvsrdm : GCCBuiltin<"__builtin_altivec_mtvsrdm">, + Intrinsic<[llvm_v2i64_ty], [llvm_i64_ty], [IntrNoMem]>; + def int_ppc_altivec_mtvsrqm : GCCBuiltin<"__builtin_altivec_mtvsrqm">, + Intrinsic<[llvm_v1i128_ty], [llvm_i64_ty], [IntrNoMem]>; + // P10 Vector Parallel Bits Deposit/Extract Doubleword Builtins. def int_ppc_altivec_vpdepd : GCCBuiltin<"__builtin_altivec_vpdepd">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], @@ -437,6 +555,25 @@ let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.". Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; + // P10 Vector String Isolate Intrinsics. + def int_ppc_altivec_vstribr : GCCBuiltin<"__builtin_altivec_vstribr">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty], [IntrNoMem]>; + def int_ppc_altivec_vstribl : GCCBuiltin<"__builtin_altivec_vstribl">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty], [IntrNoMem]>; + def int_ppc_altivec_vstrihr : GCCBuiltin<"__builtin_altivec_vstrihr">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty], [IntrNoMem]>; + def int_ppc_altivec_vstrihl : GCCBuiltin<"__builtin_altivec_vstrihl">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty], [IntrNoMem]>; + // Predicate Intrinsics: The first operand specifies interpretation of CR6. + def int_ppc_altivec_vstribr_p : GCCBuiltin<"__builtin_altivec_vstribr_p">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_v16i8_ty], [IntrNoMem]>; + def int_ppc_altivec_vstribl_p : GCCBuiltin<"__builtin_altivec_vstribl_p">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_v16i8_ty], [IntrNoMem]>; + def int_ppc_altivec_vstrihr_p : GCCBuiltin<"__builtin_altivec_vstrihr_p">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_v8i16_ty], [IntrNoMem]>; + def int_ppc_altivec_vstrihl_p : GCCBuiltin<"__builtin_altivec_vstrihl_p">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_v8i16_ty], [IntrNoMem]>; + // P10 Vector Centrifuge Builtin. def int_ppc_altivec_vcfuged : GCCBuiltin<"__builtin_altivec_vcfuged">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], @@ -468,27 +605,27 @@ let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.". // P10 Vector Insert. 
def int_ppc_altivec_vinsblx : GCCBuiltin<"__builtin_altivec_vinsblx">, Intrinsic<[llvm_v16i8_ty], - [llvm_v16i8_ty, llvm_i64_ty, llvm_i64_ty], + [llvm_v16i8_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; def int_ppc_altivec_vinsbrx : GCCBuiltin<"__builtin_altivec_vinsbrx">, Intrinsic<[llvm_v16i8_ty], - [llvm_v16i8_ty, llvm_i64_ty, llvm_i64_ty], + [llvm_v16i8_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; def int_ppc_altivec_vinshlx : GCCBuiltin<"__builtin_altivec_vinshlx">, Intrinsic<[llvm_v8i16_ty], - [llvm_v8i16_ty, llvm_i64_ty, llvm_i64_ty], + [llvm_v8i16_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; def int_ppc_altivec_vinshrx : GCCBuiltin<"__builtin_altivec_vinshrx">, Intrinsic<[llvm_v8i16_ty], - [llvm_v8i16_ty, llvm_i64_ty, llvm_i64_ty], + [llvm_v8i16_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; def int_ppc_altivec_vinswlx : GCCBuiltin<"__builtin_altivec_vinswlx">, Intrinsic<[llvm_v4i32_ty], - [llvm_v4i32_ty, llvm_i64_ty, llvm_i64_ty], + [llvm_v4i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; def int_ppc_altivec_vinswrx : GCCBuiltin<"__builtin_altivec_vinswrx">, Intrinsic<[llvm_v4i32_ty], - [llvm_v4i32_ty, llvm_i64_ty, llvm_i64_ty], + [llvm_v4i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; def int_ppc_altivec_vinsdlx : GCCBuiltin<"__builtin_altivec_vinsdlx">, Intrinsic<[llvm_v2i64_ty], @@ -500,37 +637,70 @@ let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.". [IntrNoMem]>; def int_ppc_altivec_vinsbvlx : GCCBuiltin<"__builtin_altivec_vinsbvlx">, Intrinsic<[llvm_v16i8_ty], - [llvm_v16i8_ty, llvm_i64_ty, llvm_v16i8_ty], + [llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty], [IntrNoMem]>; def int_ppc_altivec_vinsbvrx : GCCBuiltin<"__builtin_altivec_vinsbvrx">, Intrinsic<[llvm_v16i8_ty], - [llvm_v16i8_ty, llvm_i64_ty, llvm_v16i8_ty], + [llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty], [IntrNoMem]>; def int_ppc_altivec_vinshvlx : GCCBuiltin<"__builtin_altivec_vinshvlx">, Intrinsic<[llvm_v8i16_ty], - [llvm_v8i16_ty, llvm_i64_ty, llvm_v8i16_ty], + [llvm_v8i16_ty, llvm_i32_ty, llvm_v8i16_ty], [IntrNoMem]>; def int_ppc_altivec_vinshvrx : GCCBuiltin<"__builtin_altivec_vinshvrx">, Intrinsic<[llvm_v8i16_ty], - [llvm_v8i16_ty, llvm_i64_ty, llvm_v8i16_ty], + [llvm_v8i16_ty, llvm_i32_ty, llvm_v8i16_ty], [IntrNoMem]>; def int_ppc_altivec_vinswvlx : GCCBuiltin<"__builtin_altivec_vinswvlx">, Intrinsic<[llvm_v4i32_ty], - [llvm_v4i32_ty, llvm_i64_ty, llvm_v4i32_ty], + [llvm_v4i32_ty, llvm_i32_ty, llvm_v4i32_ty], [IntrNoMem]>; def int_ppc_altivec_vinswvrx : GCCBuiltin<"__builtin_altivec_vinswvrx">, Intrinsic<[llvm_v4i32_ty], - [llvm_v4i32_ty, llvm_i64_ty, llvm_v4i32_ty], + [llvm_v4i32_ty, llvm_i32_ty, llvm_v4i32_ty], [IntrNoMem]>; // P10 Vector Insert with immediate. def int_ppc_altivec_vinsw : Intrinsic<[llvm_v4i32_ty], - [llvm_v4i32_ty, llvm_i64_ty, llvm_i32_ty], + [llvm_v4i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<2>>]>; def int_ppc_altivec_vinsd : Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i64_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<2>>]>; + // P10 Vector Extract. 
+ def int_ppc_altivec_vextdubvlx : GCCBuiltin<"__builtin_altivec_vextdubvlx">, + Intrinsic<[llvm_v2i64_ty], + [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], + [IntrNoMem]>; + def int_ppc_altivec_vextdubvrx : GCCBuiltin<"__builtin_altivec_vextdubvrx">, + Intrinsic<[llvm_v2i64_ty], + [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], + [IntrNoMem]>; + def int_ppc_altivec_vextduhvlx : GCCBuiltin<"__builtin_altivec_vextduhvlx">, + Intrinsic<[llvm_v2i64_ty], + [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty], + [IntrNoMem]>; + def int_ppc_altivec_vextduhvrx : GCCBuiltin<"__builtin_altivec_vextduhvrx">, + Intrinsic<[llvm_v2i64_ty], + [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty], + [IntrNoMem]>; + def int_ppc_altivec_vextduwvlx : GCCBuiltin<"__builtin_altivec_vextduwvlx">, + Intrinsic<[llvm_v2i64_ty], + [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], + [IntrNoMem]>; + def int_ppc_altivec_vextduwvrx : GCCBuiltin<"__builtin_altivec_vextduwvrx">, + Intrinsic<[llvm_v2i64_ty], + [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], + [IntrNoMem]>; + def int_ppc_altivec_vextddvlx : GCCBuiltin<"__builtin_altivec_vextddvlx">, + Intrinsic<[llvm_v2i64_ty], + [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty], + [IntrNoMem]>; + def int_ppc_altivec_vextddvrx : GCCBuiltin<"__builtin_altivec_vextddvrx">, + Intrinsic<[llvm_v2i64_ty], + [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty], + [IntrNoMem]>; } // Vector average. @@ -587,10 +757,12 @@ let TargetPrefix = "ppc" in { // All PPC intrinsics start with "llvm.ppc.". // Saturating multiply-adds. def int_ppc_altivec_vmhaddshs : GCCBuiltin<"__builtin_altivec_vmhaddshs">, Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, - llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; + llvm_v8i16_ty, llvm_v8i16_ty], + [IntrNoMem, IntrHasSideEffects]>; def int_ppc_altivec_vmhraddshs : GCCBuiltin<"__builtin_altivec_vmhraddshs">, Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, - llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>; + llvm_v8i16_ty, llvm_v8i16_ty], + [IntrNoMem, IntrHasSideEffects]>; def int_ppc_altivec_vmaddfp : GCCBuiltin<"__builtin_altivec_vmaddfp">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, @@ -608,7 +780,7 @@ let TargetPrefix = "ppc" in { // All PPC intrinsics start with "llvm.ppc.". llvm_v4i32_ty], [IntrNoMem]>; def int_ppc_altivec_vmsumshs : GCCBuiltin<"__builtin_altivec_vmsumshs">, Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty, - llvm_v4i32_ty], [IntrNoMem]>; + llvm_v4i32_ty], [IntrNoMem, IntrHasSideEffects]>; def int_ppc_altivec_vmsumubm : GCCBuiltin<"__builtin_altivec_vmsumubm">, Intrinsic<[llvm_v4i32_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v4i32_ty], [IntrNoMem]>; @@ -620,7 +792,10 @@ let TargetPrefix = "ppc" in { // All PPC intrinsics start with "llvm.ppc.". llvm_v1i128_ty], [IntrNoMem]>; def int_ppc_altivec_vmsumuhs : GCCBuiltin<"__builtin_altivec_vmsumuhs">, Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty, - llvm_v4i32_ty], [IntrNoMem]>; + llvm_v4i32_ty], [IntrNoMem, IntrHasSideEffects]>; + def int_ppc_altivec_vmsumcud : GCCBuiltin<"__builtin_altivec_vmsumcud">, + Intrinsic<[llvm_v1i128_ty], + [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v1i128_ty], [IntrNoMem]>; // Vector Multiply Instructions. def int_ppc_altivec_vmulesb : GCCBuiltin<"__builtin_altivec_vmulesb">, @@ -632,6 +807,7 @@ let TargetPrefix = "ppc" in { // All PPC intrinsics start with "llvm.ppc.". 
def int_ppc_altivec_vmulesw : GCCBuiltin<"__builtin_altivec_vmulesw">, Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; + def int_ppc_altivec_vmulesd : PowerPC_Vec_QDD_Intrinsic<"vmulesd">; def int_ppc_altivec_vmuleub : GCCBuiltin<"__builtin_altivec_vmuleub">, Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; @@ -641,6 +817,7 @@ let TargetPrefix = "ppc" in { // All PPC intrinsics start with "llvm.ppc.". def int_ppc_altivec_vmuleuw : GCCBuiltin<"__builtin_altivec_vmuleuw">, Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; + def int_ppc_altivec_vmuleud : PowerPC_Vec_QDD_Intrinsic<"vmuleud">; def int_ppc_altivec_vmulosb : GCCBuiltin<"__builtin_altivec_vmulosb">, Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], @@ -651,6 +828,7 @@ let TargetPrefix = "ppc" in { // All PPC intrinsics start with "llvm.ppc.". def int_ppc_altivec_vmulosw : GCCBuiltin<"__builtin_altivec_vmulosw">, Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; + def int_ppc_altivec_vmulosd : PowerPC_Vec_QDD_Intrinsic<"vmulosd">; def int_ppc_altivec_vmuloub : GCCBuiltin<"__builtin_altivec_vmuloub">, Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; @@ -660,23 +838,38 @@ let TargetPrefix = "ppc" in { // All PPC intrinsics start with "llvm.ppc.". def int_ppc_altivec_vmulouw : GCCBuiltin<"__builtin_altivec_vmulouw">, Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; + def int_ppc_altivec_vmuloud : PowerPC_Vec_QDD_Intrinsic<"vmuloud">; // Vector Sum Instructions. def int_ppc_altivec_vsumsws : GCCBuiltin<"__builtin_altivec_vsumsws">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], - [IntrNoMem]>; + [IntrNoMem, IntrHasSideEffects]>; def int_ppc_altivec_vsum2sws : GCCBuiltin<"__builtin_altivec_vsum2sws">, Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], - [IntrNoMem]>; + [IntrNoMem, IntrHasSideEffects]>; def int_ppc_altivec_vsum4sbs : GCCBuiltin<"__builtin_altivec_vsum4sbs">, Intrinsic<[llvm_v4i32_ty], [llvm_v16i8_ty, llvm_v4i32_ty], - [IntrNoMem]>; + [IntrNoMem, IntrHasSideEffects]>; def int_ppc_altivec_vsum4shs : GCCBuiltin<"__builtin_altivec_vsum4shs">, Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v4i32_ty], - [IntrNoMem]>; + [IntrNoMem, IntrHasSideEffects]>; def int_ppc_altivec_vsum4ubs : GCCBuiltin<"__builtin_altivec_vsum4ubs">, Intrinsic<[llvm_v4i32_ty], [llvm_v16i8_ty, llvm_v4i32_ty], - [IntrNoMem]>; + [IntrNoMem, IntrHasSideEffects]>; + + // Vector Sign Extension Instructions + def int_ppc_altivec_vextsb2w : GCCBuiltin<"__builtin_altivec_vextsb2w">, + Intrinsic<[llvm_v4i32_ty], [llvm_v16i8_ty], [IntrNoMem]>; + def int_ppc_altivec_vextsb2d : GCCBuiltin<"__builtin_altivec_vextsb2d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v16i8_ty], [IntrNoMem]>; + def int_ppc_altivec_vextsh2w : GCCBuiltin<"__builtin_altivec_vextsh2w">, + Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty], [IntrNoMem]>; + def int_ppc_altivec_vextsh2d : GCCBuiltin<"__builtin_altivec_vextsh2d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v8i16_ty], [IntrNoMem]>; + def int_ppc_altivec_vextsw2d : GCCBuiltin<"__builtin_altivec_vextsw2d">, + Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty], [IntrNoMem]>; + def int_ppc_altivec_vextsd2q : GCCBuiltin<"__builtin_altivec_vextsd2q">, + Intrinsic<[llvm_v1i128_ty], [llvm_v2i64_ty], [IntrNoMem]>; // Other multiplies. 
def int_ppc_altivec_vmladduhm : GCCBuiltin<"__builtin_altivec_vmladduhm">, @@ -689,34 +882,34 @@ let TargetPrefix = "ppc" in { // All PPC intrinsics start with "llvm.ppc.". [IntrNoMem]>; def int_ppc_altivec_vpkshss : GCCBuiltin<"__builtin_altivec_vpkshss">, Intrinsic<[llvm_v16i8_ty], [llvm_v8i16_ty, llvm_v8i16_ty], - [IntrNoMem]>; + [IntrNoMem, IntrHasSideEffects]>; def int_ppc_altivec_vpkshus : GCCBuiltin<"__builtin_altivec_vpkshus">, Intrinsic<[llvm_v16i8_ty], [llvm_v8i16_ty, llvm_v8i16_ty], - [IntrNoMem]>; + [IntrNoMem, IntrHasSideEffects]>; def int_ppc_altivec_vpkswss : GCCBuiltin<"__builtin_altivec_vpkswss">, Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty, llvm_v4i32_ty], - [IntrNoMem]>; + [IntrNoMem, IntrHasSideEffects]>; def int_ppc_altivec_vpkswus : GCCBuiltin<"__builtin_altivec_vpkswus">, Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty, llvm_v4i32_ty], - [IntrNoMem]>; + [IntrNoMem, IntrHasSideEffects]>; def int_ppc_altivec_vpksdss : GCCBuiltin<"__builtin_altivec_vpksdss">, Intrinsic<[llvm_v4i32_ty], [llvm_v2i64_ty, llvm_v2i64_ty], - [IntrNoMem]>; + [IntrNoMem, IntrHasSideEffects]>; def int_ppc_altivec_vpksdus : GCCBuiltin<"__builtin_altivec_vpksdus">, Intrinsic<[llvm_v4i32_ty], [llvm_v2i64_ty, llvm_v2i64_ty], - [IntrNoMem]>; + [IntrNoMem, IntrHasSideEffects]>; // vpkuhum is lowered to a shuffle. def int_ppc_altivec_vpkuhus : GCCBuiltin<"__builtin_altivec_vpkuhus">, Intrinsic<[llvm_v16i8_ty], [llvm_v8i16_ty, llvm_v8i16_ty], - [IntrNoMem]>; + [IntrNoMem, IntrHasSideEffects]>; // vpkuwum is lowered to a shuffle. def int_ppc_altivec_vpkuwus : GCCBuiltin<"__builtin_altivec_vpkuwus">, Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty, llvm_v4i32_ty], - [IntrNoMem]>; + [IntrNoMem, IntrHasSideEffects]>; // vpkudum is lowered to a shuffle. def int_ppc_altivec_vpkudus : GCCBuiltin<"__builtin_altivec_vpkudus">, Intrinsic<[llvm_v4i32_ty], [llvm_v2i64_ty, llvm_v2i64_ty], - [IntrNoMem]>; + [IntrNoMem, IntrHasSideEffects]>; // Unpacks. def int_ppc_altivec_vupkhpx : GCCBuiltin<"__builtin_altivec_vupkhpx">, @@ -898,6 +1091,29 @@ def int_ppc_altivec_vrldmi : [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; +def int_ppc_altivec_vrlqnm : + PowerPC_Vec_Intrinsic<"vrlqnm", [llvm_v1i128_ty], + [llvm_v1i128_ty, llvm_v1i128_ty], + [IntrNoMem]>; +def int_ppc_altivec_vrlqmi : + PowerPC_Vec_Intrinsic<"vrlqmi", [llvm_v1i128_ty], + [llvm_v1i128_ty, llvm_v1i128_ty, llvm_v1i128_ty], + [IntrNoMem]>; + +// Vector Divide Extended Intrinsics. +def int_ppc_altivec_vdivesw : PowerPC_Vec_WWW_Intrinsic<"vdivesw">; +def int_ppc_altivec_vdiveuw : PowerPC_Vec_WWW_Intrinsic<"vdiveuw">; +def int_ppc_altivec_vdivesd : PowerPC_Vec_DDD_Intrinsic<"vdivesd">; +def int_ppc_altivec_vdiveud : PowerPC_Vec_DDD_Intrinsic<"vdiveud">; +def int_ppc_altivec_vdivesq : PowerPC_Vec_QQQ_Intrinsic<"vdivesq">; +def int_ppc_altivec_vdiveuq : PowerPC_Vec_QQQ_Intrinsic<"vdiveuq">; + +// Vector Multiply High Intrinsics. +def int_ppc_altivec_vmulhsw : PowerPC_Vec_WWW_Intrinsic<"vmulhsw">; +def int_ppc_altivec_vmulhuw : PowerPC_Vec_WWW_Intrinsic<"vmulhuw">; +def int_ppc_altivec_vmulhsd : PowerPC_Vec_DDD_Intrinsic<"vmulhsd">; +def int_ppc_altivec_vmulhud : PowerPC_Vec_DDD_Intrinsic<"vmulhud">; + //===----------------------------------------------------------------------===// // PowerPC VSX Intrinsic Definitions. 
@@ -918,12 +1134,8 @@ def int_ppc_vsx_lxvl : def int_ppc_vsx_lxvll : Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_i64_ty], [IntrReadMem, IntrArgMemOnly]>; -def int_ppc_vsx_stxvl : - Intrinsic<[], [llvm_v4i32_ty, llvm_ptr_ty, llvm_i64_ty], - [IntrWriteMem, IntrArgMemOnly]>; -def int_ppc_vsx_stxvll : - Intrinsic<[], [llvm_v4i32_ty, llvm_ptr_ty, llvm_i64_ty], - [IntrWriteMem, IntrArgMemOnly]>; +def int_ppc_vsx_lxvp : + Intrinsic<[llvm_v256i1_ty], [llvm_ptr_ty], [IntrReadMem, IntrArgMemOnly]>; // Vector store. def int_ppc_vsx_stxvw4x : Intrinsic<[], [llvm_v4i32_ty, llvm_ptr_ty], @@ -934,6 +1146,15 @@ def int_ppc_vsx_stxvw4x_be : Intrinsic<[], [llvm_v4i32_ty, llvm_ptr_ty], [IntrWriteMem, IntrArgMemOnly]>; def int_ppc_vsx_stxvd2x_be : Intrinsic<[], [llvm_v2f64_ty, llvm_ptr_ty], [IntrWriteMem, IntrArgMemOnly]>; +def int_ppc_vsx_stxvl : + Intrinsic<[], [llvm_v4i32_ty, llvm_ptr_ty, llvm_i64_ty], + [IntrWriteMem, IntrArgMemOnly]>; +def int_ppc_vsx_stxvll : + Intrinsic<[], [llvm_v4i32_ty, llvm_ptr_ty, llvm_i64_ty], + [IntrWriteMem, IntrArgMemOnly]>; +def int_ppc_vsx_stxvp : + Intrinsic<[], [llvm_v256i1_ty, llvm_ptr_ty], [IntrWriteMem, + IntrArgMemOnly]>; // Vector and scalar maximum. def int_ppc_vsx_xvmaxdp : PowerPC_VSX_Vec_DDD_Intrinsic<"xvmaxdp">; def int_ppc_vsx_xvmaxsp : PowerPC_VSX_Vec_FFF_Intrinsic<"xvmaxsp">; @@ -1060,6 +1281,12 @@ def int_ppc_vsx_xvtstdcsp : def int_ppc_vsx_xvcvhpsp : PowerPC_VSX_Intrinsic<"xvcvhpsp", [llvm_v4f32_ty], [llvm_v8i16_ty],[IntrNoMem]>; +def int_ppc_vsx_xvcvspbf16 : + PowerPC_VSX_Intrinsic<"xvcvspbf16", [llvm_v16i8_ty], + [llvm_v16i8_ty], [IntrNoMem]>; +def int_ppc_vsx_xvcvbf16spn : + PowerPC_VSX_Intrinsic<"xvcvbf16spn", [llvm_v16i8_ty], + [llvm_v16i8_ty], [IntrNoMem]>; def int_ppc_vsx_xxextractuw : PowerPC_VSX_Intrinsic<"xxextractuw",[llvm_v2i64_ty], [llvm_v2i64_ty,llvm_i32_ty], [IntrNoMem]>; @@ -1069,7 +1296,17 @@ def int_ppc_vsx_xxinsertw : [IntrNoMem]>; def int_ppc_vsx_xvtlsbb : PowerPC_VSX_Intrinsic<"xvtlsbb", [llvm_i32_ty], - [llvm_v16i8_ty, llvm_i1_ty], [IntrNoMem]>; + [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; +def int_ppc_vsx_xvtdivdp : + PowerPC_VSX_Intrinsic<"xvtdivdp", [llvm_i32_ty], + [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; +def int_ppc_vsx_xvtdivsp : + PowerPC_VSX_Intrinsic<"xvtdivsp", [llvm_i32_ty], + [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; +def int_ppc_vsx_xvtsqrtdp : + PowerPC_VSX_Intrinsic<"xvtsqrtdp", [llvm_i32_ty], [llvm_v2f64_ty], [IntrNoMem]>; +def int_ppc_vsx_xvtsqrtsp : + PowerPC_VSX_Intrinsic<"xvtsqrtsp", [llvm_i32_ty], [llvm_v4f32_ty], [IntrNoMem]>; def int_ppc_vsx_xxeval : PowerPC_VSX_Intrinsic<"xxeval", [llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, @@ -1110,182 +1347,6 @@ def int_ppc_vsx_xxblendvd: GCCBuiltin<"__builtin_vsx_xxblendvd">, } //===----------------------------------------------------------------------===// -// PowerPC QPX Intrinsics. -// - -let TargetPrefix = "ppc" in { // All PPC intrinsics start with "llvm.ppc.". - /// PowerPC_QPX_Intrinsic - Base class for all QPX intrinsics. - class PowerPC_QPX_Intrinsic<string GCCIntSuffix, list<LLVMType> ret_types, - list<LLVMType> param_types, - list<IntrinsicProperty> properties> - : GCCBuiltin<!strconcat("__builtin_qpx_", GCCIntSuffix)>, - Intrinsic<ret_types, param_types, properties>; -} - -//===----------------------------------------------------------------------===// -// PowerPC QPX Intrinsic Class Definitions. -// - -/// PowerPC_QPX_FF_Intrinsic - A PowerPC intrinsic that takes one v4f64 -/// vector and returns one. These intrinsics have no side effects. 
-class PowerPC_QPX_FF_Intrinsic<string GCCIntSuffix> - : PowerPC_QPX_Intrinsic<GCCIntSuffix, - [llvm_v4f64_ty], [llvm_v4f64_ty], [IntrNoMem]>; - -/// PowerPC_QPX_FFF_Intrinsic - A PowerPC intrinsic that takes two v4f64 -/// vectors and returns one. These intrinsics have no side effects. -class PowerPC_QPX_FFF_Intrinsic<string GCCIntSuffix> - : PowerPC_QPX_Intrinsic<GCCIntSuffix, - [llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty], - [IntrNoMem]>; - -/// PowerPC_QPX_FFFF_Intrinsic - A PowerPC intrinsic that takes three v4f64 -/// vectors and returns one. These intrinsics have no side effects. -class PowerPC_QPX_FFFF_Intrinsic<string GCCIntSuffix> - : PowerPC_QPX_Intrinsic<GCCIntSuffix, - [llvm_v4f64_ty], - [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty], - [IntrNoMem]>; - -/// PowerPC_QPX_Load_Intrinsic - A PowerPC intrinsic that takes a pointer -/// and returns a v4f64. -class PowerPC_QPX_Load_Intrinsic<string GCCIntSuffix> - : PowerPC_QPX_Intrinsic<GCCIntSuffix, - [llvm_v4f64_ty], [llvm_ptr_ty], [IntrReadMem, IntrArgMemOnly]>; - -/// PowerPC_QPX_LoadPerm_Intrinsic - A PowerPC intrinsic that takes a pointer -/// and returns a v4f64 permutation. -class PowerPC_QPX_LoadPerm_Intrinsic<string GCCIntSuffix> - : PowerPC_QPX_Intrinsic<GCCIntSuffix, - [llvm_v4f64_ty], [llvm_ptr_ty], [IntrNoMem]>; - -/// PowerPC_QPX_Store_Intrinsic - A PowerPC intrinsic that takes a pointer -/// and stores a v4f64. -class PowerPC_QPX_Store_Intrinsic<string GCCIntSuffix> - : PowerPC_QPX_Intrinsic<GCCIntSuffix, - [], [llvm_v4f64_ty, llvm_ptr_ty], - [IntrWriteMem, IntrArgMemOnly]>; - -//===----------------------------------------------------------------------===// -// PowerPC QPX Intrinsic Definitions. - -let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.". - // Add Instructions - def int_ppc_qpx_qvfadd : PowerPC_QPX_FFF_Intrinsic<"qvfadd">; - def int_ppc_qpx_qvfadds : PowerPC_QPX_FFF_Intrinsic<"qvfadds">; - def int_ppc_qpx_qvfsub : PowerPC_QPX_FFF_Intrinsic<"qvfsub">; - def int_ppc_qpx_qvfsubs : PowerPC_QPX_FFF_Intrinsic<"qvfsubs">; - - // Estimate Instructions - def int_ppc_qpx_qvfre : PowerPC_QPX_FF_Intrinsic<"qvfre">; - def int_ppc_qpx_qvfres : PowerPC_QPX_FF_Intrinsic<"qvfres">; - def int_ppc_qpx_qvfrsqrte : PowerPC_QPX_FF_Intrinsic<"qvfrsqrte">; - def int_ppc_qpx_qvfrsqrtes : PowerPC_QPX_FF_Intrinsic<"qvfrsqrtes">; - - // Multiply Instructions - def int_ppc_qpx_qvfmul : PowerPC_QPX_FFF_Intrinsic<"qvfmul">; - def int_ppc_qpx_qvfmuls : PowerPC_QPX_FFF_Intrinsic<"qvfmuls">; - def int_ppc_qpx_qvfxmul : PowerPC_QPX_FFF_Intrinsic<"qvfxmul">; - def int_ppc_qpx_qvfxmuls : PowerPC_QPX_FFF_Intrinsic<"qvfxmuls">; - - // Multiply-add instructions - def int_ppc_qpx_qvfmadd : PowerPC_QPX_FFFF_Intrinsic<"qvfmadd">; - def int_ppc_qpx_qvfmadds : PowerPC_QPX_FFFF_Intrinsic<"qvfmadds">; - def int_ppc_qpx_qvfnmadd : PowerPC_QPX_FFFF_Intrinsic<"qvfnmadd">; - def int_ppc_qpx_qvfnmadds : PowerPC_QPX_FFFF_Intrinsic<"qvfnmadds">; - def int_ppc_qpx_qvfmsub : PowerPC_QPX_FFFF_Intrinsic<"qvfmsub">; - def int_ppc_qpx_qvfmsubs : PowerPC_QPX_FFFF_Intrinsic<"qvfmsubs">; - def int_ppc_qpx_qvfnmsub : PowerPC_QPX_FFFF_Intrinsic<"qvfnmsub">; - def int_ppc_qpx_qvfnmsubs : PowerPC_QPX_FFFF_Intrinsic<"qvfnmsubs">; - def int_ppc_qpx_qvfxmadd : PowerPC_QPX_FFFF_Intrinsic<"qvfxmadd">; - def int_ppc_qpx_qvfxmadds : PowerPC_QPX_FFFF_Intrinsic<"qvfxmadds">; - def int_ppc_qpx_qvfxxnpmadd : PowerPC_QPX_FFFF_Intrinsic<"qvfxxnpmadd">; - def int_ppc_qpx_qvfxxnpmadds : PowerPC_QPX_FFFF_Intrinsic<"qvfxxnpmadds">; - def int_ppc_qpx_qvfxxcpnmadd : 
PowerPC_QPX_FFFF_Intrinsic<"qvfxxcpnmadd">; - def int_ppc_qpx_qvfxxcpnmadds : PowerPC_QPX_FFFF_Intrinsic<"qvfxxcpnmadds">; - def int_ppc_qpx_qvfxxmadd : PowerPC_QPX_FFFF_Intrinsic<"qvfxxmadd">; - def int_ppc_qpx_qvfxxmadds : PowerPC_QPX_FFFF_Intrinsic<"qvfxxmadds">; - - // Select Instruction - def int_ppc_qpx_qvfsel : PowerPC_QPX_FFFF_Intrinsic<"qvfsel">; - - // Permute Instruction - def int_ppc_qpx_qvfperm : PowerPC_QPX_FFFF_Intrinsic<"qvfperm">; - - // Convert and Round Instructions - def int_ppc_qpx_qvfctid : PowerPC_QPX_FF_Intrinsic<"qvfctid">; - def int_ppc_qpx_qvfctidu : PowerPC_QPX_FF_Intrinsic<"qvfctidu">; - def int_ppc_qpx_qvfctidz : PowerPC_QPX_FF_Intrinsic<"qvfctidz">; - def int_ppc_qpx_qvfctiduz : PowerPC_QPX_FF_Intrinsic<"qvfctiduz">; - def int_ppc_qpx_qvfctiw : PowerPC_QPX_FF_Intrinsic<"qvfctiw">; - def int_ppc_qpx_qvfctiwu : PowerPC_QPX_FF_Intrinsic<"qvfctiwu">; - def int_ppc_qpx_qvfctiwz : PowerPC_QPX_FF_Intrinsic<"qvfctiwz">; - def int_ppc_qpx_qvfctiwuz : PowerPC_QPX_FF_Intrinsic<"qvfctiwuz">; - def int_ppc_qpx_qvfcfid : PowerPC_QPX_FF_Intrinsic<"qvfcfid">; - def int_ppc_qpx_qvfcfidu : PowerPC_QPX_FF_Intrinsic<"qvfcfidu">; - def int_ppc_qpx_qvfcfids : PowerPC_QPX_FF_Intrinsic<"qvfcfids">; - def int_ppc_qpx_qvfcfidus : PowerPC_QPX_FF_Intrinsic<"qvfcfidus">; - def int_ppc_qpx_qvfrsp : PowerPC_QPX_FF_Intrinsic<"qvfrsp">; - def int_ppc_qpx_qvfriz : PowerPC_QPX_FF_Intrinsic<"qvfriz">; - def int_ppc_qpx_qvfrin : PowerPC_QPX_FF_Intrinsic<"qvfrin">; - def int_ppc_qpx_qvfrip : PowerPC_QPX_FF_Intrinsic<"qvfrip">; - def int_ppc_qpx_qvfrim : PowerPC_QPX_FF_Intrinsic<"qvfrim">; - - // Move Instructions - def int_ppc_qpx_qvfneg : PowerPC_QPX_FF_Intrinsic<"qvfneg">; - def int_ppc_qpx_qvfabs : PowerPC_QPX_FF_Intrinsic<"qvfabs">; - def int_ppc_qpx_qvfnabs : PowerPC_QPX_FF_Intrinsic<"qvfnabs">; - def int_ppc_qpx_qvfcpsgn : PowerPC_QPX_FFF_Intrinsic<"qvfcpsgn">; - - // Compare Instructions - def int_ppc_qpx_qvftstnan : PowerPC_QPX_FFF_Intrinsic<"qvftstnan">; - def int_ppc_qpx_qvfcmplt : PowerPC_QPX_FFF_Intrinsic<"qvfcmplt">; - def int_ppc_qpx_qvfcmpgt : PowerPC_QPX_FFF_Intrinsic<"qvfcmpgt">; - def int_ppc_qpx_qvfcmpeq : PowerPC_QPX_FFF_Intrinsic<"qvfcmpeq">; - - // Load instructions - def int_ppc_qpx_qvlfd : PowerPC_QPX_Load_Intrinsic<"qvlfd">; - def int_ppc_qpx_qvlfda : PowerPC_QPX_Load_Intrinsic<"qvlfda">; - def int_ppc_qpx_qvlfs : PowerPC_QPX_Load_Intrinsic<"qvlfs">; - def int_ppc_qpx_qvlfsa : PowerPC_QPX_Load_Intrinsic<"qvlfsa">; - - def int_ppc_qpx_qvlfcda : PowerPC_QPX_Load_Intrinsic<"qvlfcda">; - def int_ppc_qpx_qvlfcd : PowerPC_QPX_Load_Intrinsic<"qvlfcd">; - def int_ppc_qpx_qvlfcsa : PowerPC_QPX_Load_Intrinsic<"qvlfcsa">; - def int_ppc_qpx_qvlfcs : PowerPC_QPX_Load_Intrinsic<"qvlfcs">; - def int_ppc_qpx_qvlfiwaa : PowerPC_QPX_Load_Intrinsic<"qvlfiwaa">; - def int_ppc_qpx_qvlfiwa : PowerPC_QPX_Load_Intrinsic<"qvlfiwa">; - def int_ppc_qpx_qvlfiwza : PowerPC_QPX_Load_Intrinsic<"qvlfiwza">; - def int_ppc_qpx_qvlfiwz : PowerPC_QPX_Load_Intrinsic<"qvlfiwz">; - - def int_ppc_qpx_qvlpcld : PowerPC_QPX_LoadPerm_Intrinsic<"qvlpcld">; - def int_ppc_qpx_qvlpcls : PowerPC_QPX_LoadPerm_Intrinsic<"qvlpcls">; - def int_ppc_qpx_qvlpcrd : PowerPC_QPX_LoadPerm_Intrinsic<"qvlpcrd">; - def int_ppc_qpx_qvlpcrs : PowerPC_QPX_LoadPerm_Intrinsic<"qvlpcrs">; - - // Store instructions - def int_ppc_qpx_qvstfd : PowerPC_QPX_Store_Intrinsic<"qvstfd">; - def int_ppc_qpx_qvstfda : PowerPC_QPX_Store_Intrinsic<"qvstfda">; - def int_ppc_qpx_qvstfs : PowerPC_QPX_Store_Intrinsic<"qvstfs">; - def int_ppc_qpx_qvstfsa : 
PowerPC_QPX_Store_Intrinsic<"qvstfsa">; - - def int_ppc_qpx_qvstfcda : PowerPC_QPX_Store_Intrinsic<"qvstfcda">; - def int_ppc_qpx_qvstfcd : PowerPC_QPX_Store_Intrinsic<"qvstfcd">; - def int_ppc_qpx_qvstfcsa : PowerPC_QPX_Store_Intrinsic<"qvstfcsa">; - def int_ppc_qpx_qvstfcs : PowerPC_QPX_Store_Intrinsic<"qvstfcs">; - def int_ppc_qpx_qvstfiwa : PowerPC_QPX_Store_Intrinsic<"qvstfiwa">; - def int_ppc_qpx_qvstfiw : PowerPC_QPX_Store_Intrinsic<"qvstfiw">; - - // Logical and permutation formation - def int_ppc_qpx_qvflogical : PowerPC_QPX_Intrinsic<"qvflogical", - [llvm_v4f64_ty], - [llvm_v4f64_ty, llvm_v4f64_ty, llvm_i32_ty], - [IntrNoMem]>; - def int_ppc_qpx_qvgpci : PowerPC_QPX_Intrinsic<"qvgpci", - [llvm_v4f64_ty], [llvm_i32_ty], [IntrNoMem]>; -} - -//===----------------------------------------------------------------------===// // PowerPC HTM Intrinsic Definitions. let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.". @@ -1349,5 +1410,88 @@ def int_ppc_cfence : Intrinsic<[], [llvm_anyint_ty], []>; // PowerPC set FPSCR Intrinsic Definitions. def int_ppc_setrnd : GCCBuiltin<"__builtin_setrnd">, Intrinsic<[llvm_double_ty], [llvm_i32_ty], []>; +} +let TargetPrefix = "ppc" in { + def int_ppc_vsx_assemble_pair : + Intrinsic<[llvm_v256i1_ty], + [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; + + def int_ppc_vsx_disassemble_pair : + Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty], + [llvm_v256i1_ty], [IntrNoMem]>; + + def int_ppc_mma_assemble_acc : + Intrinsic<[llvm_v512i1_ty], + [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty], + [IntrNoMem]>; + + def int_ppc_mma_disassemble_acc : + Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty], + [llvm_v512i1_ty], [IntrNoMem]>; + + def int_ppc_mma_xxmtacc : + Intrinsic<[llvm_v512i1_ty], [llvm_v512i1_ty], [IntrNoMem]>; + + def int_ppc_mma_xxmfacc : + Intrinsic<[llvm_v512i1_ty], [llvm_v512i1_ty], [IntrNoMem]>; + + def int_ppc_mma_xxsetaccz : + Intrinsic<[llvm_v512i1_ty], [], [IntrNoMem]>; + + // MMA Reduced-Precision: Outer Product Intrinsic Definitions. + defm int_ppc_mma_xvi4ger8 : + PowerPC_MMA_ACC_PP_Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty]>; + defm int_ppc_mma_pmxvi4ger8 : + PowerPC_MMA_ACC_PP_Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty]>; + + defm int_ppc_mma_xvi8ger4 : + PowerPC_MMA_ACC_PP_Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty]>; + defm int_ppc_mma_pmxvi8ger4 : + PowerPC_MMA_ACC_PP_Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty]>; + + defm int_ppc_mma_xvi16ger2s : + PowerPC_MMA_ACC_PP_Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty]>; + defm int_ppc_mma_pmxvi16ger2s : + PowerPC_MMA_ACC_PP_Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty]>; + + defm int_ppc_mma_xvf16ger2 : + PowerPC_MMA_ACC_Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty]>; + defm int_ppc_mma_pmxvf16ger2 : + PowerPC_MMA_ACC_Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty]>; + defm int_ppc_mma_xvf32ger : + PowerPC_MMA_ACC_Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty]>; + defm int_ppc_mma_pmxvf32ger : + PowerPC_MMA_ACC_Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty, + llvm_i32_ty]>; + defm int_ppc_mma_xvf64ger : + PowerPC_MMA_ACC_Intrinsic<[llvm_v256i1_ty, llvm_v16i8_ty]>; + defm int_ppc_mma_pmxvf64ger : + PowerPC_MMA_ACC_Intrinsic<[llvm_v256i1_ty, llvm_v16i8_ty, llvm_i32_ty, + llvm_i32_ty]>; + + // MMA Reduced-Precision: bfloat16 Outer Product Intrinsic Definitions. 
+ defm int_ppc_mma_xvbf16ger2 : + PowerPC_MMA_ACC_Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty]>; + defm int_ppc_mma_pmxvbf16ger2 : + PowerPC_MMA_ACC_Intrinsic< + [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty]>; + + // MMA Reduced-Precision: Missing Integer-based Outer Product Operations. + defm int_ppc_mma_xvi16ger2 : + PowerPC_MMA_ACC_PP_Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty]>; + defm int_ppc_mma_pmxvi16ger2 : + PowerPC_MMA_ACC_PP_Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty]>; + def int_ppc_mma_xvi8ger4spp : + Intrinsic<[llvm_v512i1_ty], + [llvm_v512i1_ty, llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; + def int_ppc_mma_pmxvi8ger4spp : + Intrinsic<[llvm_v512i1_ty], + [llvm_v512i1_ty, llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; } diff --git a/llvm/include/llvm/IR/IntrinsicsRISCV.td b/llvm/include/llvm/IR/IntrinsicsRISCV.td index 7590b568c367..ab5b09b72ac3 100644 --- a/llvm/include/llvm/IR/IntrinsicsRISCV.td +++ b/llvm/include/llvm/IR/IntrinsicsRISCV.td @@ -66,3 +66,1024 @@ let TargetPrefix = "riscv" in { defm int_riscv_masked_cmpxchg : MaskedAtomicRMWFiveArgIntrinsics; } // TargetPrefix = "riscv" + +//===----------------------------------------------------------------------===// +// Vectors + +class RISCVVIntrinsic { + // These intrinsics may accept illegal integer values in their llvm_any_ty + // operand, so they have to be extended. If set to zero then the intrinsic + // does not have any operand that must be extended. + Intrinsic IntrinsicID = !cast<Intrinsic>(NAME); + bits<4> ExtendOperand = 0; +} + +let TargetPrefix = "riscv" in { + // We use anyint here but we only support XLen. + def int_riscv_vsetvli : Intrinsic<[llvm_anyint_ty], + /* AVL */ [LLVMMatchType<0>, + /* VSEW */ LLVMMatchType<0>, + /* VLMUL */ LLVMMatchType<0>], + [IntrNoMem, IntrHasSideEffects, + ImmArg<ArgIndex<1>>, + ImmArg<ArgIndex<2>>]>; + def int_riscv_vsetvlimax : Intrinsic<[llvm_anyint_ty], + /* VSEW */ [LLVMMatchType<0>, + /* VLMUL */ LLVMMatchType<0>], + [IntrNoMem, IntrHasSideEffects, + ImmArg<ArgIndex<0>>, + ImmArg<ArgIndex<1>>]>; + + // For unit stride load + // Input: (pointer, vl) + class RISCVUSLoad + : Intrinsic<[llvm_anyvector_ty], + [LLVMPointerType<LLVMMatchType<0>>, + llvm_anyint_ty], + [NoCapture<ArgIndex<0>>, IntrReadMem]>, RISCVVIntrinsic; + // For unit stride fault-only-first load + // Input: (pointer, vl) + // Output: (data, vl) + // NOTE: We model this with default memory properties since we model writing + // VL as a side effect. IntrReadMem, IntrHasSideEffects does not work. + class RISCVUSLoadFF + : Intrinsic<[llvm_anyvector_ty, llvm_anyint_ty], + [LLVMPointerType<LLVMMatchType<0>>, LLVMMatchType<1>], + [NoCapture<ArgIndex<0>>]>, + RISCVVIntrinsic; + // For unit stride load with mask + // Input: (maskedoff, pointer, mask, vl) + class RISCVUSLoadMask + : Intrinsic<[llvm_anyvector_ty ], + [LLVMMatchType<0>, + LLVMPointerType<LLVMMatchType<0>>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_anyint_ty], + [NoCapture<ArgIndex<1>>, IntrReadMem]>, RISCVVIntrinsic; + // For unit stride fault-only-first load with mask + // Input: (maskedoff, pointer, mask, vl) + // Output: (data, vl) + // NOTE: We model this with default memory properties since we model writing + // VL as a side effect. IntrReadMem, IntrHasSideEffects does not work. 
+ class RISCVUSLoadFFMask + : Intrinsic<[llvm_anyvector_ty, llvm_anyint_ty], + [LLVMMatchType<0>, + LLVMPointerType<LLVMMatchType<0>>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + LLVMMatchType<1>], + [NoCapture<ArgIndex<1>>]>, RISCVVIntrinsic; + // For strided load + // Input: (pointer, stride, vl) + class RISCVSLoad + : Intrinsic<[llvm_anyvector_ty], + [LLVMPointerType<LLVMMatchType<0>>, + llvm_anyint_ty, LLVMMatchType<1>], + [NoCapture<ArgIndex<0>>, IntrReadMem]>, RISCVVIntrinsic; + // For strided load with mask + // Input: (maskedoff, pointer, stride, mask, vl) + class RISCVSLoadMask + : Intrinsic<[llvm_anyvector_ty ], + [LLVMMatchType<0>, + LLVMPointerType<LLVMMatchType<0>>, llvm_anyint_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMMatchType<1>], + [NoCapture<ArgIndex<1>>, IntrReadMem]>, RISCVVIntrinsic; + // For indexed load + // Input: (pointer, index, vl) + class RISCVILoad + : Intrinsic<[llvm_anyvector_ty], + [LLVMPointerType<LLVMMatchType<0>>, + llvm_anyvector_ty, llvm_anyint_ty], + [NoCapture<ArgIndex<0>>, IntrReadMem]>, RISCVVIntrinsic; + // For indexed load with mask + // Input: (maskedoff, pointer, index, mask, vl) + class RISCVILoadMask + : Intrinsic<[llvm_anyvector_ty ], + [LLVMMatchType<0>, + LLVMPointerType<LLVMMatchType<0>>, llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty], + [NoCapture<ArgIndex<1>>, IntrReadMem]>, RISCVVIntrinsic; + // For unit stride store + // Input: (vector_in, pointer, vl) + class RISCVUSStore + : Intrinsic<[], + [llvm_anyvector_ty, + LLVMPointerType<LLVMMatchType<0>>, + llvm_anyint_ty], + [NoCapture<ArgIndex<1>>, IntrWriteMem]>, RISCVVIntrinsic; + // For unit stride store with mask + // Input: (vector_in, pointer, mask, vl) + class RISCVUSStoreMask + : Intrinsic<[], + [llvm_anyvector_ty, + LLVMPointerType<LLVMMatchType<0>>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_anyint_ty], + [NoCapture<ArgIndex<1>>, IntrWriteMem]>, RISCVVIntrinsic; + // For strided store + // Input: (vector_in, pointer, stride, vl) + class RISCVSStore + : Intrinsic<[], + [llvm_anyvector_ty, + LLVMPointerType<LLVMMatchType<0>>, + llvm_anyint_ty, LLVMMatchType<1>], + [NoCapture<ArgIndex<1>>, IntrWriteMem]>, RISCVVIntrinsic; + // For strided store with mask + // Input: (vector_in, pointer, stride, mask, vl) + class RISCVSStoreMask + : Intrinsic<[], + [llvm_anyvector_ty, + LLVMPointerType<LLVMMatchType<0>>, llvm_anyint_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMMatchType<1>], + [NoCapture<ArgIndex<1>>, IntrWriteMem]>, RISCVVIntrinsic; + // For indexed store + // Input: (vector_in, pointer, index, vl) + class RISCVIStore + : Intrinsic<[], + [llvm_anyvector_ty, + LLVMPointerType<LLVMMatchType<0>>, + llvm_anyint_ty, llvm_anyint_ty], + [NoCapture<ArgIndex<1>>, IntrWriteMem]>, RISCVVIntrinsic; + // For indexed store with mask + // Input: (vector_in, pointer, index, mask, vl) + class RISCVIStoreMask + : Intrinsic<[], + [llvm_anyvector_ty, + LLVMPointerType<LLVMMatchType<0>>, llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty], + [NoCapture<ArgIndex<1>>, IntrWriteMem]>, RISCVVIntrinsic; + // For destination vector type is the same as source vector. + // Input: (vector_in, vl) + class RISCVUnaryAANoMask + : Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, llvm_anyint_ty], + [IntrNoMem]>, RISCVVIntrinsic; + // For destination vector type is the same as first source vector (with mask).
+ // Input: (vector_in, mask, vl) + class RISCVUnaryAAMask + : Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty], + [IntrNoMem]>, RISCVVIntrinsic; + // For destination vector type is the same as first and second source vector. + // Input: (vector_in, vector_in, vl) + class RISCVBinaryAAANoMask + : Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyint_ty], + [IntrNoMem]>, RISCVVIntrinsic; + // For destination vector type is the same as first and second source vector. + // Input: (vector_in, vector_in, vl) + class RISCVBinaryAAAMask + : Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty], + [IntrNoMem]>, RISCVVIntrinsic; + // For destination vector type is the same as first source vector. + // Input: (vector_in, vector_in/scalar_in, vl) + class RISCVBinaryAAXNoMask + : Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, llvm_any_ty, llvm_anyint_ty], + [IntrNoMem]>, RISCVVIntrinsic { + let ExtendOperand = 2; + } + // For destination vector type is the same as first source vector (with mask). + // Input: (maskedoff, vector_in, vector_in/scalar_in, mask, vl) + class RISCVBinaryAAXMask + : Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, llvm_any_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty], + [IntrNoMem]>, RISCVVIntrinsic { + let ExtendOperand = 3; + } + // For destination vector type is NOT the same as first source vector. + // Input: (vector_in, vector_in/scalar_in, vl) + class RISCVBinaryABXNoMask + : Intrinsic<[llvm_anyvector_ty], + [llvm_anyvector_ty, llvm_any_ty, llvm_anyint_ty], + [IntrNoMem]>, RISCVVIntrinsic { + let ExtendOperand = 2; + } + // For destination vector type is NOT the same as first source vector (with mask). + // Input: (maskedoff, vector_in, vector_in/scalar_in, mask, vl) + class RISCVBinaryABXMask + : Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, llvm_anyvector_ty, llvm_any_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty], + [IntrNoMem]>, RISCVVIntrinsic { + let ExtendOperand = 3; + } + // For binary operations with V0 as input. + // Input: (vector_in, vector_in/scalar_in, V0, vl) + class RISCVBinaryWithV0 + : Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, llvm_any_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_anyint_ty], + [IntrNoMem]>, RISCVVIntrinsic { + let ExtendOperand = 2; + } + // For binary operations with mask type output and V0 as input. + // Output: (mask type output) + // Input: (vector_in, vector_in/scalar_in, V0, vl) + class RISCVBinaryMOutWithV0 + :Intrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>], + [llvm_anyvector_ty, llvm_any_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_anyint_ty], + [IntrNoMem]>, RISCVVIntrinsic { + let ExtendOperand = 2; + } + // For binary operations with mask type output. + // Output: (mask type output) + // Input: (vector_in, vector_in/scalar_in, vl) + class RISCVBinaryMOut + : Intrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>], + [llvm_anyvector_ty, llvm_any_ty, llvm_anyint_ty], + [IntrNoMem]>, RISCVVIntrinsic { + let ExtendOperand = 2; + } + // For binary operations with mask type output without mask. 
+ // Output: (mask type output) + // Input: (vector_in, vector_in/scalar_in, vl) + class RISCVCompareNoMask + : Intrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>], + [llvm_anyvector_ty, llvm_any_ty, llvm_anyint_ty], + [IntrNoMem]>, RISCVVIntrinsic { + let ExtendOperand = 2; + } + // For binary operations with mask type output with mask. + // Output: (mask type output) + // Input: (maskedoff, vector_in, vector_in/scalar_in, mask, vl) + class RISCVCompareMask + : Intrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>], + [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_anyvector_ty, llvm_any_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty], + [IntrNoMem]>, RISCVVIntrinsic { + let ExtendOperand = 3; + } + // For FP classify operations. + // Output: (bit mask type output) + // Input: (vector_in, vl) + class RISCVClassifyNoMask + : Intrinsic<[LLVMVectorOfBitcastsToInt<0>], + [llvm_anyvector_ty, llvm_anyint_ty], + [IntrNoMem]>, RISCVVIntrinsic; + // For FP classify operations with mask. + // Output: (bit mask type output) + // Input: (maskedoff, vector_in, mask, vl) + class RISCVClassifyMask + : Intrinsic<[LLVMVectorOfBitcastsToInt<0>], + [LLVMVectorOfBitcastsToInt<0>, llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty], + [IntrNoMem]>, RISCVVIntrinsic; + // For Saturating binary operations. + // The destination vector type is the same as first source vector. + // Input: (vector_in, vector_in/scalar_in, vl) + class RISCVSaturatingBinaryAAXNoMask + : Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, llvm_any_ty, llvm_anyint_ty], + [IntrNoMem, IntrHasSideEffects]>, RISCVVIntrinsic { + let ExtendOperand = 2; + } + // For Saturating binary operations with mask. + // The destination vector type is the same as first source vector. + // Input: (maskedoff, vector_in, vector_in/scalar_in, mask, vl) + class RISCVSaturatingBinaryAAXMask + : Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, llvm_any_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty], + [IntrNoMem, IntrHasSideEffects]>, RISCVVIntrinsic { + let ExtendOperand = 3; + } + // For Saturating binary operations. + // The destination vector type is NOT the same as first source vector. + // Input: (vector_in, vector_in/scalar_in, vl) + class RISCVSaturatingBinaryABXNoMask + : Intrinsic<[llvm_anyvector_ty], + [llvm_anyvector_ty, llvm_any_ty, llvm_anyint_ty], + [IntrNoMem, IntrHasSideEffects]>, RISCVVIntrinsic { + let ExtendOperand = 2; + } + // For Saturating binary operations with mask. + // The destination vector type is NOT the same as first source vector (with mask). 
+ // Input: (maskedoff, vector_in, vector_in/scalar_in, mask, vl) + class RISCVSaturatingBinaryABXMask + : Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, llvm_anyvector_ty, llvm_any_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty], + [IntrNoMem, IntrHasSideEffects]>, RISCVVIntrinsic { + let ExtendOperand = 3; + } + class RISCVTernaryAAAXNoMask + : Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyint_ty, + LLVMMatchType<1>], + [IntrNoMem]>, RISCVVIntrinsic; + class RISCVTernaryAAAXMask + : Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyint_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMMatchType<1>], + [IntrNoMem]>, RISCVVIntrinsic; + class RISCVTernaryAAXANoMask + : Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, llvm_any_ty, LLVMMatchType<0>, + llvm_anyint_ty], + [IntrNoMem]>, RISCVVIntrinsic { + let ExtendOperand = 2; + } + class RISCVTernaryAAXAMask + : Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, llvm_any_ty, LLVMMatchType<0>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty], + [IntrNoMem]>, RISCVVIntrinsic { + let ExtendOperand = 2; + } + class RISCVTernaryWideNoMask + : Intrinsic< [llvm_anyvector_ty], + [LLVMMatchType<0>, llvm_any_ty, llvm_anyvector_ty, + llvm_anyint_ty], + [IntrNoMem] >, RISCVVIntrinsic { + let ExtendOperand = 2; + } + class RISCVTernaryWideMask + : Intrinsic< [llvm_anyvector_ty], + [LLVMMatchType<0>, llvm_any_ty, llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty], + [IntrNoMem]>, RISCVVIntrinsic { + let ExtendOperand = 2; + } + // For Reduction ternary operations. + // For destination vector type is the same as first and third source vector. + // Input: (vector_in, vector_in, vector_in, vl) + class RISCVReductionNoMask + : Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>, + llvm_anyint_ty], + [IntrNoMem]>, RISCVVIntrinsic; + // For Reduction ternary operations with mask. + // For destination vector type is the same as first and third source vector. + // The mask type comes from second source vector. + // Input: (maskedoff, vector_in, vector_in, vector_in, mask, vl) + class RISCVReductionMask + : Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>, + LLVMScalarOrSameVectorWidth<1, llvm_i1_ty>, llvm_anyint_ty], + [IntrNoMem]>, RISCVVIntrinsic; + // For unary operations with scalar type output without mask + // Output: (scalar type) + // Input: (vector_in, vl) + class RISCVMaskUnarySOutNoMask + : Intrinsic<[llvm_anyint_ty], + [llvm_anyvector_ty, LLVMMatchType<0>], + [IntrNoMem]>, RISCVVIntrinsic; + // For unary operations with scalar type output with mask + // Output: (scalar type) + // Input: (vector_in, mask, vl) + class RISCVMaskUnarySOutMask + : Intrinsic<[llvm_anyint_ty], + [llvm_anyvector_ty, LLVMMatchType<1>, LLVMMatchType<0>], + [IntrNoMem]>, RISCVVIntrinsic; + // For destination vector type is NOT the same as source vector. + // Input: (vector_in, vl) + class RISCVUnaryABNoMask + : Intrinsic<[llvm_anyvector_ty], + [llvm_anyvector_ty, llvm_anyint_ty], + [IntrNoMem]>, RISCVVIntrinsic; + // For destination vector type is NOT the same as source vector (with mask).
+ // Input: (maskedoff, vector_in, mask, vl) + class RISCVUnaryABMask + : Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<1, llvm_i1_ty>, + llvm_anyint_ty], + [IntrNoMem]>, RISCVVIntrinsic; + // For unary operations with the same vector type in/out without mask + // Output: (vector) + // Input: (vector_in, vl) + class RISCVUnaryNoMask + : Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, llvm_anyint_ty], + [IntrNoMem]>, RISCVVIntrinsic; + // For mask unary operations with mask type in/out with mask + // Output: (mask type output) + // Input: (mask type maskedoff, mask type vector_in, mask, vl) + class RISCVMaskUnaryMOutMask + : Intrinsic<[llvm_anyint_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, + LLVMMatchType<0>, llvm_anyint_ty], + [IntrNoMem]>, RISCVVIntrinsic; + // Output: (vector) + // Input: (vl) + class RISCVNullaryIntrinsic + : Intrinsic<[llvm_anyvector_ty], + [llvm_anyint_ty], + [IntrNoMem]>, RISCVVIntrinsic; + // For Conversion unary operations. + // Input: (vector_in, vl) + class RISCVConversionNoMask + : Intrinsic<[llvm_anyvector_ty], + [llvm_anyvector_ty, llvm_anyint_ty], + [IntrNoMem]>, RISCVVIntrinsic; + // For Conversion unary operations with mask. + // Input: (maskedoff, vector_in, mask, vl) + class RISCVConversionMask + : Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty], + [IntrNoMem]>, RISCVVIntrinsic; + // For atomic operations without mask + // Input: (base, index, value, vl) + class RISCVAMONoMask + : Intrinsic<[llvm_anyvector_ty], + [LLVMPointerType<LLVMMatchType<0>>, llvm_anyvector_ty, LLVMMatchType<0>, + llvm_anyint_ty], + [NoCapture<ArgIndex<0>>]>, RISCVVIntrinsic; + // For atomic operations with mask + // Input: (base, index, value, mask, vl) + class RISCVAMOMask + : Intrinsic<[llvm_anyvector_ty], + [LLVMPointerType<LLVMMatchType<0>>, llvm_anyvector_ty, LLVMMatchType<0>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty], + [NoCapture<ArgIndex<0>>]>, RISCVVIntrinsic; + + // For unit stride segment load + // Input: (pointer, vl) + class RISCVUSSegLoad<int nf> + : Intrinsic<!listconcat([llvm_anyvector_ty], !listsplat(LLVMMatchType<0>, + !add(nf, -1))), + [LLVMPointerToElt<0>, llvm_anyint_ty], + [NoCapture<ArgIndex<0>>, IntrReadMem]>, RISCVVIntrinsic; + // For unit stride segment load with mask + // Input: (maskedoff, pointer, mask, vl) + class RISCVUSSegLoadMask<int nf> + : Intrinsic<!listconcat([llvm_anyvector_ty], !listsplat(LLVMMatchType<0>, + !add(nf, -1))), + !listconcat(!listsplat(LLVMMatchType<0>, nf), + [LLVMPointerToElt<0>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_anyint_ty]), + [NoCapture<ArgIndex<nf>>, IntrReadMem]>, RISCVVIntrinsic; + + // For unit stride fault-only-first segment load + // Input: (pointer, vl) + // Output: (data, vl) + // NOTE: We model this with default memory properties since we model writing + // VL as a side effect. IntrReadMem, IntrHasSideEffects does not work. + class RISCVUSSegLoadFF<int nf> + : Intrinsic<!listconcat([llvm_anyvector_ty], !listsplat(LLVMMatchType<0>, + !add(nf, -1)), [llvm_anyint_ty]), + [LLVMPointerToElt<0>, LLVMMatchType<1>], + [NoCapture<ArgIndex<0>>]>, RISCVVIntrinsic; + // For unit stride fault-only-first segment load with mask + // Input: (maskedoff, pointer, mask, vl) + // Output: (data, vl) + // NOTE: We model this with default memory properties since we model writing + // VL as a side effect. IntrReadMem, IntrHasSideEffects does not work. 
+ class RISCVUSSegLoadFFMask<int nf> + : Intrinsic<!listconcat([llvm_anyvector_ty], !listsplat(LLVMMatchType<0>, + !add(nf, -1)), [llvm_anyint_ty]), + !listconcat(!listsplat(LLVMMatchType<0>, nf), + [LLVMPointerToElt<0>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + LLVMMatchType<1>]), + [NoCapture<ArgIndex<nf>>]>, RISCVVIntrinsic; + + // For stride segment load + // Input: (pointer, offset, vl) + class RISCVSSegLoad<int nf> + : Intrinsic<!listconcat([llvm_anyvector_ty], !listsplat(LLVMMatchType<0>, + !add(nf, -1))), + [LLVMPointerToElt<0>, llvm_anyint_ty, LLVMMatchType<1>], + [NoCapture<ArgIndex<0>>, IntrReadMem]>, RISCVVIntrinsic; + // For stride segment load with mask + // Input: (maskedoff, pointer, offset, mask, vl) + class RISCVSSegLoadMask<int nf> + : Intrinsic<!listconcat([llvm_anyvector_ty], !listsplat(LLVMMatchType<0>, + !add(nf, -1))), + !listconcat(!listsplat(LLVMMatchType<0>, nf), + [LLVMPointerToElt<0>, + llvm_anyint_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + LLVMMatchType<1>]), + [NoCapture<ArgIndex<nf>>, IntrReadMem]>, RISCVVIntrinsic; + + // For indexed segment load + // Input: (pointer, index, vl) + class RISCVISegLoad<int nf> + : Intrinsic<!listconcat([llvm_anyvector_ty], !listsplat(LLVMMatchType<0>, + !add(nf, -1))), + [LLVMPointerToElt<0>, llvm_anyvector_ty, llvm_anyint_ty], + [NoCapture<ArgIndex<0>>, IntrReadMem]>, RISCVVIntrinsic; + // For indexed segment load with mask + // Input: (maskedoff, pointer, index, mask, vl) + class RISCVISegLoadMask<int nf> + : Intrinsic<!listconcat([llvm_anyvector_ty], !listsplat(LLVMMatchType<0>, + !add(nf, -1))), + !listconcat(!listsplat(LLVMMatchType<0>, nf), + [LLVMPointerToElt<0>, + llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_anyint_ty]), + [NoCapture<ArgIndex<nf>>, IntrReadMem]>, RISCVVIntrinsic; + + // For unit stride segment store + // Input: (value, pointer, vl) + class RISCVUSSegStore<int nf> + : Intrinsic<[], + !listconcat([llvm_anyvector_ty], + !listsplat(LLVMMatchType<0>, !add(nf, -1)), + [LLVMPointerToElt<0>, llvm_anyint_ty]), + [NoCapture<ArgIndex<nf>>, IntrWriteMem]>, RISCVVIntrinsic; + // For unit stride segment store with mask + // Input: (value, pointer, mask, vl) + class RISCVUSSegStoreMask<int nf> + : Intrinsic<[], + !listconcat([llvm_anyvector_ty], + !listsplat(LLVMMatchType<0>, !add(nf, -1)), + [LLVMPointerToElt<0>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_anyint_ty]), + [NoCapture<ArgIndex<nf>>, IntrWriteMem]>, RISCVVIntrinsic; + + // For stride segment store + // Input: (value, pointer, offset, vl) + class RISCVSSegStore<int nf> + : Intrinsic<[], + !listconcat([llvm_anyvector_ty], + !listsplat(LLVMMatchType<0>, !add(nf, -1)), + [LLVMPointerToElt<0>, llvm_anyint_ty, + LLVMMatchType<1>]), + [NoCapture<ArgIndex<nf>>, IntrWriteMem]>, RISCVVIntrinsic; + // For stride segment store with mask + // Input: (value, pointer, offset, mask, vl) + class RISCVSSegStoreMask<int nf> + : Intrinsic<[], + !listconcat([llvm_anyvector_ty], + !listsplat(LLVMMatchType<0>, !add(nf, -1)), + [LLVMPointerToElt<0>, llvm_anyint_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + LLVMMatchType<1>]), + [NoCapture<ArgIndex<nf>>, IntrWriteMem]>, RISCVVIntrinsic; + + // For indexed segment store + // Input: (value, pointer, offset, vl) + class RISCVISegStore<int nf> + : Intrinsic<[], + !listconcat([llvm_anyvector_ty], + !listsplat(LLVMMatchType<0>, !add(nf, -1)), + [LLVMPointerToElt<0>, llvm_anyvector_ty, + llvm_anyint_ty]), + [NoCapture<ArgIndex<nf>>, IntrWriteMem]>, RISCVVIntrinsic; + // For 
indexed segment store with mask + // Input: (value, pointer, offset, mask, vl) + class RISCVISegStoreMask<int nf> + : Intrinsic<[], + !listconcat([llvm_anyvector_ty], + !listsplat(LLVMMatchType<0>, !add(nf, -1)), + [LLVMPointerToElt<0>, llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_anyint_ty]), + [NoCapture<ArgIndex<nf>>, IntrWriteMem]>, RISCVVIntrinsic; + + multiclass RISCVUSLoad { + def "int_riscv_" # NAME : RISCVUSLoad; + def "int_riscv_" # NAME # "_mask" : RISCVUSLoadMask; + } + multiclass RISCVUSLoadFF { + def "int_riscv_" # NAME : RISCVUSLoadFF; + def "int_riscv_" # NAME # "_mask" : RISCVUSLoadFFMask; + } + multiclass RISCVSLoad { + def "int_riscv_" # NAME : RISCVSLoad; + def "int_riscv_" # NAME # "_mask" : RISCVSLoadMask; + } + multiclass RISCVILoad { + def "int_riscv_" # NAME : RISCVILoad; + def "int_riscv_" # NAME # "_mask" : RISCVILoadMask; + } + multiclass RISCVUSStore { + def "int_riscv_" # NAME : RISCVUSStore; + def "int_riscv_" # NAME # "_mask" : RISCVUSStoreMask; + } + multiclass RISCVSStore { + def "int_riscv_" # NAME : RISCVSStore; + def "int_riscv_" # NAME # "_mask" : RISCVSStoreMask; + } + + multiclass RISCVIStore { + def "int_riscv_" # NAME : RISCVIStore; + def "int_riscv_" # NAME # "_mask" : RISCVIStoreMask; + } + multiclass RISCVUnaryAA { + def "int_riscv_" # NAME : RISCVUnaryAANoMask; + def "int_riscv_" # NAME # "_mask" : RISCVUnaryAAMask; + } + multiclass RISCVUnaryAB { + def "int_riscv_" # NAME : RISCVUnaryABNoMask; + def "int_riscv_" # NAME # "_mask" : RISCVUnaryABMask; + } + // AAX means the destination type(A) is the same as the first source + // type(A). X means any type for the second source operand. + multiclass RISCVBinaryAAX { + def "int_riscv_" # NAME : RISCVBinaryAAXNoMask; + def "int_riscv_" # NAME # "_mask" : RISCVBinaryAAXMask; + } + // ABX means the destination type(A) is different from the first source + // type(B). X means any type for the second source operand. 
+ multiclass RISCVBinaryABX { + def "int_riscv_" # NAME : RISCVBinaryABXNoMask; + def "int_riscv_" # NAME # "_mask" : RISCVBinaryABXMask; + } + multiclass RISCVBinaryWithV0 { + def "int_riscv_" # NAME : RISCVBinaryWithV0; + } + multiclass RISCVBinaryMaskOutWithV0 { + def "int_riscv_" # NAME : RISCVBinaryMOutWithV0; + } + multiclass RISCVBinaryMaskOut { + def "int_riscv_" # NAME : RISCVBinaryMOut; + } + multiclass RISCVSaturatingBinaryAAX { + def "int_riscv_" # NAME : RISCVSaturatingBinaryAAXNoMask; + def "int_riscv_" # NAME # "_mask" : RISCVSaturatingBinaryAAXMask; + } + multiclass RISCVSaturatingBinaryABX { + def "int_riscv_" # NAME : RISCVSaturatingBinaryABXNoMask; + def "int_riscv_" # NAME # "_mask" : RISCVSaturatingBinaryABXMask; + } + multiclass RISCVTernaryAAAX { + def "int_riscv_" # NAME : RISCVTernaryAAAXNoMask; + def "int_riscv_" # NAME # "_mask" : RISCVTernaryAAAXMask; + } + multiclass RISCVTernaryAAXA { + def "int_riscv_" # NAME : RISCVTernaryAAXANoMask; + def "int_riscv_" # NAME # "_mask" : RISCVTernaryAAXAMask; + } + multiclass RISCVCompare { + def "int_riscv_" # NAME : RISCVCompareNoMask; + def "int_riscv_" # NAME # "_mask" : RISCVCompareMask; + } + multiclass RISCVClassify { + def "int_riscv_" # NAME : RISCVClassifyNoMask; + def "int_riscv_" # NAME # "_mask" : RISCVClassifyMask; + } + multiclass RISCVTernaryWide { + def "int_riscv_" # NAME : RISCVTernaryWideNoMask; + def "int_riscv_" # NAME # "_mask" : RISCVTernaryWideMask; + } + multiclass RISCVReduction { + def "int_riscv_" # NAME : RISCVReductionNoMask; + def "int_riscv_" # NAME # "_mask" : RISCVReductionMask; + } + multiclass RISCVMaskUnarySOut { + def "int_riscv_" # NAME : RISCVMaskUnarySOutNoMask; + def "int_riscv_" # NAME # "_mask" : RISCVMaskUnarySOutMask; + } + multiclass RISCVMaskUnaryMOut { + def "int_riscv_" # NAME : RISCVUnaryNoMask; + def "int_riscv_" # NAME # "_mask" : RISCVMaskUnaryMOutMask; + } + multiclass RISCVConversion { + def "int_riscv_" #NAME :RISCVConversionNoMask; + def "int_riscv_" # NAME # "_mask" : RISCVConversionMask; + } + multiclass RISCVAMO { + def "int_riscv_" # NAME : RISCVAMONoMask; + def "int_riscv_" # NAME # "_mask" : RISCVAMOMask; + } + multiclass RISCVUSSegLoad<int nf> { + def "int_riscv_" # NAME : RISCVUSSegLoad<nf>; + def "int_riscv_" # NAME # "_mask" : RISCVUSSegLoadMask<nf>; + } + multiclass RISCVUSSegLoadFF<int nf> { + def "int_riscv_" # NAME : RISCVUSSegLoadFF<nf>; + def "int_riscv_" # NAME # "_mask" : RISCVUSSegLoadFFMask<nf>; + } + multiclass RISCVSSegLoad<int nf> { + def "int_riscv_" # NAME : RISCVSSegLoad<nf>; + def "int_riscv_" # NAME # "_mask" : RISCVSSegLoadMask<nf>; + } + multiclass RISCVISegLoad<int nf> { + def "int_riscv_" # NAME : RISCVISegLoad<nf>; + def "int_riscv_" # NAME # "_mask" : RISCVISegLoadMask<nf>; + } + multiclass RISCVUSSegStore<int nf> { + def "int_riscv_" # NAME : RISCVUSSegStore<nf>; + def "int_riscv_" # NAME # "_mask" : RISCVUSSegStoreMask<nf>; + } + multiclass RISCVSSegStore<int nf> { + def "int_riscv_" # NAME : RISCVSSegStore<nf>; + def "int_riscv_" # NAME # "_mask" : RISCVSSegStoreMask<nf>; + } + multiclass RISCVISegStore<int nf> { + def "int_riscv_" # NAME : RISCVISegStore<nf>; + def "int_riscv_" # NAME # "_mask" : RISCVISegStoreMask<nf>; + } + + defm vle : RISCVUSLoad; + defm vleff : RISCVUSLoadFF; + defm vse : RISCVUSStore; + defm vlse: RISCVSLoad; + defm vsse: RISCVSStore; + defm vluxei : RISCVILoad; + defm vloxei : RISCVILoad; + defm vsoxei : RISCVIStore; + defm vsuxei : RISCVIStore; + + defm vamoswap : RISCVAMO; + defm vamoadd : RISCVAMO; + 
defm vamoxor : RISCVAMO; + defm vamoand : RISCVAMO; + defm vamoor : RISCVAMO; + defm vamomin : RISCVAMO; + defm vamomax : RISCVAMO; + defm vamominu : RISCVAMO; + defm vamomaxu : RISCVAMO; + + defm vadd : RISCVBinaryAAX; + defm vsub : RISCVBinaryAAX; + defm vrsub : RISCVBinaryAAX; + + defm vwaddu : RISCVBinaryABX; + defm vwadd : RISCVBinaryABX; + defm vwaddu_w : RISCVBinaryAAX; + defm vwadd_w : RISCVBinaryAAX; + defm vwsubu : RISCVBinaryABX; + defm vwsub : RISCVBinaryABX; + defm vwsubu_w : RISCVBinaryAAX; + defm vwsub_w : RISCVBinaryAAX; + + defm vzext : RISCVUnaryAB; + defm vsext : RISCVUnaryAB; + + defm vadc : RISCVBinaryWithV0; + defm vmadc_carry_in : RISCVBinaryMaskOutWithV0; + defm vmadc : RISCVBinaryMaskOut; + + defm vsbc : RISCVBinaryWithV0; + defm vmsbc_borrow_in : RISCVBinaryMaskOutWithV0; + defm vmsbc : RISCVBinaryMaskOut; + + defm vand : RISCVBinaryAAX; + defm vor : RISCVBinaryAAX; + defm vxor : RISCVBinaryAAX; + + defm vsll : RISCVBinaryAAX; + defm vsrl : RISCVBinaryAAX; + defm vsra : RISCVBinaryAAX; + + defm vnsrl : RISCVBinaryABX; + defm vnsra : RISCVBinaryABX; + + defm vmseq : RISCVCompare; + defm vmsne : RISCVCompare; + defm vmsltu : RISCVCompare; + defm vmslt : RISCVCompare; + defm vmsleu : RISCVCompare; + defm vmsle : RISCVCompare; + defm vmsgtu : RISCVCompare; + defm vmsgt : RISCVCompare; + + defm vminu : RISCVBinaryAAX; + defm vmin : RISCVBinaryAAX; + defm vmaxu : RISCVBinaryAAX; + defm vmax : RISCVBinaryAAX; + + defm vmul : RISCVBinaryAAX; + defm vmulh : RISCVBinaryAAX; + defm vmulhu : RISCVBinaryAAX; + defm vmulhsu : RISCVBinaryAAX; + + defm vdivu : RISCVBinaryAAX; + defm vdiv : RISCVBinaryAAX; + defm vremu : RISCVBinaryAAX; + defm vrem : RISCVBinaryAAX; + + defm vwmul : RISCVBinaryABX; + defm vwmulu : RISCVBinaryABX; + defm vwmulsu : RISCVBinaryABX; + + defm vmacc : RISCVTernaryAAXA; + defm vnmsac : RISCVTernaryAAXA; + defm vmadd : RISCVTernaryAAXA; + defm vnmsub : RISCVTernaryAAXA; + + defm vwmaccu : RISCVTernaryWide; + defm vwmacc : RISCVTernaryWide; + defm vwmaccus : RISCVTernaryWide; + defm vwmaccsu : RISCVTernaryWide; + + defm vfadd : RISCVBinaryAAX; + defm vfsub : RISCVBinaryAAX; + defm vfrsub : RISCVBinaryAAX; + + defm vfwadd : RISCVBinaryABX; + defm vfwsub : RISCVBinaryABX; + defm vfwadd_w : RISCVBinaryAAX; + defm vfwsub_w : RISCVBinaryAAX; + + defm vsaddu : RISCVSaturatingBinaryAAX; + defm vsadd : RISCVSaturatingBinaryAAX; + defm vssubu : RISCVSaturatingBinaryAAX; + defm vssub : RISCVSaturatingBinaryAAX; + + def int_riscv_vmerge : RISCVBinaryWithV0; + + def int_riscv_vmv_v_v : Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, llvm_anyint_ty], + [IntrNoMem]>, RISCVVIntrinsic; + def int_riscv_vmv_v_x : Intrinsic<[llvm_anyint_ty], + [LLVMVectorElementType<0>, llvm_anyint_ty], + [IntrNoMem]>, RISCVVIntrinsic { + let ExtendOperand = 1; + } + def int_riscv_vfmv_v_f : Intrinsic<[llvm_anyfloat_ty], + [LLVMVectorElementType<0>, llvm_anyint_ty], + [IntrNoMem]>, RISCVVIntrinsic; + + def int_riscv_vmv_x_s : Intrinsic<[LLVMVectorElementType<0>], + [llvm_anyint_ty], + [IntrNoMem]>, RISCVVIntrinsic; + def int_riscv_vmv_s_x : Intrinsic<[llvm_anyint_ty], + [LLVMMatchType<0>, LLVMVectorElementType<0>, + llvm_anyint_ty], + [IntrNoMem]>, RISCVVIntrinsic { + let ExtendOperand = 2; + } + + def int_riscv_vfmv_f_s : Intrinsic<[LLVMVectorElementType<0>], + [llvm_anyfloat_ty], + [IntrNoMem]>, RISCVVIntrinsic; + def int_riscv_vfmv_s_f : Intrinsic<[llvm_anyfloat_ty], + [LLVMMatchType<0>, LLVMVectorElementType<0>, + llvm_anyint_ty], + [IntrNoMem]>, RISCVVIntrinsic; + + defm vfmul : 
RISCVBinaryAAX; + defm vfdiv : RISCVBinaryAAX; + defm vfrdiv : RISCVBinaryAAX; + + defm vfwmul : RISCVBinaryABX; + + defm vfmacc : RISCVTernaryAAXA; + defm vfnmacc : RISCVTernaryAAXA; + defm vfmsac : RISCVTernaryAAXA; + defm vfnmsac : RISCVTernaryAAXA; + defm vfmadd : RISCVTernaryAAXA; + defm vfnmadd : RISCVTernaryAAXA; + defm vfmsub : RISCVTernaryAAXA; + defm vfnmsub : RISCVTernaryAAXA; + + defm vfwmacc : RISCVTernaryWide; + defm vfwnmacc : RISCVTernaryWide; + defm vfwmsac : RISCVTernaryWide; + defm vfwnmsac : RISCVTernaryWide; + + defm vfsqrt : RISCVUnaryAA; + defm vfrsqrte7 : RISCVUnaryAA; + defm vfrece7 : RISCVUnaryAA; + + defm vfmin : RISCVBinaryAAX; + defm vfmax : RISCVBinaryAAX; + + defm vfsgnj : RISCVBinaryAAX; + defm vfsgnjn : RISCVBinaryAAX; + defm vfsgnjx : RISCVBinaryAAX; + + defm vfclass : RISCVClassify; + + defm vfmerge : RISCVBinaryWithV0; + + defm vslideup : RISCVTernaryAAAX; + defm vslidedown : RISCVTernaryAAAX; + + defm vslide1up : RISCVBinaryAAX; + defm vslide1down : RISCVBinaryAAX; + defm vfslide1up : RISCVBinaryAAX; + defm vfslide1down : RISCVBinaryAAX; + + defm vrgather : RISCVBinaryAAX; + defm vrgatherei16 : RISCVBinaryAAX; + + def "int_riscv_vcompress" : RISCVBinaryAAAMask; + + defm vaaddu : RISCVSaturatingBinaryAAX; + defm vaadd : RISCVSaturatingBinaryAAX; + defm vasubu : RISCVSaturatingBinaryAAX; + defm vasub : RISCVSaturatingBinaryAAX; + + defm vsmul : RISCVSaturatingBinaryAAX; + + defm vssrl : RISCVSaturatingBinaryAAX; + defm vssra : RISCVSaturatingBinaryAAX; + + defm vnclipu : RISCVSaturatingBinaryABX; + defm vnclip : RISCVSaturatingBinaryABX; + + defm vmfeq : RISCVCompare; + defm vmfne : RISCVCompare; + defm vmflt : RISCVCompare; + defm vmfle : RISCVCompare; + defm vmfgt : RISCVCompare; + defm vmfge : RISCVCompare; + + defm vredsum : RISCVReduction; + defm vredand : RISCVReduction; + defm vredor : RISCVReduction; + defm vredxor : RISCVReduction; + defm vredminu : RISCVReduction; + defm vredmin : RISCVReduction; + defm vredmaxu : RISCVReduction; + defm vredmax : RISCVReduction; + + defm vwredsumu : RISCVReduction; + defm vwredsum : RISCVReduction; + + defm vfredosum : RISCVReduction; + defm vfredsum : RISCVReduction; + defm vfredmin : RISCVReduction; + defm vfredmax : RISCVReduction; + + defm vfwredsum : RISCVReduction; + defm vfwredosum : RISCVReduction; + + def int_riscv_vmand: RISCVBinaryAAANoMask; + def int_riscv_vmnand: RISCVBinaryAAANoMask; + def int_riscv_vmandnot: RISCVBinaryAAANoMask; + def int_riscv_vmxor: RISCVBinaryAAANoMask; + def int_riscv_vmor: RISCVBinaryAAANoMask; + def int_riscv_vmnor: RISCVBinaryAAANoMask; + def int_riscv_vmornot: RISCVBinaryAAANoMask; + def int_riscv_vmxnor: RISCVBinaryAAANoMask; + def int_riscv_vmclr : RISCVNullaryIntrinsic; + def int_riscv_vmset : RISCVNullaryIntrinsic; + + defm vpopc : RISCVMaskUnarySOut; + defm vfirst : RISCVMaskUnarySOut; + defm vmsbf : RISCVMaskUnaryMOut; + defm vmsof : RISCVMaskUnaryMOut; + defm vmsif : RISCVMaskUnaryMOut; + + defm vfcvt_xu_f_v : RISCVConversion; + defm vfcvt_x_f_v : RISCVConversion; + defm vfcvt_rtz_xu_f_v : RISCVConversion; + defm vfcvt_rtz_x_f_v : RISCVConversion; + defm vfcvt_f_xu_v : RISCVConversion; + defm vfcvt_f_x_v : RISCVConversion; + + defm vfwcvt_f_xu_v : RISCVConversion; + defm vfwcvt_f_x_v : RISCVConversion; + defm vfwcvt_xu_f_v : RISCVConversion; + defm vfwcvt_x_f_v : RISCVConversion; + defm vfwcvt_rtz_xu_f_v : RISCVConversion; + defm vfwcvt_rtz_x_f_v : RISCVConversion; + defm vfwcvt_f_f_v : RISCVConversion; + + defm vfncvt_f_xu_w : RISCVConversion; + defm vfncvt_f_x_w 
: RISCVConversion; + defm vfncvt_xu_f_w : RISCVConversion; + defm vfncvt_x_f_w : RISCVConversion; + defm vfncvt_rtz_xu_f_w : RISCVConversion; + defm vfncvt_rtz_x_f_w : RISCVConversion; + defm vfncvt_f_f_w : RISCVConversion; + defm vfncvt_rod_f_f_w : RISCVConversion; + + // Output: (vector) + // Input: (mask type input, vl) + def int_riscv_viota : Intrinsic<[llvm_anyvector_ty], + [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_anyint_ty], + [IntrNoMem]>, RISCVVIntrinsic; + // Output: (vector) + // Input: (maskedoff, mask type vector_in, mask, vl) + def int_riscv_viota_mask : Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_anyint_ty], + [IntrNoMem]>, RISCVVIntrinsic; + // Output: (vector) + // Input: (vl) + def int_riscv_vid : RISCVNullaryIntrinsic; + + // Output: (vector) + // Input: (maskedoff, mask, vl) + def int_riscv_vid_mask : Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_anyint_ty], + [IntrNoMem]>, RISCVVIntrinsic; + + foreach nf = [2, 3, 4, 5, 6, 7, 8] in { + defm vlseg # nf : RISCVUSSegLoad<nf>; + defm vlseg # nf # ff : RISCVUSSegLoadFF<nf>; + defm vlsseg # nf : RISCVSSegLoad<nf>; + defm vloxseg # nf : RISCVISegLoad<nf>; + defm vluxseg # nf : RISCVISegLoad<nf>; + defm vsseg # nf : RISCVUSSegStore<nf>; + defm vssseg # nf : RISCVSSegStore<nf>; + defm vsoxseg # nf : RISCVISegStore<nf>; + defm vsuxseg # nf : RISCVISegStore<nf>; + } + +} // TargetPrefix = "riscv" diff --git a/llvm/include/llvm/IR/IntrinsicsVE.td b/llvm/include/llvm/IR/IntrinsicsVE.td new file mode 100644 index 000000000000..be4bccef0cc1 --- /dev/null +++ b/llvm/include/llvm/IR/IntrinsicsVE.td @@ -0,0 +1,35 @@ +// Define intrinsics written by hand + +// VEL Intrinsic instructions. 
+let TargetPrefix = "ve" in { + def int_ve_vl_svob : GCCBuiltin<"__builtin_ve_vl_svob">, + Intrinsic<[], [], [IntrHasSideEffects]>; + + def int_ve_vl_pack_f32p : GCCBuiltin<"__builtin_ve_vl_pack_f32p">, + Intrinsic<[llvm_i64_ty], [llvm_ptr_ty, llvm_ptr_ty], + [IntrReadMem]>; + def int_ve_vl_pack_f32a : GCCBuiltin<"__builtin_ve_vl_pack_f32a">, + Intrinsic<[llvm_i64_ty], [llvm_ptr_ty], + [IntrReadMem]>; + + def int_ve_vl_extract_vm512u : + GCCBuiltin<"__builtin_ve_vl_extract_vm512u">, + Intrinsic<[LLVMType<v256i1>], [LLVMType<v512i1>], [IntrNoMem]>; + + def int_ve_vl_extract_vm512l : + GCCBuiltin<"__builtin_ve_vl_extract_vm512l">, + Intrinsic<[LLVMType<v256i1>], [LLVMType<v512i1>], [IntrNoMem]>; + + def int_ve_vl_insert_vm512u : + GCCBuiltin<"__builtin_ve_vl_insert_vm512u">, + Intrinsic<[LLVMType<v512i1>], [LLVMType<v512i1>, LLVMType<v256i1>], + [IntrNoMem]>; + + def int_ve_vl_insert_vm512l : + GCCBuiltin<"__builtin_ve_vl_insert_vm512l">, + Intrinsic<[LLVMType<v512i1>], [LLVMType<v512i1>, LLVMType<v256i1>], + [IntrNoMem]>; +} + +// Define intrinsics automatically generated +include "llvm/IR/IntrinsicsVEVL.gen.td" diff --git a/llvm/include/llvm/IR/IntrinsicsVEVL.gen.td b/llvm/include/llvm/IR/IntrinsicsVEVL.gen.td new file mode 100644 index 000000000000..67cbd307903d --- /dev/null +++ b/llvm/include/llvm/IR/IntrinsicsVEVL.gen.td @@ -0,0 +1,1213 @@ +let TargetPrefix = "ve" in def int_ve_vl_vld_vssl : GCCBuiltin<"__builtin_ve_vl_vld_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vld_vssvl : GCCBuiltin<"__builtin_ve_vl_vld_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vldnc_vssl : GCCBuiltin<"__builtin_ve_vl_vldnc_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vldnc_vssvl : GCCBuiltin<"__builtin_ve_vl_vldnc_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vldu_vssl : GCCBuiltin<"__builtin_ve_vl_vldu_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vldu_vssvl : GCCBuiltin<"__builtin_ve_vl_vldu_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vldunc_vssl : GCCBuiltin<"__builtin_ve_vl_vldunc_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vldunc_vssvl : GCCBuiltin<"__builtin_ve_vl_vldunc_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vldlsx_vssl : GCCBuiltin<"__builtin_ve_vl_vldlsx_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vldlsx_vssvl : GCCBuiltin<"__builtin_ve_vl_vldlsx_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vldlsxnc_vssl : GCCBuiltin<"__builtin_ve_vl_vldlsxnc_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>; 
+let TargetPrefix = "ve" in def int_ve_vl_vldlsxnc_vssvl : GCCBuiltin<"__builtin_ve_vl_vldlsxnc_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vldlzx_vssl : GCCBuiltin<"__builtin_ve_vl_vldlzx_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vldlzx_vssvl : GCCBuiltin<"__builtin_ve_vl_vldlzx_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vldlzxnc_vssl : GCCBuiltin<"__builtin_ve_vl_vldlzxnc_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vldlzxnc_vssvl : GCCBuiltin<"__builtin_ve_vl_vldlzxnc_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vld2d_vssl : GCCBuiltin<"__builtin_ve_vl_vld2d_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vld2d_vssvl : GCCBuiltin<"__builtin_ve_vl_vld2d_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vld2dnc_vssl : GCCBuiltin<"__builtin_ve_vl_vld2dnc_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vld2dnc_vssvl : GCCBuiltin<"__builtin_ve_vl_vld2dnc_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vldu2d_vssl : GCCBuiltin<"__builtin_ve_vl_vldu2d_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vldu2d_vssvl : GCCBuiltin<"__builtin_ve_vl_vldu2d_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vldu2dnc_vssl : GCCBuiltin<"__builtin_ve_vl_vldu2dnc_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vldu2dnc_vssvl : GCCBuiltin<"__builtin_ve_vl_vldu2dnc_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vldl2dsx_vssl : GCCBuiltin<"__builtin_ve_vl_vldl2dsx_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vldl2dsx_vssvl : GCCBuiltin<"__builtin_ve_vl_vldl2dsx_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vldl2dsxnc_vssl : GCCBuiltin<"__builtin_ve_vl_vldl2dsxnc_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vldl2dsxnc_vssvl : GCCBuiltin<"__builtin_ve_vl_vldl2dsxnc_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vldl2dzx_vssl : 
GCCBuiltin<"__builtin_ve_vl_vldl2dzx_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vldl2dzx_vssvl : GCCBuiltin<"__builtin_ve_vl_vldl2dzx_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vldl2dzxnc_vssl : GCCBuiltin<"__builtin_ve_vl_vldl2dzxnc_vssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vldl2dzxnc_vssvl : GCCBuiltin<"__builtin_ve_vl_vldl2dzxnc_vssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, llvm_ptr_ty, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vst_vssl : GCCBuiltin<"__builtin_ve_vl_vst_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vst_vssml : GCCBuiltin<"__builtin_ve_vl_vst_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vstnc_vssl : GCCBuiltin<"__builtin_ve_vl_vstnc_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vstnc_vssml : GCCBuiltin<"__builtin_ve_vl_vstnc_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vstot_vssl : GCCBuiltin<"__builtin_ve_vl_vstot_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vstot_vssml : GCCBuiltin<"__builtin_ve_vl_vstot_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vstncot_vssl : GCCBuiltin<"__builtin_ve_vl_vstncot_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vstncot_vssml : GCCBuiltin<"__builtin_ve_vl_vstncot_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vstu_vssl : GCCBuiltin<"__builtin_ve_vl_vstu_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vstu_vssml : GCCBuiltin<"__builtin_ve_vl_vstu_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vstunc_vssl : GCCBuiltin<"__builtin_ve_vl_vstunc_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vstunc_vssml : GCCBuiltin<"__builtin_ve_vl_vstunc_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vstuot_vssl : GCCBuiltin<"__builtin_ve_vl_vstuot_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vstuot_vssml : GCCBuiltin<"__builtin_ve_vl_vstuot_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, 
LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vstuncot_vssl : GCCBuiltin<"__builtin_ve_vl_vstuncot_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vstuncot_vssml : GCCBuiltin<"__builtin_ve_vl_vstuncot_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vstl_vssl : GCCBuiltin<"__builtin_ve_vl_vstl_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vstl_vssml : GCCBuiltin<"__builtin_ve_vl_vstl_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vstlnc_vssl : GCCBuiltin<"__builtin_ve_vl_vstlnc_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vstlnc_vssml : GCCBuiltin<"__builtin_ve_vl_vstlnc_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vstlot_vssl : GCCBuiltin<"__builtin_ve_vl_vstlot_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vstlot_vssml : GCCBuiltin<"__builtin_ve_vl_vstlot_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vstlncot_vssl : GCCBuiltin<"__builtin_ve_vl_vstlncot_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vstlncot_vssml : GCCBuiltin<"__builtin_ve_vl_vstlncot_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vst2d_vssl : GCCBuiltin<"__builtin_ve_vl_vst2d_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vst2d_vssml : GCCBuiltin<"__builtin_ve_vl_vst2d_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vst2dnc_vssl : GCCBuiltin<"__builtin_ve_vl_vst2dnc_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vst2dnc_vssml : GCCBuiltin<"__builtin_ve_vl_vst2dnc_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vst2dot_vssl : GCCBuiltin<"__builtin_ve_vl_vst2dot_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vst2dot_vssml : GCCBuiltin<"__builtin_ve_vl_vst2dot_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vst2dncot_vssl : GCCBuiltin<"__builtin_ve_vl_vst2dncot_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in 
def int_ve_vl_vst2dncot_vssml : GCCBuiltin<"__builtin_ve_vl_vst2dncot_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vstu2d_vssl : GCCBuiltin<"__builtin_ve_vl_vstu2d_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vstu2d_vssml : GCCBuiltin<"__builtin_ve_vl_vstu2d_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vstu2dnc_vssl : GCCBuiltin<"__builtin_ve_vl_vstu2dnc_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vstu2dnc_vssml : GCCBuiltin<"__builtin_ve_vl_vstu2dnc_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vstu2dot_vssl : GCCBuiltin<"__builtin_ve_vl_vstu2dot_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vstu2dot_vssml : GCCBuiltin<"__builtin_ve_vl_vstu2dot_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vstu2dncot_vssl : GCCBuiltin<"__builtin_ve_vl_vstu2dncot_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vstu2dncot_vssml : GCCBuiltin<"__builtin_ve_vl_vstu2dncot_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vstl2d_vssl : GCCBuiltin<"__builtin_ve_vl_vstl2d_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vstl2d_vssml : GCCBuiltin<"__builtin_ve_vl_vstl2d_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vstl2dnc_vssl : GCCBuiltin<"__builtin_ve_vl_vstl2dnc_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vstl2dnc_vssml : GCCBuiltin<"__builtin_ve_vl_vstl2dnc_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vstl2dot_vssl : GCCBuiltin<"__builtin_ve_vl_vstl2dot_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vstl2dot_vssml : GCCBuiltin<"__builtin_ve_vl_vstl2dot_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vstl2dncot_vssl : GCCBuiltin<"__builtin_ve_vl_vstl2dncot_vssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vstl2dncot_vssml : GCCBuiltin<"__builtin_ve_vl_vstl2dncot_vssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<i64>, llvm_ptr_ty, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def 
int_ve_vl_pfchv_ssl : GCCBuiltin<"__builtin_ve_vl_pfchv_ssl">, Intrinsic<[], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrInaccessibleMemOrArgMemOnly]>; +let TargetPrefix = "ve" in def int_ve_vl_pfchvnc_ssl : GCCBuiltin<"__builtin_ve_vl_pfchvnc_ssl">, Intrinsic<[], [LLVMType<i64>, llvm_ptr_ty, LLVMType<i32>], [IntrInaccessibleMemOrArgMemOnly]>; +let TargetPrefix = "ve" in def int_ve_vl_lsv_vvss : GCCBuiltin<"__builtin_ve_vl_lsv_vvss">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>, LLVMType<i64>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_lvsl_svs : GCCBuiltin<"__builtin_ve_vl_lvsl_svs">, Intrinsic<[LLVMType<i64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_lvsd_svs : GCCBuiltin<"__builtin_ve_vl_lvsd_svs">, Intrinsic<[LLVMType<f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_lvss_svs : GCCBuiltin<"__builtin_ve_vl_lvss_svs">, Intrinsic<[LLVMType<f32>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_lvm_mmss : GCCBuiltin<"__builtin_ve_vl_lvm_mmss">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256i1>, LLVMType<i64>, LLVMType<i64>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_lvm_MMss : GCCBuiltin<"__builtin_ve_vl_lvm_MMss">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v512i1>, LLVMType<i64>, LLVMType<i64>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_svm_sms : GCCBuiltin<"__builtin_ve_vl_svm_sms">, Intrinsic<[LLVMType<i64>], [LLVMType<v256i1>, LLVMType<i64>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_svm_sMs : GCCBuiltin<"__builtin_ve_vl_svm_sMs">, Intrinsic<[LLVMType<i64>], [LLVMType<v512i1>, LLVMType<i64>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vbrdd_vsl : GCCBuiltin<"__builtin_ve_vl_vbrdd_vsl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vbrdd_vsvl : GCCBuiltin<"__builtin_ve_vl_vbrdd_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vbrdd_vsmvl : GCCBuiltin<"__builtin_ve_vl_vbrdd_vsmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vbrdl_vsl : GCCBuiltin<"__builtin_ve_vl_vbrdl_vsl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vbrdl_vsvl : GCCBuiltin<"__builtin_ve_vl_vbrdl_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vbrdl_vsmvl : GCCBuiltin<"__builtin_ve_vl_vbrdl_vsmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vbrds_vsl : GCCBuiltin<"__builtin_ve_vl_vbrds_vsl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vbrds_vsvl : GCCBuiltin<"__builtin_ve_vl_vbrds_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vbrds_vsmvl : GCCBuiltin<"__builtin_ve_vl_vbrds_vsmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vbrdw_vsl : 
GCCBuiltin<"__builtin_ve_vl_vbrdw_vsl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vbrdw_vsvl : GCCBuiltin<"__builtin_ve_vl_vbrdw_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vbrdw_vsmvl : GCCBuiltin<"__builtin_ve_vl_vbrdw_vsmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvbrd_vsl : GCCBuiltin<"__builtin_ve_vl_pvbrd_vsl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvbrd_vsvl : GCCBuiltin<"__builtin_ve_vl_pvbrd_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvbrd_vsMvl : GCCBuiltin<"__builtin_ve_vl_pvbrd_vsMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vmv_vsvl : GCCBuiltin<"__builtin_ve_vl_vmv_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vmv_vsvvl : GCCBuiltin<"__builtin_ve_vl_vmv_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vmv_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vmv_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vaddul_vvvl : GCCBuiltin<"__builtin_ve_vl_vaddul_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vaddul_vvvvl : GCCBuiltin<"__builtin_ve_vl_vaddul_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vaddul_vsvl : GCCBuiltin<"__builtin_ve_vl_vaddul_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vaddul_vsvvl : GCCBuiltin<"__builtin_ve_vl_vaddul_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vaddul_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vaddul_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vaddul_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vaddul_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vadduw_vvvl : GCCBuiltin<"__builtin_ve_vl_vadduw_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vadduw_vvvvl : GCCBuiltin<"__builtin_ve_vl_vadduw_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vadduw_vsvl : GCCBuiltin<"__builtin_ve_vl_vadduw_vsvl">, 
Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vadduw_vsvvl : GCCBuiltin<"__builtin_ve_vl_vadduw_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vadduw_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vadduw_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vadduw_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vadduw_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvaddu_vvvl : GCCBuiltin<"__builtin_ve_vl_pvaddu_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvaddu_vvvvl : GCCBuiltin<"__builtin_ve_vl_pvaddu_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvaddu_vsvl : GCCBuiltin<"__builtin_ve_vl_pvaddu_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvaddu_vsvvl : GCCBuiltin<"__builtin_ve_vl_pvaddu_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvaddu_vvvMvl : GCCBuiltin<"__builtin_ve_vl_pvaddu_vvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvaddu_vsvMvl : GCCBuiltin<"__builtin_ve_vl_pvaddu_vsvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vaddswsx_vvvl : GCCBuiltin<"__builtin_ve_vl_vaddswsx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vaddswsx_vvvvl : GCCBuiltin<"__builtin_ve_vl_vaddswsx_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vaddswsx_vsvl : GCCBuiltin<"__builtin_ve_vl_vaddswsx_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vaddswsx_vsvvl : GCCBuiltin<"__builtin_ve_vl_vaddswsx_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vaddswsx_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vaddswsx_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vaddswsx_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vaddswsx_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vaddswzx_vvvl : GCCBuiltin<"__builtin_ve_vl_vaddswzx_vvvl">, 
Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vaddswzx_vvvvl : GCCBuiltin<"__builtin_ve_vl_vaddswzx_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vaddswzx_vsvl : GCCBuiltin<"__builtin_ve_vl_vaddswzx_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vaddswzx_vsvvl : GCCBuiltin<"__builtin_ve_vl_vaddswzx_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vaddswzx_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vaddswzx_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vaddswzx_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vaddswzx_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvadds_vvvl : GCCBuiltin<"__builtin_ve_vl_pvadds_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvadds_vvvvl : GCCBuiltin<"__builtin_ve_vl_pvadds_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvadds_vsvl : GCCBuiltin<"__builtin_ve_vl_pvadds_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvadds_vsvvl : GCCBuiltin<"__builtin_ve_vl_pvadds_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvadds_vvvMvl : GCCBuiltin<"__builtin_ve_vl_pvadds_vvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvadds_vsvMvl : GCCBuiltin<"__builtin_ve_vl_pvadds_vsvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vaddsl_vvvl : GCCBuiltin<"__builtin_ve_vl_vaddsl_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vaddsl_vvvvl : GCCBuiltin<"__builtin_ve_vl_vaddsl_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vaddsl_vsvl : GCCBuiltin<"__builtin_ve_vl_vaddsl_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vaddsl_vsvvl : GCCBuiltin<"__builtin_ve_vl_vaddsl_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vaddsl_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vaddsl_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, 
LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vaddsl_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vaddsl_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vsubul_vvvl : GCCBuiltin<"__builtin_ve_vl_vsubul_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vsubul_vvvvl : GCCBuiltin<"__builtin_ve_vl_vsubul_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vsubul_vsvl : GCCBuiltin<"__builtin_ve_vl_vsubul_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vsubul_vsvvl : GCCBuiltin<"__builtin_ve_vl_vsubul_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vsubul_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vsubul_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vsubul_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vsubul_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vsubuw_vvvl : GCCBuiltin<"__builtin_ve_vl_vsubuw_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vsubuw_vvvvl : GCCBuiltin<"__builtin_ve_vl_vsubuw_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vsubuw_vsvl : GCCBuiltin<"__builtin_ve_vl_vsubuw_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vsubuw_vsvvl : GCCBuiltin<"__builtin_ve_vl_vsubuw_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vsubuw_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vsubuw_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vsubuw_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vsubuw_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvsubu_vvvl : GCCBuiltin<"__builtin_ve_vl_pvsubu_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvsubu_vvvvl : GCCBuiltin<"__builtin_ve_vl_pvsubu_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvsubu_vsvl : GCCBuiltin<"__builtin_ve_vl_pvsubu_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def 
int_ve_vl_pvsubu_vsvvl : GCCBuiltin<"__builtin_ve_vl_pvsubu_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvsubu_vvvMvl : GCCBuiltin<"__builtin_ve_vl_pvsubu_vvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvsubu_vsvMvl : GCCBuiltin<"__builtin_ve_vl_pvsubu_vsvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vsubswsx_vvvl : GCCBuiltin<"__builtin_ve_vl_vsubswsx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vsubswsx_vvvvl : GCCBuiltin<"__builtin_ve_vl_vsubswsx_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vsubswsx_vsvl : GCCBuiltin<"__builtin_ve_vl_vsubswsx_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vsubswsx_vsvvl : GCCBuiltin<"__builtin_ve_vl_vsubswsx_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vsubswsx_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vsubswsx_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vsubswsx_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vsubswsx_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vsubswzx_vvvl : GCCBuiltin<"__builtin_ve_vl_vsubswzx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vsubswzx_vvvvl : GCCBuiltin<"__builtin_ve_vl_vsubswzx_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vsubswzx_vsvl : GCCBuiltin<"__builtin_ve_vl_vsubswzx_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vsubswzx_vsvvl : GCCBuiltin<"__builtin_ve_vl_vsubswzx_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vsubswzx_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vsubswzx_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vsubswzx_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vsubswzx_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvsubs_vvvl : GCCBuiltin<"__builtin_ve_vl_pvsubs_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def 
int_ve_vl_pvsubs_vvvvl : GCCBuiltin<"__builtin_ve_vl_pvsubs_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvsubs_vsvl : GCCBuiltin<"__builtin_ve_vl_pvsubs_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvsubs_vsvvl : GCCBuiltin<"__builtin_ve_vl_pvsubs_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvsubs_vvvMvl : GCCBuiltin<"__builtin_ve_vl_pvsubs_vvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvsubs_vsvMvl : GCCBuiltin<"__builtin_ve_vl_pvsubs_vsvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vsubsl_vvvl : GCCBuiltin<"__builtin_ve_vl_vsubsl_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vsubsl_vvvvl : GCCBuiltin<"__builtin_ve_vl_vsubsl_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vsubsl_vsvl : GCCBuiltin<"__builtin_ve_vl_vsubsl_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vsubsl_vsvvl : GCCBuiltin<"__builtin_ve_vl_vsubsl_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vsubsl_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vsubsl_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vsubsl_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vsubsl_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vmulul_vvvl : GCCBuiltin<"__builtin_ve_vl_vmulul_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vmulul_vvvvl : GCCBuiltin<"__builtin_ve_vl_vmulul_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vmulul_vsvl : GCCBuiltin<"__builtin_ve_vl_vmulul_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vmulul_vsvvl : GCCBuiltin<"__builtin_ve_vl_vmulul_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vmulul_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vmulul_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vmulul_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vmulul_vsvmvl">, 
Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vmuluw_vvvl : GCCBuiltin<"__builtin_ve_vl_vmuluw_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vmuluw_vvvvl : GCCBuiltin<"__builtin_ve_vl_vmuluw_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vmuluw_vsvl : GCCBuiltin<"__builtin_ve_vl_vmuluw_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vmuluw_vsvvl : GCCBuiltin<"__builtin_ve_vl_vmuluw_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vmuluw_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vmuluw_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vmuluw_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vmuluw_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vmulswsx_vvvl : GCCBuiltin<"__builtin_ve_vl_vmulswsx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vmulswsx_vvvvl : GCCBuiltin<"__builtin_ve_vl_vmulswsx_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vmulswsx_vsvl : GCCBuiltin<"__builtin_ve_vl_vmulswsx_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vmulswsx_vsvvl : GCCBuiltin<"__builtin_ve_vl_vmulswsx_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vmulswsx_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vmulswsx_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vmulswsx_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vmulswsx_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vmulswzx_vvvl : GCCBuiltin<"__builtin_ve_vl_vmulswzx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vmulswzx_vvvvl : GCCBuiltin<"__builtin_ve_vl_vmulswzx_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vmulswzx_vsvl : GCCBuiltin<"__builtin_ve_vl_vmulswzx_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vmulswzx_vsvvl : GCCBuiltin<"__builtin_ve_vl_vmulswzx_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, 
LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vmulswzx_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vmulswzx_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vmulswzx_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vmulswzx_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vmulsl_vvvl : GCCBuiltin<"__builtin_ve_vl_vmulsl_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vmulsl_vvvvl : GCCBuiltin<"__builtin_ve_vl_vmulsl_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vmulsl_vsvl : GCCBuiltin<"__builtin_ve_vl_vmulsl_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vmulsl_vsvvl : GCCBuiltin<"__builtin_ve_vl_vmulsl_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vmulsl_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vmulsl_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vmulsl_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vmulsl_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vmulslw_vvvl : GCCBuiltin<"__builtin_ve_vl_vmulslw_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vmulslw_vvvvl : GCCBuiltin<"__builtin_ve_vl_vmulslw_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vmulslw_vsvl : GCCBuiltin<"__builtin_ve_vl_vmulslw_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vmulslw_vsvvl : GCCBuiltin<"__builtin_ve_vl_vmulslw_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vdivul_vvvl : GCCBuiltin<"__builtin_ve_vl_vdivul_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vdivul_vvvvl : GCCBuiltin<"__builtin_ve_vl_vdivul_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vdivul_vsvl : GCCBuiltin<"__builtin_ve_vl_vdivul_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vdivul_vsvvl : GCCBuiltin<"__builtin_ve_vl_vdivul_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in 
def int_ve_vl_vdivul_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vdivul_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vdivul_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vdivul_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vdivuw_vvvl : GCCBuiltin<"__builtin_ve_vl_vdivuw_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vdivuw_vvvvl : GCCBuiltin<"__builtin_ve_vl_vdivuw_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vdivuw_vsvl : GCCBuiltin<"__builtin_ve_vl_vdivuw_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vdivuw_vsvvl : GCCBuiltin<"__builtin_ve_vl_vdivuw_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vdivuw_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vdivuw_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vdivuw_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vdivuw_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vdivul_vvsl : GCCBuiltin<"__builtin_ve_vl_vdivul_vvsl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vdivul_vvsvl : GCCBuiltin<"__builtin_ve_vl_vdivul_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vdivul_vvsmvl : GCCBuiltin<"__builtin_ve_vl_vdivul_vvsmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vdivuw_vvsl : GCCBuiltin<"__builtin_ve_vl_vdivuw_vvsl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vdivuw_vvsvl : GCCBuiltin<"__builtin_ve_vl_vdivuw_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vdivuw_vvsmvl : GCCBuiltin<"__builtin_ve_vl_vdivuw_vvsmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vdivswsx_vvvl : GCCBuiltin<"__builtin_ve_vl_vdivswsx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vdivswsx_vvvvl : GCCBuiltin<"__builtin_ve_vl_vdivswsx_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vdivswsx_vsvl : 
GCCBuiltin<"__builtin_ve_vl_vdivswsx_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vdivswsx_vsvvl : GCCBuiltin<"__builtin_ve_vl_vdivswsx_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vdivswsx_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vdivswsx_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vdivswsx_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vdivswsx_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vdivswzx_vvvl : GCCBuiltin<"__builtin_ve_vl_vdivswzx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vdivswzx_vvvvl : GCCBuiltin<"__builtin_ve_vl_vdivswzx_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vdivswzx_vsvl : GCCBuiltin<"__builtin_ve_vl_vdivswzx_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vdivswzx_vsvvl : GCCBuiltin<"__builtin_ve_vl_vdivswzx_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vdivswzx_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vdivswzx_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vdivswzx_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vdivswzx_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vdivswsx_vvsl : GCCBuiltin<"__builtin_ve_vl_vdivswsx_vvsl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vdivswsx_vvsvl : GCCBuiltin<"__builtin_ve_vl_vdivswsx_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vdivswsx_vvsmvl : GCCBuiltin<"__builtin_ve_vl_vdivswsx_vvsmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vdivswzx_vvsl : GCCBuiltin<"__builtin_ve_vl_vdivswzx_vvsl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vdivswzx_vvsvl : GCCBuiltin<"__builtin_ve_vl_vdivswzx_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vdivswzx_vvsmvl : GCCBuiltin<"__builtin_ve_vl_vdivswzx_vvsmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vdivsl_vvvl : 
GCCBuiltin<"__builtin_ve_vl_vdivsl_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vdivsl_vvvvl : GCCBuiltin<"__builtin_ve_vl_vdivsl_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vdivsl_vsvl : GCCBuiltin<"__builtin_ve_vl_vdivsl_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vdivsl_vsvvl : GCCBuiltin<"__builtin_ve_vl_vdivsl_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vdivsl_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vdivsl_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vdivsl_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vdivsl_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vdivsl_vvsl : GCCBuiltin<"__builtin_ve_vl_vdivsl_vvsl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vdivsl_vvsvl : GCCBuiltin<"__builtin_ve_vl_vdivsl_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vdivsl_vvsmvl : GCCBuiltin<"__builtin_ve_vl_vdivsl_vvsmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vcmpul_vvvl : GCCBuiltin<"__builtin_ve_vl_vcmpul_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vcmpul_vvvvl : GCCBuiltin<"__builtin_ve_vl_vcmpul_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vcmpul_vsvl : GCCBuiltin<"__builtin_ve_vl_vcmpul_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vcmpul_vsvvl : GCCBuiltin<"__builtin_ve_vl_vcmpul_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vcmpul_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vcmpul_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vcmpul_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vcmpul_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vcmpuw_vvvl : GCCBuiltin<"__builtin_ve_vl_vcmpuw_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vcmpuw_vvvvl : GCCBuiltin<"__builtin_ve_vl_vcmpuw_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, 
LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vcmpuw_vsvl : GCCBuiltin<"__builtin_ve_vl_vcmpuw_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vcmpuw_vsvvl : GCCBuiltin<"__builtin_ve_vl_vcmpuw_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vcmpuw_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vcmpuw_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vcmpuw_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vcmpuw_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvcmpu_vvvl : GCCBuiltin<"__builtin_ve_vl_pvcmpu_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvcmpu_vvvvl : GCCBuiltin<"__builtin_ve_vl_pvcmpu_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvcmpu_vsvl : GCCBuiltin<"__builtin_ve_vl_pvcmpu_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvcmpu_vsvvl : GCCBuiltin<"__builtin_ve_vl_pvcmpu_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvcmpu_vvvMvl : GCCBuiltin<"__builtin_ve_vl_pvcmpu_vvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvcmpu_vsvMvl : GCCBuiltin<"__builtin_ve_vl_pvcmpu_vsvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vcmpswsx_vvvl : GCCBuiltin<"__builtin_ve_vl_vcmpswsx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vcmpswsx_vvvvl : GCCBuiltin<"__builtin_ve_vl_vcmpswsx_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vcmpswsx_vsvl : GCCBuiltin<"__builtin_ve_vl_vcmpswsx_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vcmpswsx_vsvvl : GCCBuiltin<"__builtin_ve_vl_vcmpswsx_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vcmpswsx_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vcmpswsx_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vcmpswsx_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vcmpswsx_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256i1>, 
LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vcmpswzx_vvvl : GCCBuiltin<"__builtin_ve_vl_vcmpswzx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vcmpswzx_vvvvl : GCCBuiltin<"__builtin_ve_vl_vcmpswzx_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vcmpswzx_vsvl : GCCBuiltin<"__builtin_ve_vl_vcmpswzx_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vcmpswzx_vsvvl : GCCBuiltin<"__builtin_ve_vl_vcmpswzx_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vcmpswzx_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vcmpswzx_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vcmpswzx_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vcmpswzx_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvcmps_vvvl : GCCBuiltin<"__builtin_ve_vl_pvcmps_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvcmps_vvvvl : GCCBuiltin<"__builtin_ve_vl_pvcmps_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvcmps_vsvl : GCCBuiltin<"__builtin_ve_vl_pvcmps_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvcmps_vsvvl : GCCBuiltin<"__builtin_ve_vl_pvcmps_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvcmps_vvvMvl : GCCBuiltin<"__builtin_ve_vl_pvcmps_vvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvcmps_vsvMvl : GCCBuiltin<"__builtin_ve_vl_pvcmps_vsvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vcmpsl_vvvl : GCCBuiltin<"__builtin_ve_vl_vcmpsl_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vcmpsl_vvvvl : GCCBuiltin<"__builtin_ve_vl_vcmpsl_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vcmpsl_vsvl : GCCBuiltin<"__builtin_ve_vl_vcmpsl_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vcmpsl_vsvvl : GCCBuiltin<"__builtin_ve_vl_vcmpsl_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def 
int_ve_vl_vcmpsl_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vcmpsl_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vcmpsl_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vcmpsl_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vmaxswsx_vvvl : GCCBuiltin<"__builtin_ve_vl_vmaxswsx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vmaxswsx_vvvvl : GCCBuiltin<"__builtin_ve_vl_vmaxswsx_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vmaxswsx_vsvl : GCCBuiltin<"__builtin_ve_vl_vmaxswsx_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vmaxswsx_vsvvl : GCCBuiltin<"__builtin_ve_vl_vmaxswsx_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vmaxswsx_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vmaxswsx_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vmaxswsx_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vmaxswsx_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vmaxswzx_vvvl : GCCBuiltin<"__builtin_ve_vl_vmaxswzx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vmaxswzx_vvvvl : GCCBuiltin<"__builtin_ve_vl_vmaxswzx_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vmaxswzx_vsvl : GCCBuiltin<"__builtin_ve_vl_vmaxswzx_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vmaxswzx_vsvvl : GCCBuiltin<"__builtin_ve_vl_vmaxswzx_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vmaxswzx_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vmaxswzx_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vmaxswzx_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vmaxswzx_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvmaxs_vvvl : GCCBuiltin<"__builtin_ve_vl_pvmaxs_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvmaxs_vvvvl : GCCBuiltin<"__builtin_ve_vl_pvmaxs_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def 
int_ve_vl_pvmaxs_vsvl : GCCBuiltin<"__builtin_ve_vl_pvmaxs_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvmaxs_vsvvl : GCCBuiltin<"__builtin_ve_vl_pvmaxs_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvmaxs_vvvMvl : GCCBuiltin<"__builtin_ve_vl_pvmaxs_vvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvmaxs_vsvMvl : GCCBuiltin<"__builtin_ve_vl_pvmaxs_vsvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vminswsx_vvvl : GCCBuiltin<"__builtin_ve_vl_vminswsx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vminswsx_vvvvl : GCCBuiltin<"__builtin_ve_vl_vminswsx_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vminswsx_vsvl : GCCBuiltin<"__builtin_ve_vl_vminswsx_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vminswsx_vsvvl : GCCBuiltin<"__builtin_ve_vl_vminswsx_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vminswsx_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vminswsx_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vminswsx_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vminswsx_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vminswzx_vvvl : GCCBuiltin<"__builtin_ve_vl_vminswzx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vminswzx_vvvvl : GCCBuiltin<"__builtin_ve_vl_vminswzx_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vminswzx_vsvl : GCCBuiltin<"__builtin_ve_vl_vminswzx_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vminswzx_vsvvl : GCCBuiltin<"__builtin_ve_vl_vminswzx_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vminswzx_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vminswzx_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vminswzx_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vminswzx_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def 
int_ve_vl_pvmins_vvvl : GCCBuiltin<"__builtin_ve_vl_pvmins_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvmins_vvvvl : GCCBuiltin<"__builtin_ve_vl_pvmins_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvmins_vsvl : GCCBuiltin<"__builtin_ve_vl_pvmins_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvmins_vsvvl : GCCBuiltin<"__builtin_ve_vl_pvmins_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvmins_vvvMvl : GCCBuiltin<"__builtin_ve_vl_pvmins_vvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvmins_vsvMvl : GCCBuiltin<"__builtin_ve_vl_pvmins_vsvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vmaxsl_vvvl : GCCBuiltin<"__builtin_ve_vl_vmaxsl_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vmaxsl_vvvvl : GCCBuiltin<"__builtin_ve_vl_vmaxsl_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vmaxsl_vsvl : GCCBuiltin<"__builtin_ve_vl_vmaxsl_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vmaxsl_vsvvl : GCCBuiltin<"__builtin_ve_vl_vmaxsl_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vmaxsl_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vmaxsl_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vmaxsl_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vmaxsl_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vminsl_vvvl : GCCBuiltin<"__builtin_ve_vl_vminsl_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vminsl_vvvvl : GCCBuiltin<"__builtin_ve_vl_vminsl_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vminsl_vsvl : GCCBuiltin<"__builtin_ve_vl_vminsl_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vminsl_vsvvl : GCCBuiltin<"__builtin_ve_vl_vminsl_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vminsl_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vminsl_vvvmvl">, Intrinsic<[LLVMType<v256f64>], 
[LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vminsl_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vminsl_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vand_vvvl : GCCBuiltin<"__builtin_ve_vl_vand_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vand_vvvvl : GCCBuiltin<"__builtin_ve_vl_vand_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vand_vsvl : GCCBuiltin<"__builtin_ve_vl_vand_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vand_vsvvl : GCCBuiltin<"__builtin_ve_vl_vand_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vand_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vand_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vand_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vand_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvand_vvvl : GCCBuiltin<"__builtin_ve_vl_pvand_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvand_vvvvl : GCCBuiltin<"__builtin_ve_vl_pvand_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvand_vsvl : GCCBuiltin<"__builtin_ve_vl_pvand_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvand_vsvvl : GCCBuiltin<"__builtin_ve_vl_pvand_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvand_vvvMvl : GCCBuiltin<"__builtin_ve_vl_pvand_vvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvand_vsvMvl : GCCBuiltin<"__builtin_ve_vl_pvand_vsvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vor_vvvl : GCCBuiltin<"__builtin_ve_vl_vor_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vor_vvvvl : GCCBuiltin<"__builtin_ve_vl_vor_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vor_vsvl : GCCBuiltin<"__builtin_ve_vl_vor_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def 
int_ve_vl_vor_vsvvl : GCCBuiltin<"__builtin_ve_vl_vor_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vor_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vor_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vor_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vor_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvor_vvvl : GCCBuiltin<"__builtin_ve_vl_pvor_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvor_vvvvl : GCCBuiltin<"__builtin_ve_vl_pvor_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvor_vsvl : GCCBuiltin<"__builtin_ve_vl_pvor_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvor_vsvvl : GCCBuiltin<"__builtin_ve_vl_pvor_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvor_vvvMvl : GCCBuiltin<"__builtin_ve_vl_pvor_vvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvor_vsvMvl : GCCBuiltin<"__builtin_ve_vl_pvor_vsvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vxor_vvvl : GCCBuiltin<"__builtin_ve_vl_vxor_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vxor_vvvvl : GCCBuiltin<"__builtin_ve_vl_vxor_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vxor_vsvl : GCCBuiltin<"__builtin_ve_vl_vxor_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vxor_vsvvl : GCCBuiltin<"__builtin_ve_vl_vxor_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vxor_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vxor_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vxor_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vxor_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvxor_vvvl : GCCBuiltin<"__builtin_ve_vl_pvxor_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvxor_vvvvl : GCCBuiltin<"__builtin_ve_vl_pvxor_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, 
LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvxor_vsvl : GCCBuiltin<"__builtin_ve_vl_pvxor_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvxor_vsvvl : GCCBuiltin<"__builtin_ve_vl_pvxor_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvxor_vvvMvl : GCCBuiltin<"__builtin_ve_vl_pvxor_vvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvxor_vsvMvl : GCCBuiltin<"__builtin_ve_vl_pvxor_vsvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_veqv_vvvl : GCCBuiltin<"__builtin_ve_vl_veqv_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_veqv_vvvvl : GCCBuiltin<"__builtin_ve_vl_veqv_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_veqv_vsvl : GCCBuiltin<"__builtin_ve_vl_veqv_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_veqv_vsvvl : GCCBuiltin<"__builtin_ve_vl_veqv_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_veqv_vvvmvl : GCCBuiltin<"__builtin_ve_vl_veqv_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_veqv_vsvmvl : GCCBuiltin<"__builtin_ve_vl_veqv_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pveqv_vvvl : GCCBuiltin<"__builtin_ve_vl_pveqv_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pveqv_vvvvl : GCCBuiltin<"__builtin_ve_vl_pveqv_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pveqv_vsvl : GCCBuiltin<"__builtin_ve_vl_pveqv_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pveqv_vsvvl : GCCBuiltin<"__builtin_ve_vl_pveqv_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pveqv_vvvMvl : GCCBuiltin<"__builtin_ve_vl_pveqv_vvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pveqv_vsvMvl : GCCBuiltin<"__builtin_ve_vl_pveqv_vsvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def 
int_ve_vl_vseq_vl : GCCBuiltin<"__builtin_ve_vl_vseq_vl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vseq_vvl : GCCBuiltin<"__builtin_ve_vl_vseq_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvseqlo_vl : GCCBuiltin<"__builtin_ve_vl_pvseqlo_vl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvseqlo_vvl : GCCBuiltin<"__builtin_ve_vl_pvseqlo_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvsequp_vl : GCCBuiltin<"__builtin_ve_vl_pvsequp_vl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvsequp_vvl : GCCBuiltin<"__builtin_ve_vl_pvsequp_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvseq_vl : GCCBuiltin<"__builtin_ve_vl_pvseq_vl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvseq_vvl : GCCBuiltin<"__builtin_ve_vl_pvseq_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vsll_vvvl : GCCBuiltin<"__builtin_ve_vl_vsll_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vsll_vvvvl : GCCBuiltin<"__builtin_ve_vl_vsll_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vsll_vvsl : GCCBuiltin<"__builtin_ve_vl_vsll_vvsl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vsll_vvsvl : GCCBuiltin<"__builtin_ve_vl_vsll_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vsll_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vsll_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vsll_vvsmvl : GCCBuiltin<"__builtin_ve_vl_vsll_vvsmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvsll_vvvl : GCCBuiltin<"__builtin_ve_vl_pvsll_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvsll_vvvvl : GCCBuiltin<"__builtin_ve_vl_pvsll_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvsll_vvsl : GCCBuiltin<"__builtin_ve_vl_pvsll_vvsl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvsll_vvsvl : GCCBuiltin<"__builtin_ve_vl_pvsll_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvsll_vvvMvl : GCCBuiltin<"__builtin_ve_vl_pvsll_vvvMvl">, Intrinsic<[LLVMType<v256f64>], 
[LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvsll_vvsMvl : GCCBuiltin<"__builtin_ve_vl_pvsll_vvsMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vsrl_vvvl : GCCBuiltin<"__builtin_ve_vl_vsrl_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vsrl_vvvvl : GCCBuiltin<"__builtin_ve_vl_vsrl_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vsrl_vvsl : GCCBuiltin<"__builtin_ve_vl_vsrl_vvsl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vsrl_vvsvl : GCCBuiltin<"__builtin_ve_vl_vsrl_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vsrl_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vsrl_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vsrl_vvsmvl : GCCBuiltin<"__builtin_ve_vl_vsrl_vvsmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvsrl_vvvl : GCCBuiltin<"__builtin_ve_vl_pvsrl_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvsrl_vvvvl : GCCBuiltin<"__builtin_ve_vl_pvsrl_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvsrl_vvsl : GCCBuiltin<"__builtin_ve_vl_pvsrl_vvsl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvsrl_vvsvl : GCCBuiltin<"__builtin_ve_vl_pvsrl_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvsrl_vvvMvl : GCCBuiltin<"__builtin_ve_vl_pvsrl_vvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvsrl_vvsMvl : GCCBuiltin<"__builtin_ve_vl_pvsrl_vvsMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vslawsx_vvvl : GCCBuiltin<"__builtin_ve_vl_vslawsx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vslawsx_vvvvl : GCCBuiltin<"__builtin_ve_vl_vslawsx_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vslawsx_vvsl : GCCBuiltin<"__builtin_ve_vl_vslawsx_vvsl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix 
= "ve" in def int_ve_vl_vslawsx_vvsvl : GCCBuiltin<"__builtin_ve_vl_vslawsx_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vslawsx_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vslawsx_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vslawsx_vvsmvl : GCCBuiltin<"__builtin_ve_vl_vslawsx_vvsmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vslawzx_vvvl : GCCBuiltin<"__builtin_ve_vl_vslawzx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vslawzx_vvvvl : GCCBuiltin<"__builtin_ve_vl_vslawzx_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vslawzx_vvsl : GCCBuiltin<"__builtin_ve_vl_vslawzx_vvsl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vslawzx_vvsvl : GCCBuiltin<"__builtin_ve_vl_vslawzx_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vslawzx_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vslawzx_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vslawzx_vvsmvl : GCCBuiltin<"__builtin_ve_vl_vslawzx_vvsmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvsla_vvvl : GCCBuiltin<"__builtin_ve_vl_pvsla_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvsla_vvvvl : GCCBuiltin<"__builtin_ve_vl_pvsla_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvsla_vvsl : GCCBuiltin<"__builtin_ve_vl_pvsla_vvsl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvsla_vvsvl : GCCBuiltin<"__builtin_ve_vl_pvsla_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvsla_vvvMvl : GCCBuiltin<"__builtin_ve_vl_pvsla_vvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvsla_vvsMvl : GCCBuiltin<"__builtin_ve_vl_pvsla_vvsMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vslal_vvvl : GCCBuiltin<"__builtin_ve_vl_vslal_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vslal_vvvvl : 
GCCBuiltin<"__builtin_ve_vl_vslal_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vslal_vvsl : GCCBuiltin<"__builtin_ve_vl_vslal_vvsl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vslal_vvsvl : GCCBuiltin<"__builtin_ve_vl_vslal_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vslal_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vslal_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vslal_vvsmvl : GCCBuiltin<"__builtin_ve_vl_vslal_vvsmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vsrawsx_vvvl : GCCBuiltin<"__builtin_ve_vl_vsrawsx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vsrawsx_vvvvl : GCCBuiltin<"__builtin_ve_vl_vsrawsx_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vsrawsx_vvsl : GCCBuiltin<"__builtin_ve_vl_vsrawsx_vvsl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vsrawsx_vvsvl : GCCBuiltin<"__builtin_ve_vl_vsrawsx_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vsrawsx_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vsrawsx_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vsrawsx_vvsmvl : GCCBuiltin<"__builtin_ve_vl_vsrawsx_vvsmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vsrawzx_vvvl : GCCBuiltin<"__builtin_ve_vl_vsrawzx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vsrawzx_vvvvl : GCCBuiltin<"__builtin_ve_vl_vsrawzx_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vsrawzx_vvsl : GCCBuiltin<"__builtin_ve_vl_vsrawzx_vvsl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vsrawzx_vvsvl : GCCBuiltin<"__builtin_ve_vl_vsrawzx_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vsrawzx_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vsrawzx_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vsrawzx_vvsmvl : GCCBuiltin<"__builtin_ve_vl_vsrawzx_vvsmvl">, 
Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvsra_vvvl : GCCBuiltin<"__builtin_ve_vl_pvsra_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvsra_vvvvl : GCCBuiltin<"__builtin_ve_vl_pvsra_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvsra_vvsl : GCCBuiltin<"__builtin_ve_vl_pvsra_vvsl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvsra_vvsvl : GCCBuiltin<"__builtin_ve_vl_pvsra_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvsra_vvvMvl : GCCBuiltin<"__builtin_ve_vl_pvsra_vvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvsra_vvsMvl : GCCBuiltin<"__builtin_ve_vl_pvsra_vvsMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vsral_vvvl : GCCBuiltin<"__builtin_ve_vl_vsral_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vsral_vvvvl : GCCBuiltin<"__builtin_ve_vl_vsral_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vsral_vvsl : GCCBuiltin<"__builtin_ve_vl_vsral_vvsl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vsral_vvsvl : GCCBuiltin<"__builtin_ve_vl_vsral_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vsral_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vsral_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vsral_vvsmvl : GCCBuiltin<"__builtin_ve_vl_vsral_vvsmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vsfa_vvssl : GCCBuiltin<"__builtin_ve_vl_vsfa_vvssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vsfa_vvssvl : GCCBuiltin<"__builtin_ve_vl_vsfa_vvssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vsfa_vvssmvl : GCCBuiltin<"__builtin_ve_vl_vsfa_vvssmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfaddd_vvvl : GCCBuiltin<"__builtin_ve_vl_vfaddd_vvvl">, Intrinsic<[LLVMType<v256f64>], 
[LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfaddd_vvvvl : GCCBuiltin<"__builtin_ve_vl_vfaddd_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfaddd_vsvl : GCCBuiltin<"__builtin_ve_vl_vfaddd_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfaddd_vsvvl : GCCBuiltin<"__builtin_ve_vl_vfaddd_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfaddd_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vfaddd_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfaddd_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vfaddd_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfadds_vvvl : GCCBuiltin<"__builtin_ve_vl_vfadds_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfadds_vvvvl : GCCBuiltin<"__builtin_ve_vl_vfadds_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfadds_vsvl : GCCBuiltin<"__builtin_ve_vl_vfadds_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfadds_vsvvl : GCCBuiltin<"__builtin_ve_vl_vfadds_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfadds_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vfadds_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfadds_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vfadds_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfadd_vvvl : GCCBuiltin<"__builtin_ve_vl_pvfadd_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfadd_vvvvl : GCCBuiltin<"__builtin_ve_vl_pvfadd_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfadd_vsvl : GCCBuiltin<"__builtin_ve_vl_pvfadd_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfadd_vsvvl : GCCBuiltin<"__builtin_ve_vl_pvfadd_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfadd_vvvMvl : GCCBuiltin<"__builtin_ve_vl_pvfadd_vvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; 
+let TargetPrefix = "ve" in def int_ve_vl_pvfadd_vsvMvl : GCCBuiltin<"__builtin_ve_vl_pvfadd_vsvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfsubd_vvvl : GCCBuiltin<"__builtin_ve_vl_vfsubd_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfsubd_vvvvl : GCCBuiltin<"__builtin_ve_vl_vfsubd_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfsubd_vsvl : GCCBuiltin<"__builtin_ve_vl_vfsubd_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfsubd_vsvvl : GCCBuiltin<"__builtin_ve_vl_vfsubd_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfsubd_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vfsubd_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfsubd_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vfsubd_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfsubs_vvvl : GCCBuiltin<"__builtin_ve_vl_vfsubs_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfsubs_vvvvl : GCCBuiltin<"__builtin_ve_vl_vfsubs_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfsubs_vsvl : GCCBuiltin<"__builtin_ve_vl_vfsubs_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfsubs_vsvvl : GCCBuiltin<"__builtin_ve_vl_vfsubs_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfsubs_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vfsubs_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfsubs_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vfsubs_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfsub_vvvl : GCCBuiltin<"__builtin_ve_vl_pvfsub_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfsub_vvvvl : GCCBuiltin<"__builtin_ve_vl_pvfsub_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfsub_vsvl : GCCBuiltin<"__builtin_ve_vl_pvfsub_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfsub_vsvvl : 
GCCBuiltin<"__builtin_ve_vl_pvfsub_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfsub_vvvMvl : GCCBuiltin<"__builtin_ve_vl_pvfsub_vvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfsub_vsvMvl : GCCBuiltin<"__builtin_ve_vl_pvfsub_vsvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmuld_vvvl : GCCBuiltin<"__builtin_ve_vl_vfmuld_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmuld_vvvvl : GCCBuiltin<"__builtin_ve_vl_vfmuld_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmuld_vsvl : GCCBuiltin<"__builtin_ve_vl_vfmuld_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmuld_vsvvl : GCCBuiltin<"__builtin_ve_vl_vfmuld_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmuld_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vfmuld_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmuld_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vfmuld_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmuls_vvvl : GCCBuiltin<"__builtin_ve_vl_vfmuls_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmuls_vvvvl : GCCBuiltin<"__builtin_ve_vl_vfmuls_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmuls_vsvl : GCCBuiltin<"__builtin_ve_vl_vfmuls_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmuls_vsvvl : GCCBuiltin<"__builtin_ve_vl_vfmuls_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmuls_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vfmuls_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmuls_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vfmuls_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmul_vvvl : GCCBuiltin<"__builtin_ve_vl_pvfmul_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmul_vvvvl : GCCBuiltin<"__builtin_ve_vl_pvfmul_vvvvl">, 
Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmul_vsvl : GCCBuiltin<"__builtin_ve_vl_pvfmul_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmul_vsvvl : GCCBuiltin<"__builtin_ve_vl_pvfmul_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmul_vvvMvl : GCCBuiltin<"__builtin_ve_vl_pvfmul_vvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmul_vsvMvl : GCCBuiltin<"__builtin_ve_vl_pvfmul_vsvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfdivd_vvvl : GCCBuiltin<"__builtin_ve_vl_vfdivd_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfdivd_vvvvl : GCCBuiltin<"__builtin_ve_vl_vfdivd_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfdivd_vsvl : GCCBuiltin<"__builtin_ve_vl_vfdivd_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfdivd_vsvvl : GCCBuiltin<"__builtin_ve_vl_vfdivd_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfdivd_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vfdivd_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfdivd_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vfdivd_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfdivs_vvvl : GCCBuiltin<"__builtin_ve_vl_vfdivs_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfdivs_vvvvl : GCCBuiltin<"__builtin_ve_vl_vfdivs_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfdivs_vsvl : GCCBuiltin<"__builtin_ve_vl_vfdivs_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfdivs_vsvvl : GCCBuiltin<"__builtin_ve_vl_vfdivs_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfdivs_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vfdivs_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfdivs_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vfdivs_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, 
LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfsqrtd_vvl : GCCBuiltin<"__builtin_ve_vl_vfsqrtd_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfsqrtd_vvvl : GCCBuiltin<"__builtin_ve_vl_vfsqrtd_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfsqrts_vvl : GCCBuiltin<"__builtin_ve_vl_vfsqrts_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfsqrts_vvvl : GCCBuiltin<"__builtin_ve_vl_vfsqrts_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfcmpd_vvvl : GCCBuiltin<"__builtin_ve_vl_vfcmpd_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfcmpd_vvvvl : GCCBuiltin<"__builtin_ve_vl_vfcmpd_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfcmpd_vsvl : GCCBuiltin<"__builtin_ve_vl_vfcmpd_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfcmpd_vsvvl : GCCBuiltin<"__builtin_ve_vl_vfcmpd_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfcmpd_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vfcmpd_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfcmpd_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vfcmpd_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfcmps_vvvl : GCCBuiltin<"__builtin_ve_vl_vfcmps_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfcmps_vvvvl : GCCBuiltin<"__builtin_ve_vl_vfcmps_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfcmps_vsvl : GCCBuiltin<"__builtin_ve_vl_vfcmps_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfcmps_vsvvl : GCCBuiltin<"__builtin_ve_vl_vfcmps_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfcmps_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vfcmps_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfcmps_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vfcmps_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfcmp_vvvl : GCCBuiltin<"__builtin_ve_vl_pvfcmp_vvvl">, 
Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfcmp_vvvvl : GCCBuiltin<"__builtin_ve_vl_pvfcmp_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfcmp_vsvl : GCCBuiltin<"__builtin_ve_vl_pvfcmp_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfcmp_vsvvl : GCCBuiltin<"__builtin_ve_vl_pvfcmp_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfcmp_vvvMvl : GCCBuiltin<"__builtin_ve_vl_pvfcmp_vvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfcmp_vsvMvl : GCCBuiltin<"__builtin_ve_vl_pvfcmp_vsvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmaxd_vvvl : GCCBuiltin<"__builtin_ve_vl_vfmaxd_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmaxd_vvvvl : GCCBuiltin<"__builtin_ve_vl_vfmaxd_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmaxd_vsvl : GCCBuiltin<"__builtin_ve_vl_vfmaxd_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmaxd_vsvvl : GCCBuiltin<"__builtin_ve_vl_vfmaxd_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmaxd_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vfmaxd_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmaxd_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vfmaxd_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmaxs_vvvl : GCCBuiltin<"__builtin_ve_vl_vfmaxs_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmaxs_vvvvl : GCCBuiltin<"__builtin_ve_vl_vfmaxs_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmaxs_vsvl : GCCBuiltin<"__builtin_ve_vl_vfmaxs_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmaxs_vsvvl : GCCBuiltin<"__builtin_ve_vl_vfmaxs_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmaxs_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vfmaxs_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, 
LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmaxs_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vfmaxs_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmax_vvvl : GCCBuiltin<"__builtin_ve_vl_pvfmax_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmax_vvvvl : GCCBuiltin<"__builtin_ve_vl_pvfmax_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmax_vsvl : GCCBuiltin<"__builtin_ve_vl_pvfmax_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmax_vsvvl : GCCBuiltin<"__builtin_ve_vl_pvfmax_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmax_vvvMvl : GCCBuiltin<"__builtin_ve_vl_pvfmax_vvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmax_vsvMvl : GCCBuiltin<"__builtin_ve_vl_pvfmax_vsvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmind_vvvl : GCCBuiltin<"__builtin_ve_vl_vfmind_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmind_vvvvl : GCCBuiltin<"__builtin_ve_vl_vfmind_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmind_vsvl : GCCBuiltin<"__builtin_ve_vl_vfmind_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmind_vsvvl : GCCBuiltin<"__builtin_ve_vl_vfmind_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmind_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vfmind_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmind_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vfmind_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmins_vvvl : GCCBuiltin<"__builtin_ve_vl_vfmins_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmins_vvvvl : GCCBuiltin<"__builtin_ve_vl_vfmins_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmins_vsvl : GCCBuiltin<"__builtin_ve_vl_vfmins_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmins_vsvvl : 
GCCBuiltin<"__builtin_ve_vl_vfmins_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmins_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vfmins_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmins_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vfmins_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmin_vvvl : GCCBuiltin<"__builtin_ve_vl_pvfmin_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmin_vvvvl : GCCBuiltin<"__builtin_ve_vl_pvfmin_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmin_vsvl : GCCBuiltin<"__builtin_ve_vl_pvfmin_vsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmin_vsvvl : GCCBuiltin<"__builtin_ve_vl_pvfmin_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmin_vvvMvl : GCCBuiltin<"__builtin_ve_vl_pvfmin_vvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmin_vsvMvl : GCCBuiltin<"__builtin_ve_vl_pvfmin_vsvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmadd_vvvvl : GCCBuiltin<"__builtin_ve_vl_vfmadd_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmadd_vvvvvl : GCCBuiltin<"__builtin_ve_vl_vfmadd_vvvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmadd_vsvvl : GCCBuiltin<"__builtin_ve_vl_vfmadd_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmadd_vsvvvl : GCCBuiltin<"__builtin_ve_vl_vfmadd_vsvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmadd_vvsvl : GCCBuiltin<"__builtin_ve_vl_vfmadd_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmadd_vvsvvl : GCCBuiltin<"__builtin_ve_vl_vfmadd_vvsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmadd_vvvvmvl : GCCBuiltin<"__builtin_ve_vl_vfmadd_vvvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], 
[IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmadd_vsvvmvl : GCCBuiltin<"__builtin_ve_vl_vfmadd_vsvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmadd_vvsvmvl : GCCBuiltin<"__builtin_ve_vl_vfmadd_vvsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmads_vvvvl : GCCBuiltin<"__builtin_ve_vl_vfmads_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmads_vvvvvl : GCCBuiltin<"__builtin_ve_vl_vfmads_vvvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmads_vsvvl : GCCBuiltin<"__builtin_ve_vl_vfmads_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmads_vsvvvl : GCCBuiltin<"__builtin_ve_vl_vfmads_vsvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmads_vvsvl : GCCBuiltin<"__builtin_ve_vl_vfmads_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<f32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmads_vvsvvl : GCCBuiltin<"__builtin_ve_vl_vfmads_vvsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmads_vvvvmvl : GCCBuiltin<"__builtin_ve_vl_vfmads_vvvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmads_vsvvmvl : GCCBuiltin<"__builtin_ve_vl_vfmads_vsvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmads_vvsvmvl : GCCBuiltin<"__builtin_ve_vl_vfmads_vvsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmad_vvvvl : GCCBuiltin<"__builtin_ve_vl_pvfmad_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmad_vvvvvl : GCCBuiltin<"__builtin_ve_vl_pvfmad_vvvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmad_vsvvl : GCCBuiltin<"__builtin_ve_vl_pvfmad_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmad_vsvvvl : GCCBuiltin<"__builtin_ve_vl_pvfmad_vsvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, 
LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmad_vvsvl : GCCBuiltin<"__builtin_ve_vl_pvfmad_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmad_vvsvvl : GCCBuiltin<"__builtin_ve_vl_pvfmad_vvsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmad_vvvvMvl : GCCBuiltin<"__builtin_ve_vl_pvfmad_vvvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmad_vsvvMvl : GCCBuiltin<"__builtin_ve_vl_pvfmad_vsvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmad_vvsvMvl : GCCBuiltin<"__builtin_ve_vl_pvfmad_vvsvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmsbd_vvvvl : GCCBuiltin<"__builtin_ve_vl_vfmsbd_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmsbd_vvvvvl : GCCBuiltin<"__builtin_ve_vl_vfmsbd_vvvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmsbd_vsvvl : GCCBuiltin<"__builtin_ve_vl_vfmsbd_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmsbd_vsvvvl : GCCBuiltin<"__builtin_ve_vl_vfmsbd_vsvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmsbd_vvsvl : GCCBuiltin<"__builtin_ve_vl_vfmsbd_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmsbd_vvsvvl : GCCBuiltin<"__builtin_ve_vl_vfmsbd_vvsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmsbd_vvvvmvl : GCCBuiltin<"__builtin_ve_vl_vfmsbd_vvvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmsbd_vsvvmvl : GCCBuiltin<"__builtin_ve_vl_vfmsbd_vsvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmsbd_vvsvmvl : GCCBuiltin<"__builtin_ve_vl_vfmsbd_vvsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmsbs_vvvvl : GCCBuiltin<"__builtin_ve_vl_vfmsbs_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, 
LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmsbs_vvvvvl : GCCBuiltin<"__builtin_ve_vl_vfmsbs_vvvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmsbs_vsvvl : GCCBuiltin<"__builtin_ve_vl_vfmsbs_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmsbs_vsvvvl : GCCBuiltin<"__builtin_ve_vl_vfmsbs_vsvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmsbs_vvsvl : GCCBuiltin<"__builtin_ve_vl_vfmsbs_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<f32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmsbs_vvsvvl : GCCBuiltin<"__builtin_ve_vl_vfmsbs_vvsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmsbs_vvvvmvl : GCCBuiltin<"__builtin_ve_vl_vfmsbs_vvvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmsbs_vsvvmvl : GCCBuiltin<"__builtin_ve_vl_vfmsbs_vsvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmsbs_vvsvmvl : GCCBuiltin<"__builtin_ve_vl_vfmsbs_vvsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmsb_vvvvl : GCCBuiltin<"__builtin_ve_vl_pvfmsb_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmsb_vvvvvl : GCCBuiltin<"__builtin_ve_vl_pvfmsb_vvvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmsb_vsvvl : GCCBuiltin<"__builtin_ve_vl_pvfmsb_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmsb_vsvvvl : GCCBuiltin<"__builtin_ve_vl_pvfmsb_vsvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmsb_vvsvl : GCCBuiltin<"__builtin_ve_vl_pvfmsb_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmsb_vvsvvl : GCCBuiltin<"__builtin_ve_vl_pvfmsb_vvsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmsb_vvvvMvl : GCCBuiltin<"__builtin_ve_vl_pvfmsb_vvvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, 
LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmsb_vsvvMvl : GCCBuiltin<"__builtin_ve_vl_pvfmsb_vsvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmsb_vvsvMvl : GCCBuiltin<"__builtin_ve_vl_pvfmsb_vvsvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfnmadd_vvvvl : GCCBuiltin<"__builtin_ve_vl_vfnmadd_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfnmadd_vvvvvl : GCCBuiltin<"__builtin_ve_vl_vfnmadd_vvvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfnmadd_vsvvl : GCCBuiltin<"__builtin_ve_vl_vfnmadd_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfnmadd_vsvvvl : GCCBuiltin<"__builtin_ve_vl_vfnmadd_vsvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfnmadd_vvsvl : GCCBuiltin<"__builtin_ve_vl_vfnmadd_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfnmadd_vvsvvl : GCCBuiltin<"__builtin_ve_vl_vfnmadd_vvsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfnmadd_vvvvmvl : GCCBuiltin<"__builtin_ve_vl_vfnmadd_vvvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfnmadd_vsvvmvl : GCCBuiltin<"__builtin_ve_vl_vfnmadd_vsvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfnmadd_vvsvmvl : GCCBuiltin<"__builtin_ve_vl_vfnmadd_vvsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfnmads_vvvvl : GCCBuiltin<"__builtin_ve_vl_vfnmads_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfnmads_vvvvvl : GCCBuiltin<"__builtin_ve_vl_vfnmads_vvvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfnmads_vsvvl : GCCBuiltin<"__builtin_ve_vl_vfnmads_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfnmads_vsvvvl : GCCBuiltin<"__builtin_ve_vl_vfnmads_vsvvvl">, 
Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfnmads_vvsvl : GCCBuiltin<"__builtin_ve_vl_vfnmads_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<f32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfnmads_vvsvvl : GCCBuiltin<"__builtin_ve_vl_vfnmads_vvsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfnmads_vvvvmvl : GCCBuiltin<"__builtin_ve_vl_vfnmads_vvvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfnmads_vsvvmvl : GCCBuiltin<"__builtin_ve_vl_vfnmads_vsvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfnmads_vvsvmvl : GCCBuiltin<"__builtin_ve_vl_vfnmads_vvsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfnmad_vvvvl : GCCBuiltin<"__builtin_ve_vl_pvfnmad_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfnmad_vvvvvl : GCCBuiltin<"__builtin_ve_vl_pvfnmad_vvvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfnmad_vsvvl : GCCBuiltin<"__builtin_ve_vl_pvfnmad_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfnmad_vsvvvl : GCCBuiltin<"__builtin_ve_vl_pvfnmad_vsvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfnmad_vvsvl : GCCBuiltin<"__builtin_ve_vl_pvfnmad_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfnmad_vvsvvl : GCCBuiltin<"__builtin_ve_vl_pvfnmad_vvsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfnmad_vvvvMvl : GCCBuiltin<"__builtin_ve_vl_pvfnmad_vvvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfnmad_vsvvMvl : GCCBuiltin<"__builtin_ve_vl_pvfnmad_vsvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfnmad_vvsvMvl : GCCBuiltin<"__builtin_ve_vl_pvfnmad_vvsvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = 
"ve" in def int_ve_vl_vfnmsbd_vvvvl : GCCBuiltin<"__builtin_ve_vl_vfnmsbd_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfnmsbd_vvvvvl : GCCBuiltin<"__builtin_ve_vl_vfnmsbd_vvvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfnmsbd_vsvvl : GCCBuiltin<"__builtin_ve_vl_vfnmsbd_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfnmsbd_vsvvvl : GCCBuiltin<"__builtin_ve_vl_vfnmsbd_vsvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfnmsbd_vvsvl : GCCBuiltin<"__builtin_ve_vl_vfnmsbd_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfnmsbd_vvsvvl : GCCBuiltin<"__builtin_ve_vl_vfnmsbd_vvsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfnmsbd_vvvvmvl : GCCBuiltin<"__builtin_ve_vl_vfnmsbd_vvvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfnmsbd_vsvvmvl : GCCBuiltin<"__builtin_ve_vl_vfnmsbd_vsvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfnmsbd_vvsvmvl : GCCBuiltin<"__builtin_ve_vl_vfnmsbd_vvsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfnmsbs_vvvvl : GCCBuiltin<"__builtin_ve_vl_vfnmsbs_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfnmsbs_vvvvvl : GCCBuiltin<"__builtin_ve_vl_vfnmsbs_vvvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfnmsbs_vsvvl : GCCBuiltin<"__builtin_ve_vl_vfnmsbs_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfnmsbs_vsvvvl : GCCBuiltin<"__builtin_ve_vl_vfnmsbs_vsvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfnmsbs_vvsvl : GCCBuiltin<"__builtin_ve_vl_vfnmsbs_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<f32>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfnmsbs_vvsvvl : GCCBuiltin<"__builtin_ve_vl_vfnmsbs_vvsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def 
int_ve_vl_vfnmsbs_vvvvmvl : GCCBuiltin<"__builtin_ve_vl_vfnmsbs_vvvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfnmsbs_vsvvmvl : GCCBuiltin<"__builtin_ve_vl_vfnmsbs_vsvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfnmsbs_vvsvmvl : GCCBuiltin<"__builtin_ve_vl_vfnmsbs_vvsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<f32>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfnmsb_vvvvl : GCCBuiltin<"__builtin_ve_vl_pvfnmsb_vvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfnmsb_vvvvvl : GCCBuiltin<"__builtin_ve_vl_pvfnmsb_vvvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfnmsb_vsvvl : GCCBuiltin<"__builtin_ve_vl_pvfnmsb_vsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfnmsb_vsvvvl : GCCBuiltin<"__builtin_ve_vl_pvfnmsb_vsvvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfnmsb_vvsvl : GCCBuiltin<"__builtin_ve_vl_pvfnmsb_vvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfnmsb_vvsvvl : GCCBuiltin<"__builtin_ve_vl_pvfnmsb_vvsvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfnmsb_vvvvMvl : GCCBuiltin<"__builtin_ve_vl_pvfnmsb_vvvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfnmsb_vsvvMvl : GCCBuiltin<"__builtin_ve_vl_pvfnmsb_vsvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfnmsb_vvsvMvl : GCCBuiltin<"__builtin_ve_vl_pvfnmsb_vvsvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vrcpd_vvl : GCCBuiltin<"__builtin_ve_vl_vrcpd_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vrcpd_vvvl : GCCBuiltin<"__builtin_ve_vl_vrcpd_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vrcps_vvl : GCCBuiltin<"__builtin_ve_vl_vrcps_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vrcps_vvvl : GCCBuiltin<"__builtin_ve_vl_vrcps_vvvl">, 
Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvrcp_vvl : GCCBuiltin<"__builtin_ve_vl_pvrcp_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvrcp_vvvl : GCCBuiltin<"__builtin_ve_vl_pvrcp_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vrsqrtd_vvl : GCCBuiltin<"__builtin_ve_vl_vrsqrtd_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vrsqrtd_vvvl : GCCBuiltin<"__builtin_ve_vl_vrsqrtd_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vrsqrts_vvl : GCCBuiltin<"__builtin_ve_vl_vrsqrts_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vrsqrts_vvvl : GCCBuiltin<"__builtin_ve_vl_vrsqrts_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvrsqrt_vvl : GCCBuiltin<"__builtin_ve_vl_pvrsqrt_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvrsqrt_vvvl : GCCBuiltin<"__builtin_ve_vl_pvrsqrt_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vrsqrtdnex_vvl : GCCBuiltin<"__builtin_ve_vl_vrsqrtdnex_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vrsqrtdnex_vvvl : GCCBuiltin<"__builtin_ve_vl_vrsqrtdnex_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vrsqrtsnex_vvl : GCCBuiltin<"__builtin_ve_vl_vrsqrtsnex_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vrsqrtsnex_vvvl : GCCBuiltin<"__builtin_ve_vl_vrsqrtsnex_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvrsqrtnex_vvl : GCCBuiltin<"__builtin_ve_vl_pvrsqrtnex_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvrsqrtnex_vvvl : GCCBuiltin<"__builtin_ve_vl_pvrsqrtnex_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vcvtwdsx_vvl : GCCBuiltin<"__builtin_ve_vl_vcvtwdsx_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vcvtwdsx_vvvl : GCCBuiltin<"__builtin_ve_vl_vcvtwdsx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vcvtwdsx_vvmvl : GCCBuiltin<"__builtin_ve_vl_vcvtwdsx_vvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vcvtwdsxrz_vvl : GCCBuiltin<"__builtin_ve_vl_vcvtwdsxrz_vvl">, Intrinsic<[LLVMType<v256f64>], 
[LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vcvtwdsxrz_vvvl : GCCBuiltin<"__builtin_ve_vl_vcvtwdsxrz_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vcvtwdsxrz_vvmvl : GCCBuiltin<"__builtin_ve_vl_vcvtwdsxrz_vvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vcvtwdzx_vvl : GCCBuiltin<"__builtin_ve_vl_vcvtwdzx_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vcvtwdzx_vvvl : GCCBuiltin<"__builtin_ve_vl_vcvtwdzx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vcvtwdzx_vvmvl : GCCBuiltin<"__builtin_ve_vl_vcvtwdzx_vvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vcvtwdzxrz_vvl : GCCBuiltin<"__builtin_ve_vl_vcvtwdzxrz_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vcvtwdzxrz_vvvl : GCCBuiltin<"__builtin_ve_vl_vcvtwdzxrz_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vcvtwdzxrz_vvmvl : GCCBuiltin<"__builtin_ve_vl_vcvtwdzxrz_vvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vcvtwssx_vvl : GCCBuiltin<"__builtin_ve_vl_vcvtwssx_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vcvtwssx_vvvl : GCCBuiltin<"__builtin_ve_vl_vcvtwssx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vcvtwssx_vvmvl : GCCBuiltin<"__builtin_ve_vl_vcvtwssx_vvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vcvtwssxrz_vvl : GCCBuiltin<"__builtin_ve_vl_vcvtwssxrz_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vcvtwssxrz_vvvl : GCCBuiltin<"__builtin_ve_vl_vcvtwssxrz_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vcvtwssxrz_vvmvl : GCCBuiltin<"__builtin_ve_vl_vcvtwssxrz_vvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vcvtwszx_vvl : GCCBuiltin<"__builtin_ve_vl_vcvtwszx_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vcvtwszx_vvvl : GCCBuiltin<"__builtin_ve_vl_vcvtwszx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vcvtwszx_vvmvl : GCCBuiltin<"__builtin_ve_vl_vcvtwszx_vvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let 
TargetPrefix = "ve" in def int_ve_vl_vcvtwszxrz_vvl : GCCBuiltin<"__builtin_ve_vl_vcvtwszxrz_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vcvtwszxrz_vvvl : GCCBuiltin<"__builtin_ve_vl_vcvtwszxrz_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vcvtwszxrz_vvmvl : GCCBuiltin<"__builtin_ve_vl_vcvtwszxrz_vvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvcvtws_vvl : GCCBuiltin<"__builtin_ve_vl_pvcvtws_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvcvtws_vvvl : GCCBuiltin<"__builtin_ve_vl_pvcvtws_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvcvtws_vvMvl : GCCBuiltin<"__builtin_ve_vl_pvcvtws_vvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvcvtwsrz_vvl : GCCBuiltin<"__builtin_ve_vl_pvcvtwsrz_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvcvtwsrz_vvvl : GCCBuiltin<"__builtin_ve_vl_pvcvtwsrz_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvcvtwsrz_vvMvl : GCCBuiltin<"__builtin_ve_vl_pvcvtwsrz_vvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vcvtld_vvl : GCCBuiltin<"__builtin_ve_vl_vcvtld_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vcvtld_vvvl : GCCBuiltin<"__builtin_ve_vl_vcvtld_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vcvtld_vvmvl : GCCBuiltin<"__builtin_ve_vl_vcvtld_vvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vcvtldrz_vvl : GCCBuiltin<"__builtin_ve_vl_vcvtldrz_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vcvtldrz_vvvl : GCCBuiltin<"__builtin_ve_vl_vcvtldrz_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vcvtldrz_vvmvl : GCCBuiltin<"__builtin_ve_vl_vcvtldrz_vvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vcvtdw_vvl : GCCBuiltin<"__builtin_ve_vl_vcvtdw_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vcvtdw_vvvl : GCCBuiltin<"__builtin_ve_vl_vcvtdw_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vcvtsw_vvl : GCCBuiltin<"__builtin_ve_vl_vcvtsw_vvl">, Intrinsic<[LLVMType<v256f64>], 
[LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vcvtsw_vvvl : GCCBuiltin<"__builtin_ve_vl_vcvtsw_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvcvtsw_vvl : GCCBuiltin<"__builtin_ve_vl_pvcvtsw_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvcvtsw_vvvl : GCCBuiltin<"__builtin_ve_vl_pvcvtsw_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vcvtdl_vvl : GCCBuiltin<"__builtin_ve_vl_vcvtdl_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vcvtdl_vvvl : GCCBuiltin<"__builtin_ve_vl_vcvtdl_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vcvtds_vvl : GCCBuiltin<"__builtin_ve_vl_vcvtds_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vcvtds_vvvl : GCCBuiltin<"__builtin_ve_vl_vcvtds_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vcvtsd_vvl : GCCBuiltin<"__builtin_ve_vl_vcvtsd_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vcvtsd_vvvl : GCCBuiltin<"__builtin_ve_vl_vcvtsd_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vmrg_vvvml : GCCBuiltin<"__builtin_ve_vl_vmrg_vvvml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vmrg_vvvmvl : GCCBuiltin<"__builtin_ve_vl_vmrg_vvvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vmrg_vsvml : GCCBuiltin<"__builtin_ve_vl_vmrg_vsvml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vmrg_vsvmvl : GCCBuiltin<"__builtin_ve_vl_vmrg_vsvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i64>, LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vmrgw_vvvMl : GCCBuiltin<"__builtin_ve_vl_vmrgw_vvvMl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vmrgw_vvvMvl : GCCBuiltin<"__builtin_ve_vl_vmrgw_vvvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vmrgw_vsvMl : GCCBuiltin<"__builtin_ve_vl_vmrgw_vsvMl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vmrgw_vsvMvl : GCCBuiltin<"__builtin_ve_vl_vmrgw_vsvMvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<i32>, LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<v256f64>, LLVMType<i32>], 
[IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vshf_vvvsl : GCCBuiltin<"__builtin_ve_vl_vshf_vvvsl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vshf_vvvsvl : GCCBuiltin<"__builtin_ve_vl_vshf_vvvsvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vcp_vvmvl : GCCBuiltin<"__builtin_ve_vl_vcp_vvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vex_vvmvl : GCCBuiltin<"__builtin_ve_vl_vex_vvmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmklat_ml : GCCBuiltin<"__builtin_ve_vl_vfmklat_ml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmklaf_ml : GCCBuiltin<"__builtin_ve_vl_vfmklaf_ml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkat_Ml : GCCBuiltin<"__builtin_ve_vl_pvfmkat_Ml">, Intrinsic<[LLVMType<v512i1>], [LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkaf_Ml : GCCBuiltin<"__builtin_ve_vl_pvfmkaf_Ml">, Intrinsic<[LLVMType<v512i1>], [LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmklgt_mvl : GCCBuiltin<"__builtin_ve_vl_vfmklgt_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmklgt_mvml : GCCBuiltin<"__builtin_ve_vl_vfmklgt_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkllt_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkllt_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkllt_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkllt_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmklne_mvl : GCCBuiltin<"__builtin_ve_vl_vfmklne_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmklne_mvml : GCCBuiltin<"__builtin_ve_vl_vfmklne_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkleq_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkleq_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkleq_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkleq_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmklge_mvl : GCCBuiltin<"__builtin_ve_vl_vfmklge_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmklge_mvml : GCCBuiltin<"__builtin_ve_vl_vfmklge_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmklle_mvl : GCCBuiltin<"__builtin_ve_vl_vfmklle_mvl">, Intrinsic<[LLVMType<v256i1>], 
[LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmklle_mvml : GCCBuiltin<"__builtin_ve_vl_vfmklle_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmklnum_mvl : GCCBuiltin<"__builtin_ve_vl_vfmklnum_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmklnum_mvml : GCCBuiltin<"__builtin_ve_vl_vfmklnum_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmklnan_mvl : GCCBuiltin<"__builtin_ve_vl_vfmklnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmklnan_mvml : GCCBuiltin<"__builtin_ve_vl_vfmklnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmklgtnan_mvl : GCCBuiltin<"__builtin_ve_vl_vfmklgtnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmklgtnan_mvml : GCCBuiltin<"__builtin_ve_vl_vfmklgtnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmklltnan_mvl : GCCBuiltin<"__builtin_ve_vl_vfmklltnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmklltnan_mvml : GCCBuiltin<"__builtin_ve_vl_vfmklltnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmklnenan_mvl : GCCBuiltin<"__builtin_ve_vl_vfmklnenan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmklnenan_mvml : GCCBuiltin<"__builtin_ve_vl_vfmklnenan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkleqnan_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkleqnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkleqnan_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkleqnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmklgenan_mvl : GCCBuiltin<"__builtin_ve_vl_vfmklgenan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmklgenan_mvml : GCCBuiltin<"__builtin_ve_vl_vfmklgenan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkllenan_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkllenan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkllenan_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkllenan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkwgt_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkwgt_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = 
"ve" in def int_ve_vl_vfmkwgt_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkwgt_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkwlt_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkwlt_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkwlt_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkwlt_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkwne_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkwne_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkwne_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkwne_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkweq_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkweq_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkweq_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkweq_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkwge_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkwge_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkwge_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkwge_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkwle_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkwle_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkwle_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkwle_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkwnum_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkwnum_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkwnum_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkwnum_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkwnan_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkwnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkwnan_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkwnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkwgtnan_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkwgtnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkwgtnan_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkwgtnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkwltnan_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkwltnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkwltnan_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkwltnan_mvml">, Intrinsic<[LLVMType<v256i1>], 
[LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkwnenan_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkwnenan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkwnenan_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkwnenan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkweqnan_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkweqnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkweqnan_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkweqnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkwgenan_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkwgenan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkwgenan_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkwgenan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkwlenan_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkwlenan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkwlenan_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkwlenan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlogt_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwlogt_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupgt_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwupgt_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlogt_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkwlogt_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupgt_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkwupgt_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlolt_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwlolt_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwuplt_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwuplt_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlolt_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkwlolt_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwuplt_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkwuplt_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlone_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwlone_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupne_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwupne_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], 
[IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlone_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkwlone_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupne_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkwupne_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwloeq_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwloeq_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupeq_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwupeq_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwloeq_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkwloeq_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupeq_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkwupeq_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwloge_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwloge_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupge_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwupge_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwloge_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkwloge_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupge_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkwupge_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlole_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwlole_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwuple_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwuple_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlole_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkwlole_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwuple_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkwuple_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlonum_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwlonum_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupnum_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwupnum_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlonum_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkwlonum_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupnum_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkwupnum_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; 
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlonan_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwlonan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupnan_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwupnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlonan_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkwlonan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupnan_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkwupnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlogtnan_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwlogtnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupgtnan_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwupgtnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlogtnan_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkwlogtnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupgtnan_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkwupgtnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwloltnan_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwloltnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupltnan_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwupltnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwloltnan_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkwloltnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupltnan_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkwupltnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlonenan_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwlonenan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupnenan_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwupnenan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlonenan_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkwlonenan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupnenan_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkwupnenan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwloeqnan_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwloeqnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupeqnan_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwupeqnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, 
LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwloeqnan_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkwloeqnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupeqnan_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkwupeqnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlogenan_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwlogenan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupgenan_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwupgenan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlogenan_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkwlogenan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwupgenan_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkwupgenan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlolenan_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwlolenan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwuplenan_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwuplenan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlolenan_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkwlolenan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwuplenan_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkwuplenan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwgt_Mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwgt_Mvl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwgt_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmkwgt_MvMl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlt_Mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwlt_Mvl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlt_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmkwlt_MvMl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwne_Mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwne_Mvl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwne_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmkwne_MvMl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkweq_Mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkweq_Mvl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkweq_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmkweq_MvMl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, 
LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwge_Mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwge_Mvl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwge_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmkwge_MvMl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwle_Mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwle_Mvl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwle_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmkwle_MvMl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwnum_Mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwnum_Mvl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwnum_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmkwnum_MvMl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwnan_Mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwnan_Mvl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwnan_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmkwnan_MvMl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwgtnan_Mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwgtnan_Mvl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwgtnan_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmkwgtnan_MvMl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwltnan_Mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwltnan_Mvl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwltnan_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmkwltnan_MvMl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwnenan_Mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwnenan_Mvl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwnenan_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmkwnenan_MvMl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkweqnan_Mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkweqnan_Mvl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkweqnan_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmkweqnan_MvMl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwgenan_Mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwgenan_Mvl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwgenan_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmkwgenan_MvMl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>; 
+let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlenan_Mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkwlenan_Mvl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkwlenan_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmkwlenan_MvMl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkdgt_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkdgt_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkdgt_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkdgt_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkdlt_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkdlt_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkdlt_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkdlt_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkdne_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkdne_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkdne_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkdne_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkdeq_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkdeq_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkdeq_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkdeq_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkdge_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkdge_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkdge_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkdge_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkdle_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkdle_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkdle_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkdle_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkdnum_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkdnum_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkdnum_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkdnum_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkdnan_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkdnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkdnan_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkdnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkdgtnan_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkdgtnan_mvl">, 
Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkdgtnan_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkdgtnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkdltnan_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkdltnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkdltnan_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkdltnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkdnenan_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkdnenan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkdnenan_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkdnenan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkdeqnan_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkdeqnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkdeqnan_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkdeqnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkdgenan_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkdgenan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkdgenan_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkdgenan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkdlenan_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkdlenan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkdlenan_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkdlenan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmksgt_mvl : GCCBuiltin<"__builtin_ve_vl_vfmksgt_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmksgt_mvml : GCCBuiltin<"__builtin_ve_vl_vfmksgt_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkslt_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkslt_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkslt_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkslt_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmksne_mvl : GCCBuiltin<"__builtin_ve_vl_vfmksne_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmksne_mvml : GCCBuiltin<"__builtin_ve_vl_vfmksne_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkseq_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkseq_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let 
TargetPrefix = "ve" in def int_ve_vl_vfmkseq_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkseq_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmksge_mvl : GCCBuiltin<"__builtin_ve_vl_vfmksge_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmksge_mvml : GCCBuiltin<"__builtin_ve_vl_vfmksge_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmksle_mvl : GCCBuiltin<"__builtin_ve_vl_vfmksle_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmksle_mvml : GCCBuiltin<"__builtin_ve_vl_vfmksle_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmksnum_mvl : GCCBuiltin<"__builtin_ve_vl_vfmksnum_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmksnum_mvml : GCCBuiltin<"__builtin_ve_vl_vfmksnum_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmksnan_mvl : GCCBuiltin<"__builtin_ve_vl_vfmksnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmksnan_mvml : GCCBuiltin<"__builtin_ve_vl_vfmksnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmksgtnan_mvl : GCCBuiltin<"__builtin_ve_vl_vfmksgtnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmksgtnan_mvml : GCCBuiltin<"__builtin_ve_vl_vfmksgtnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmksltnan_mvl : GCCBuiltin<"__builtin_ve_vl_vfmksltnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmksltnan_mvml : GCCBuiltin<"__builtin_ve_vl_vfmksltnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmksnenan_mvl : GCCBuiltin<"__builtin_ve_vl_vfmksnenan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmksnenan_mvml : GCCBuiltin<"__builtin_ve_vl_vfmksnenan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkseqnan_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkseqnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkseqnan_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkseqnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmksgenan_mvl : GCCBuiltin<"__builtin_ve_vl_vfmksgenan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmksgenan_mvml : 
GCCBuiltin<"__builtin_ve_vl_vfmksgenan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkslenan_mvl : GCCBuiltin<"__builtin_ve_vl_vfmkslenan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfmkslenan_mvml : GCCBuiltin<"__builtin_ve_vl_vfmkslenan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkslogt_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkslogt_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmksupgt_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmksupgt_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkslogt_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkslogt_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmksupgt_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmksupgt_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkslolt_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkslolt_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmksuplt_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmksuplt_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkslolt_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkslolt_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmksuplt_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmksuplt_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkslone_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkslone_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmksupne_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmksupne_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkslone_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkslone_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmksupne_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmksupne_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmksloeq_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmksloeq_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmksupeq_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmksupeq_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmksloeq_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmksloeq_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmksupeq_mvml : 
GCCBuiltin<"__builtin_ve_vl_pvfmksupeq_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmksloge_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmksloge_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmksupge_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmksupge_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmksloge_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmksloge_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmksupge_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmksupge_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkslole_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkslole_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmksuple_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmksuple_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkslole_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkslole_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmksuple_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmksuple_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkslonum_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkslonum_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmksupnum_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmksupnum_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkslonum_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkslonum_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmksupnum_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmksupnum_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkslonan_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkslonan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmksupnan_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmksupnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkslonan_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkslonan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmksupnan_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmksupnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkslogtnan_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkslogtnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmksupgtnan_mvl : 
GCCBuiltin<"__builtin_ve_vl_pvfmksupgtnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkslogtnan_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkslogtnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmksupgtnan_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmksupgtnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmksloltnan_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmksloltnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmksupltnan_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmksupltnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmksloltnan_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmksloltnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmksupltnan_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmksupltnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkslonenan_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkslonenan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmksupnenan_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmksupnenan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkslonenan_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkslonenan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmksupnenan_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmksupnenan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmksloeqnan_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmksloeqnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmksupeqnan_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmksupeqnan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmksloeqnan_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmksloeqnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmksupeqnan_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmksupeqnan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkslogenan_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkslogenan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmksupgenan_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmksupgenan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkslogenan_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkslogenan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], 
[IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmksupgenan_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmksupgenan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkslolenan_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkslolenan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmksuplenan_mvl : GCCBuiltin<"__builtin_ve_vl_pvfmksuplenan_mvl">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkslolenan_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmkslolenan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmksuplenan_mvml : GCCBuiltin<"__builtin_ve_vl_pvfmksuplenan_mvml">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmksgt_Mvl : GCCBuiltin<"__builtin_ve_vl_pvfmksgt_Mvl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmksgt_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmksgt_MvMl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkslt_Mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkslt_Mvl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkslt_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmkslt_MvMl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmksne_Mvl : GCCBuiltin<"__builtin_ve_vl_pvfmksne_Mvl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmksne_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmksne_MvMl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkseq_Mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkseq_Mvl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkseq_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmkseq_MvMl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmksge_Mvl : GCCBuiltin<"__builtin_ve_vl_pvfmksge_Mvl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmksge_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmksge_MvMl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmksle_Mvl : GCCBuiltin<"__builtin_ve_vl_pvfmksle_Mvl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmksle_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmksle_MvMl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmksnum_Mvl : GCCBuiltin<"__builtin_ve_vl_pvfmksnum_Mvl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def 
int_ve_vl_pvfmksnum_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmksnum_MvMl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmksnan_Mvl : GCCBuiltin<"__builtin_ve_vl_pvfmksnan_Mvl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmksnan_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmksnan_MvMl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmksgtnan_Mvl : GCCBuiltin<"__builtin_ve_vl_pvfmksgtnan_Mvl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmksgtnan_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmksgtnan_MvMl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmksltnan_Mvl : GCCBuiltin<"__builtin_ve_vl_pvfmksltnan_Mvl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmksltnan_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmksltnan_MvMl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmksnenan_Mvl : GCCBuiltin<"__builtin_ve_vl_pvfmksnenan_Mvl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmksnenan_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmksnenan_MvMl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkseqnan_Mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkseqnan_Mvl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkseqnan_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmkseqnan_MvMl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmksgenan_Mvl : GCCBuiltin<"__builtin_ve_vl_pvfmksgenan_Mvl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmksgenan_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmksgenan_MvMl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkslenan_Mvl : GCCBuiltin<"__builtin_ve_vl_pvfmkslenan_Mvl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pvfmkslenan_MvMl : GCCBuiltin<"__builtin_ve_vl_pvfmkslenan_MvMl">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v256f64>, LLVMType<v512i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vsumwsx_vvl : GCCBuiltin<"__builtin_ve_vl_vsumwsx_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vsumwsx_vvml : GCCBuiltin<"__builtin_ve_vl_vsumwsx_vvml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vsumwzx_vvl : GCCBuiltin<"__builtin_ve_vl_vsumwzx_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vsumwzx_vvml : 
GCCBuiltin<"__builtin_ve_vl_vsumwzx_vvml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vsuml_vvl : GCCBuiltin<"__builtin_ve_vl_vsuml_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vsuml_vvml : GCCBuiltin<"__builtin_ve_vl_vsuml_vvml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfsumd_vvl : GCCBuiltin<"__builtin_ve_vl_vfsumd_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfsumd_vvml : GCCBuiltin<"__builtin_ve_vl_vfsumd_vvml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfsums_vvl : GCCBuiltin<"__builtin_ve_vl_vfsums_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfsums_vvml : GCCBuiltin<"__builtin_ve_vl_vfsums_vvml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vrmaxswfstsx_vvl : GCCBuiltin<"__builtin_ve_vl_vrmaxswfstsx_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vrmaxswfstsx_vvvl : GCCBuiltin<"__builtin_ve_vl_vrmaxswfstsx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vrmaxswlstsx_vvl : GCCBuiltin<"__builtin_ve_vl_vrmaxswlstsx_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vrmaxswlstsx_vvvl : GCCBuiltin<"__builtin_ve_vl_vrmaxswlstsx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vrmaxswfstzx_vvl : GCCBuiltin<"__builtin_ve_vl_vrmaxswfstzx_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vrmaxswfstzx_vvvl : GCCBuiltin<"__builtin_ve_vl_vrmaxswfstzx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vrmaxswlstzx_vvl : GCCBuiltin<"__builtin_ve_vl_vrmaxswlstzx_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vrmaxswlstzx_vvvl : GCCBuiltin<"__builtin_ve_vl_vrmaxswlstzx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vrminswfstsx_vvl : GCCBuiltin<"__builtin_ve_vl_vrminswfstsx_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vrminswfstsx_vvvl : GCCBuiltin<"__builtin_ve_vl_vrminswfstsx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vrminswlstsx_vvl : GCCBuiltin<"__builtin_ve_vl_vrminswlstsx_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vrminswlstsx_vvvl : 
GCCBuiltin<"__builtin_ve_vl_vrminswlstsx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vrminswfstzx_vvl : GCCBuiltin<"__builtin_ve_vl_vrminswfstzx_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vrminswfstzx_vvvl : GCCBuiltin<"__builtin_ve_vl_vrminswfstzx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vrminswlstzx_vvl : GCCBuiltin<"__builtin_ve_vl_vrminswlstzx_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vrminswlstzx_vvvl : GCCBuiltin<"__builtin_ve_vl_vrminswlstzx_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vrmaxslfst_vvl : GCCBuiltin<"__builtin_ve_vl_vrmaxslfst_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vrmaxslfst_vvvl : GCCBuiltin<"__builtin_ve_vl_vrmaxslfst_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vrmaxsllst_vvl : GCCBuiltin<"__builtin_ve_vl_vrmaxsllst_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vrmaxsllst_vvvl : GCCBuiltin<"__builtin_ve_vl_vrmaxsllst_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vrminslfst_vvl : GCCBuiltin<"__builtin_ve_vl_vrminslfst_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vrminslfst_vvvl : GCCBuiltin<"__builtin_ve_vl_vrminslfst_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vrminsllst_vvl : GCCBuiltin<"__builtin_ve_vl_vrminsllst_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vrminsllst_vvvl : GCCBuiltin<"__builtin_ve_vl_vrminsllst_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfrmaxdfst_vvl : GCCBuiltin<"__builtin_ve_vl_vfrmaxdfst_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfrmaxdfst_vvvl : GCCBuiltin<"__builtin_ve_vl_vfrmaxdfst_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfrmaxdlst_vvl : GCCBuiltin<"__builtin_ve_vl_vfrmaxdlst_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfrmaxdlst_vvvl : GCCBuiltin<"__builtin_ve_vl_vfrmaxdlst_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfrmaxsfst_vvl : GCCBuiltin<"__builtin_ve_vl_vfrmaxsfst_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfrmaxsfst_vvvl : 
GCCBuiltin<"__builtin_ve_vl_vfrmaxsfst_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfrmaxslst_vvl : GCCBuiltin<"__builtin_ve_vl_vfrmaxslst_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfrmaxslst_vvvl : GCCBuiltin<"__builtin_ve_vl_vfrmaxslst_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfrmindfst_vvl : GCCBuiltin<"__builtin_ve_vl_vfrmindfst_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfrmindfst_vvvl : GCCBuiltin<"__builtin_ve_vl_vfrmindfst_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfrmindlst_vvl : GCCBuiltin<"__builtin_ve_vl_vfrmindlst_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfrmindlst_vvvl : GCCBuiltin<"__builtin_ve_vl_vfrmindlst_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfrminsfst_vvl : GCCBuiltin<"__builtin_ve_vl_vfrminsfst_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfrminsfst_vvvl : GCCBuiltin<"__builtin_ve_vl_vfrminsfst_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfrminslst_vvl : GCCBuiltin<"__builtin_ve_vl_vfrminslst_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vfrminslst_vvvl : GCCBuiltin<"__builtin_ve_vl_vfrminslst_vvvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vrand_vvl : GCCBuiltin<"__builtin_ve_vl_vrand_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vrand_vvml : GCCBuiltin<"__builtin_ve_vl_vrand_vvml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vror_vvl : GCCBuiltin<"__builtin_ve_vl_vror_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vror_vvml : GCCBuiltin<"__builtin_ve_vl_vror_vvml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vrxor_vvl : GCCBuiltin<"__builtin_ve_vl_vrxor_vvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vrxor_vvml : GCCBuiltin<"__builtin_ve_vl_vrxor_vvml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vgt_vvssl : GCCBuiltin<"__builtin_ve_vl_vgt_vvssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vgt_vvssvl : GCCBuiltin<"__builtin_ve_vl_vgt_vvssvl">, 
Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vgt_vvssml : GCCBuiltin<"__builtin_ve_vl_vgt_vvssml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vgt_vvssmvl : GCCBuiltin<"__builtin_ve_vl_vgt_vvssmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vgtnc_vvssl : GCCBuiltin<"__builtin_ve_vl_vgtnc_vvssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vgtnc_vvssvl : GCCBuiltin<"__builtin_ve_vl_vgtnc_vvssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vgtnc_vvssml : GCCBuiltin<"__builtin_ve_vl_vgtnc_vvssml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vgtnc_vvssmvl : GCCBuiltin<"__builtin_ve_vl_vgtnc_vvssmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vgtu_vvssl : GCCBuiltin<"__builtin_ve_vl_vgtu_vvssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vgtu_vvssvl : GCCBuiltin<"__builtin_ve_vl_vgtu_vvssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vgtu_vvssml : GCCBuiltin<"__builtin_ve_vl_vgtu_vvssml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vgtu_vvssmvl : GCCBuiltin<"__builtin_ve_vl_vgtu_vvssmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vgtunc_vvssl : GCCBuiltin<"__builtin_ve_vl_vgtunc_vvssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vgtunc_vvssvl : GCCBuiltin<"__builtin_ve_vl_vgtunc_vvssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vgtunc_vvssml : GCCBuiltin<"__builtin_ve_vl_vgtunc_vvssml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vgtunc_vvssmvl : GCCBuiltin<"__builtin_ve_vl_vgtunc_vvssmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vgtlsx_vvssl : GCCBuiltin<"__builtin_ve_vl_vgtlsx_vvssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, 
LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vgtlsx_vvssvl : GCCBuiltin<"__builtin_ve_vl_vgtlsx_vvssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vgtlsx_vvssml : GCCBuiltin<"__builtin_ve_vl_vgtlsx_vvssml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vgtlsx_vvssmvl : GCCBuiltin<"__builtin_ve_vl_vgtlsx_vvssmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vgtlsxnc_vvssl : GCCBuiltin<"__builtin_ve_vl_vgtlsxnc_vvssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vgtlsxnc_vvssvl : GCCBuiltin<"__builtin_ve_vl_vgtlsxnc_vvssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vgtlsxnc_vvssml : GCCBuiltin<"__builtin_ve_vl_vgtlsxnc_vvssml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vgtlsxnc_vvssmvl : GCCBuiltin<"__builtin_ve_vl_vgtlsxnc_vvssmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vgtlzx_vvssl : GCCBuiltin<"__builtin_ve_vl_vgtlzx_vvssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vgtlzx_vvssvl : GCCBuiltin<"__builtin_ve_vl_vgtlzx_vvssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vgtlzx_vvssml : GCCBuiltin<"__builtin_ve_vl_vgtlzx_vvssml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vgtlzx_vvssmvl : GCCBuiltin<"__builtin_ve_vl_vgtlzx_vvssmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vgtlzxnc_vvssl : GCCBuiltin<"__builtin_ve_vl_vgtlzxnc_vvssl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vgtlzxnc_vvssvl : GCCBuiltin<"__builtin_ve_vl_vgtlzxnc_vvssvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vgtlzxnc_vvssml : GCCBuiltin<"__builtin_ve_vl_vgtlzxnc_vvssml">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vgtlzxnc_vvssmvl : GCCBuiltin<"__builtin_ve_vl_vgtlzxnc_vvssmvl">, Intrinsic<[LLVMType<v256f64>], [LLVMType<v256f64>, LLVMType<i64>, 
LLVMType<i64>, LLVMType<v256i1>, LLVMType<v256f64>, LLVMType<i32>], [IntrReadMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vsc_vvssl : GCCBuiltin<"__builtin_ve_vl_vsc_vvssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vsc_vvssml : GCCBuiltin<"__builtin_ve_vl_vsc_vvssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vscnc_vvssl : GCCBuiltin<"__builtin_ve_vl_vscnc_vvssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vscnc_vvssml : GCCBuiltin<"__builtin_ve_vl_vscnc_vvssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vscot_vvssl : GCCBuiltin<"__builtin_ve_vl_vscot_vvssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vscot_vvssml : GCCBuiltin<"__builtin_ve_vl_vscot_vvssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vscncot_vvssl : GCCBuiltin<"__builtin_ve_vl_vscncot_vvssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vscncot_vvssml : GCCBuiltin<"__builtin_ve_vl_vscncot_vvssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vscu_vvssl : GCCBuiltin<"__builtin_ve_vl_vscu_vvssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vscu_vvssml : GCCBuiltin<"__builtin_ve_vl_vscu_vvssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vscunc_vvssl : GCCBuiltin<"__builtin_ve_vl_vscunc_vvssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vscunc_vvssml : GCCBuiltin<"__builtin_ve_vl_vscunc_vvssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vscuot_vvssl : GCCBuiltin<"__builtin_ve_vl_vscuot_vvssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vscuot_vvssml : GCCBuiltin<"__builtin_ve_vl_vscuot_vvssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vscuncot_vvssl : GCCBuiltin<"__builtin_ve_vl_vscuncot_vvssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vscuncot_vvssml : 
GCCBuiltin<"__builtin_ve_vl_vscuncot_vvssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vscl_vvssl : GCCBuiltin<"__builtin_ve_vl_vscl_vvssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vscl_vvssml : GCCBuiltin<"__builtin_ve_vl_vscl_vvssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vsclnc_vvssl : GCCBuiltin<"__builtin_ve_vl_vsclnc_vvssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vsclnc_vvssml : GCCBuiltin<"__builtin_ve_vl_vsclnc_vvssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vsclot_vvssl : GCCBuiltin<"__builtin_ve_vl_vsclot_vvssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vsclot_vvssml : GCCBuiltin<"__builtin_ve_vl_vsclot_vvssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vsclncot_vvssl : GCCBuiltin<"__builtin_ve_vl_vsclncot_vvssl">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_vsclncot_vvssml : GCCBuiltin<"__builtin_ve_vl_vsclncot_vvssml">, Intrinsic<[], [LLVMType<v256f64>, LLVMType<v256f64>, LLVMType<i64>, LLVMType<i64>, LLVMType<v256i1>, LLVMType<i32>], [IntrWriteMem]>; +let TargetPrefix = "ve" in def int_ve_vl_andm_mmm : GCCBuiltin<"__builtin_ve_vl_andm_mmm">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256i1>, LLVMType<v256i1>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_andm_MMM : GCCBuiltin<"__builtin_ve_vl_andm_MMM">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v512i1>, LLVMType<v512i1>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_orm_mmm : GCCBuiltin<"__builtin_ve_vl_orm_mmm">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256i1>, LLVMType<v256i1>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_orm_MMM : GCCBuiltin<"__builtin_ve_vl_orm_MMM">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v512i1>, LLVMType<v512i1>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_xorm_mmm : GCCBuiltin<"__builtin_ve_vl_xorm_mmm">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256i1>, LLVMType<v256i1>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_xorm_MMM : GCCBuiltin<"__builtin_ve_vl_xorm_MMM">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v512i1>, LLVMType<v512i1>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_eqvm_mmm : GCCBuiltin<"__builtin_ve_vl_eqvm_mmm">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256i1>, LLVMType<v256i1>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_eqvm_MMM : GCCBuiltin<"__builtin_ve_vl_eqvm_MMM">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v512i1>, LLVMType<v512i1>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_nndm_mmm : GCCBuiltin<"__builtin_ve_vl_nndm_mmm">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256i1>, LLVMType<v256i1>], 
[IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_nndm_MMM : GCCBuiltin<"__builtin_ve_vl_nndm_MMM">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v512i1>, LLVMType<v512i1>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_negm_mm : GCCBuiltin<"__builtin_ve_vl_negm_mm">, Intrinsic<[LLVMType<v256i1>], [LLVMType<v256i1>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_negm_MM : GCCBuiltin<"__builtin_ve_vl_negm_MM">, Intrinsic<[LLVMType<v512i1>], [LLVMType<v512i1>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_pcvm_sml : GCCBuiltin<"__builtin_ve_vl_pcvm_sml">, Intrinsic<[LLVMType<i64>], [LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_lzvm_sml : GCCBuiltin<"__builtin_ve_vl_lzvm_sml">, Intrinsic<[LLVMType<i64>], [LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; +let TargetPrefix = "ve" in def int_ve_vl_tovm_sml : GCCBuiltin<"__builtin_ve_vl_tovm_sml">, Intrinsic<[LLVMType<i64>], [LLVMType<v256i1>, LLVMType<i32>], [IntrNoMem]>; diff --git a/llvm/include/llvm/IR/IntrinsicsWebAssembly.td b/llvm/include/llvm/IR/IntrinsicsWebAssembly.td index 7c9ceb148a47..d306d0ccb90d 100644 --- a/llvm/include/llvm/IR/IntrinsicsWebAssembly.td +++ b/llvm/include/llvm/IR/IntrinsicsWebAssembly.td @@ -50,9 +50,10 @@ def int_wasm_trunc_saturate_unsigned : Intrinsic<[llvm_anyint_ty], //===----------------------------------------------------------------------===// // throw / rethrow +// The immediate argument is an index to a tag, which is 0 for C++. def int_wasm_throw : Intrinsic<[], [llvm_i32_ty, llvm_ptr_ty], [Throws, IntrNoReturn, ImmArg<ArgIndex<0>>]>; -def int_wasm_rethrow_in_catch : Intrinsic<[], [], [Throws, IntrNoReturn]>; +def int_wasm_rethrow : Intrinsic<[], [], [Throws, IntrNoReturn]>; // Since wasm does not use landingpad instructions, these instructions return // exception pointer and selector values until we lower them in WasmEHPrepare. @@ -60,10 +61,12 @@ def int_wasm_get_exception : Intrinsic<[llvm_ptr_ty], [llvm_token_ty], [IntrHasSideEffects]>; def int_wasm_get_ehselector : Intrinsic<[llvm_i32_ty], [llvm_token_ty], [IntrHasSideEffects]>; -// This is the same as llvm.wasm.get.exception except that it does not take a -// token operand. This is only for instruction selection purpose. -def int_wasm_extract_exception : Intrinsic<[llvm_ptr_ty], [], - [IntrHasSideEffects]>; + +// wasm.catch returns the pointer to the exception object caught by wasm 'catch' +// instruction. This returns a single pointer, which is sufficient for C++ +// support. The immediate argument is an index to for a tag, which is 0 for C++. +def int_wasm_catch : Intrinsic<[llvm_ptr_ty], [llvm_i32_ty], + [IntrHasSideEffects, ImmArg<ArgIndex<0>>]>; // WebAssembly EH must maintain the landingpads in the order assigned to them // by WasmEHPrepare pass to generate landingpad table in EHStreamer. 
This is @@ -79,22 +82,23 @@ def int_wasm_lsda : Intrinsic<[llvm_ptr_ty], [], [IntrNoMem]>; //===----------------------------------------------------------------------===// // wait / notify -def int_wasm_atomic_wait_i32 : +def int_wasm_memory_atomic_wait32 : Intrinsic<[llvm_i32_ty], [LLVMPointerType<llvm_i32_ty>, llvm_i32_ty, llvm_i64_ty], - [IntrInaccessibleMemOrArgMemOnly, ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>, - IntrHasSideEffects], - "", [SDNPMemOperand]>; -def int_wasm_atomic_wait_i64 : + [IntrInaccessibleMemOrArgMemOnly, ReadOnly<ArgIndex<0>>, + NoCapture<ArgIndex<0>>, IntrHasSideEffects], + "", [SDNPMemOperand]>; +def int_wasm_memory_atomic_wait64 : Intrinsic<[llvm_i32_ty], [LLVMPointerType<llvm_i64_ty>, llvm_i64_ty, llvm_i64_ty], - [IntrInaccessibleMemOrArgMemOnly, ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>, - IntrHasSideEffects], - "", [SDNPMemOperand]>; -def int_wasm_atomic_notify: + [IntrInaccessibleMemOrArgMemOnly, ReadOnly<ArgIndex<0>>, + NoCapture<ArgIndex<0>>, IntrHasSideEffects], + "", [SDNPMemOperand]>; +def int_wasm_memory_atomic_notify: Intrinsic<[llvm_i32_ty], [LLVMPointerType<llvm_i32_ty>, llvm_i32_ty], - [IntrInaccessibleMemOnly, NoCapture<ArgIndex<0>>, IntrHasSideEffects], "", - [SDNPMemOperand]>; + [IntrInaccessibleMemOnly, NoCapture<ArgIndex<0>>, + IntrHasSideEffects], + "", [SDNPMemOperand]>; //===----------------------------------------------------------------------===// // SIMD intrinsics @@ -151,6 +155,7 @@ def int_wasm_dot : Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem, IntrSpeculatable]>; + def int_wasm_narrow_signed : Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty, LLVMMatchType<1>], @@ -159,21 +164,21 @@ def int_wasm_narrow_unsigned : Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty, LLVMMatchType<1>], [IntrNoMem, IntrSpeculatable]>; + +// TODO: Replace these intrinsics with normal ISel patterns once i32x4 to i64x2 +// widening is merged to the proposal. def int_wasm_widen_low_signed : - Intrinsic<[llvm_anyvector_ty], - [llvm_anyvector_ty], - [IntrNoMem, IntrSpeculatable]>; + Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty], [IntrNoMem, IntrSpeculatable]>; def int_wasm_widen_high_signed : - Intrinsic<[llvm_anyvector_ty], - [llvm_anyvector_ty], - [IntrNoMem, IntrSpeculatable]>; + Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty], [IntrNoMem, IntrSpeculatable]>; def int_wasm_widen_low_unsigned : - Intrinsic<[llvm_anyvector_ty], - [llvm_anyvector_ty], - [IntrNoMem, IntrSpeculatable]>; + Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty], [IntrNoMem, IntrSpeculatable]>; def int_wasm_widen_high_unsigned : - Intrinsic<[llvm_anyvector_ty], - [llvm_anyvector_ty], + Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty], [IntrNoMem, IntrSpeculatable]>; + +def int_wasm_q15mulr_saturate_signed : + Intrinsic<[llvm_v8i16_ty], + [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem, IntrSpeculatable]>; // TODO: Replace these intrinsics with normal ISel patterns @@ -206,6 +211,143 @@ def int_wasm_nearest : [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]>; +// TODO: Replace these intrinsic with normal ISel patterns once the +// load_zero instructions are merged to the proposal. 
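A rough sketch of how a front end might emit the throw/catch intrinsics described earlier in this hunk, using tag index 0 (the C++ tag); the helper names are illustrative and not part of the imported sources.

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/IR/Module.h"
using namespace llvm;

// Emit llvm.wasm.throw with tag 0 and an i8* exception object.
static void emitWasmThrow(IRBuilder<> &B, Module &M, Value *ExnPtr) {
  Function *Throw = Intrinsic::getDeclaration(&M, Intrinsic::wasm_throw);
  B.CreateCall(Throw, {B.getInt32(0), ExnPtr});
}

// Emit llvm.wasm.catch, which yields the pointer to the caught exception.
static Value *emitWasmCatch(IRBuilder<> &B, Module &M) {
  Function *Catch = Intrinsic::getDeclaration(&M, Intrinsic::wasm_catch);
  return B.CreateCall(Catch, {B.getInt32(0)});
}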
+def int_wasm_load32_zero : + Intrinsic<[llvm_v4i32_ty], + [LLVMPointerType<llvm_i32_ty>], + [IntrReadMem, IntrArgMemOnly], + "", [SDNPMemOperand]>; + +def int_wasm_load64_zero : + Intrinsic<[llvm_v2i64_ty], + [LLVMPointerType<llvm_i64_ty>], + [IntrReadMem, IntrArgMemOnly], + "", [SDNPMemOperand]>; + +// These intrinsics do not mark their lane index arguments as immediate because +// that changes the corresponding SDNode from ISD::Constant to +// ISD::TargetConstant, which would require extra complications in the ISel +// tablegen patterns. TODO: Replace these intrinsic with normal ISel patterns +// once the load_lane instructions are merged to the proposal. +def int_wasm_load8_lane : + Intrinsic<[llvm_v16i8_ty], + [LLVMPointerType<llvm_i8_ty>, llvm_v16i8_ty, llvm_i32_ty], + [IntrReadMem, IntrArgMemOnly], + "", [SDNPMemOperand]>; +def int_wasm_load16_lane : + Intrinsic<[llvm_v8i16_ty], + [LLVMPointerType<llvm_i16_ty>, llvm_v8i16_ty, llvm_i32_ty], + [IntrReadMem, IntrArgMemOnly], + "", [SDNPMemOperand]>; +def int_wasm_load32_lane : + Intrinsic<[llvm_v4i32_ty], + [LLVMPointerType<llvm_i32_ty>, llvm_v4i32_ty, llvm_i32_ty], + [IntrReadMem, IntrArgMemOnly], + "", [SDNPMemOperand]>; +def int_wasm_load64_lane : + Intrinsic<[llvm_v2i64_ty], + [LLVMPointerType<llvm_i64_ty>, llvm_v2i64_ty, llvm_i32_ty], + [IntrReadMem, IntrArgMemOnly], + "", [SDNPMemOperand]>; +def int_wasm_store8_lane : + Intrinsic<[], + [LLVMPointerType<llvm_i8_ty>, llvm_v16i8_ty, llvm_i32_ty], + [IntrWriteMem, IntrArgMemOnly], + "", [SDNPMemOperand]>; +def int_wasm_store16_lane : + Intrinsic<[], + [LLVMPointerType<llvm_i16_ty>, llvm_v8i16_ty, llvm_i32_ty], + [IntrWriteMem, IntrArgMemOnly], + "", [SDNPMemOperand]>; +def int_wasm_store32_lane : + Intrinsic<[], + [LLVMPointerType<llvm_i32_ty>, llvm_v4i32_ty, llvm_i32_ty], + [IntrWriteMem, IntrArgMemOnly], + "", [SDNPMemOperand]>; +def int_wasm_store64_lane : + Intrinsic<[], + [LLVMPointerType<llvm_i64_ty>, llvm_v2i64_ty, llvm_i32_ty], + [IntrWriteMem, IntrArgMemOnly], + "", [SDNPMemOperand]>; + +// TODO: Replace this intrinsic with normal ISel patterns once popcnt is merged +// to the proposal. +def int_wasm_popcnt : + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty], [IntrNoMem, IntrSpeculatable]>; + +def int_wasm_extmul_low_signed : + Intrinsic<[llvm_anyvector_ty], + [LLVMSubdivide2VectorType<0>, LLVMSubdivide2VectorType<0>], + [IntrNoMem, IntrSpeculatable]>; +def int_wasm_extmul_high_signed : + Intrinsic<[llvm_anyvector_ty], + [LLVMSubdivide2VectorType<0>, LLVMSubdivide2VectorType<0>], + [IntrNoMem, IntrSpeculatable]>; +def int_wasm_extmul_low_unsigned : + Intrinsic<[llvm_anyvector_ty], + [LLVMSubdivide2VectorType<0>, LLVMSubdivide2VectorType<0>], + [IntrNoMem, IntrSpeculatable]>; +def int_wasm_extmul_high_unsigned : + Intrinsic<[llvm_anyvector_ty], + [LLVMSubdivide2VectorType<0>, LLVMSubdivide2VectorType<0>], + [IntrNoMem, IntrSpeculatable]>; + +def int_wasm_extadd_pairwise_signed : + Intrinsic<[llvm_anyvector_ty], + [LLVMSubdivide2VectorType<0>], + [IntrNoMem, IntrSpeculatable]>; +def int_wasm_extadd_pairwise_unsigned : + Intrinsic<[llvm_anyvector_ty], + [LLVMSubdivide2VectorType<0>], + [IntrNoMem, IntrSpeculatable]>; + +def int_wasm_signselect : + Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], + [IntrNoMem, IntrSpeculatable]>; + +// TODO: Remove this intrinsic and the associated builtin if i64x2.eq gets +// merged to the proposal. 
+def int_wasm_eq : + Intrinsic<[llvm_v2i64_ty], + [llvm_v2i64_ty, llvm_v2i64_ty], + [IntrNoMem, IntrSpeculatable]>; + +// TODO: Remove this after experiments have been run. Use the target-agnostic +// int_prefetch if this becomes specified at some point. +def int_wasm_prefetch_t : + Intrinsic<[], [llvm_ptr_ty], + [IntrInaccessibleMemOrArgMemOnly, IntrWillReturn, + ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>], + "", [SDNPMemOperand]>; + +def int_wasm_prefetch_nt : + Intrinsic<[], [llvm_ptr_ty], + [IntrInaccessibleMemOrArgMemOnly, IntrWillReturn, + ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>], + "", [SDNPMemOperand]>; + +// TODO: Remove these if possible if they are merged to the spec. +def int_wasm_convert_low_signed : + Intrinsic<[llvm_v2f64_ty], [llvm_v4i32_ty], + [IntrNoMem, IntrSpeculatable]>; +def int_wasm_convert_low_unsigned : + Intrinsic<[llvm_v2f64_ty], [llvm_v4i32_ty], + [IntrNoMem, IntrSpeculatable]>; +def int_wasm_trunc_saturate_zero_signed : + Intrinsic<[llvm_v4i32_ty], [llvm_v2f64_ty], + [IntrNoMem, IntrSpeculatable]>; +def int_wasm_trunc_saturate_zero_unsigned : + Intrinsic<[llvm_v4i32_ty], [llvm_v2f64_ty], + [IntrNoMem, IntrSpeculatable]>; +def int_wasm_demote_zero : + Intrinsic<[llvm_v4f32_ty], [llvm_v2f64_ty], + [IntrNoMem, IntrSpeculatable]>; +def int_wasm_promote_low : + Intrinsic<[llvm_v2f64_ty], [llvm_v4f32_ty], + [IntrNoMem, IntrSpeculatable]>; + //===----------------------------------------------------------------------===// // Thread-local storage intrinsics //===----------------------------------------------------------------------===// diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td index 3f86fd075d3a..bba12139976e 100644 --- a/llvm/include/llvm/IR/IntrinsicsX86.td +++ b/llvm/include/llvm/IR/IntrinsicsX86.td @@ -283,11 +283,10 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". IntrHasSideEffects]>; def int_x86_sse_ldmxcsr : Intrinsic<[], [llvm_ptr_ty], - [IntrReadMem, IntrArgMemOnly, IntrHasSideEffects, // FIXME: LDMXCSR does not actually write to memory, - // but Fast and DAG Isel both use writing to memory - // as a proxy for having side effects. - IntrWriteMem]>; + // but intrinsic properties are generated incorrectly + // for IntrReadMem+IntrHasSideEffects. + [/*IntrReadMem, IntrArgMemOnly,*/ IntrHasSideEffects]>; } // Misc. @@ -4749,26 +4748,26 @@ let TargetPrefix = "x86" in { let TargetPrefix = "x86" in { // NOTE: These comparison intrinsics are not used by clang as long as the // distinction in signaling behaviour is not implemented. 
- def int_x86_avx512_cmp_ps_512 : + def int_x86_avx512_mask_cmp_ps_512 : Intrinsic<[llvm_v16i1_ty], [llvm_v16f32_ty, llvm_v16f32_ty, - llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>]>; - def int_x86_avx512_cmp_pd_512 : + llvm_i32_ty, llvm_v16i1_ty, llvm_i32_ty], + [IntrNoMem, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<4>>]>; + def int_x86_avx512_mask_cmp_pd_512 : Intrinsic<[llvm_v8i1_ty], [llvm_v8f64_ty, llvm_v8f64_ty, - llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>]>; - def int_x86_avx512_cmp_ps_256 : + llvm_i32_ty, llvm_v8i1_ty, llvm_i32_ty], + [IntrNoMem, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<4>>]>; + def int_x86_avx512_mask_cmp_ps_256 : Intrinsic<[llvm_v8i1_ty], [llvm_v8f32_ty, llvm_v8f32_ty, - llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<2>>]>; - def int_x86_avx512_cmp_pd_256 : + llvm_i32_ty, llvm_v8i1_ty], [IntrNoMem, ImmArg<ArgIndex<2>>]>; + def int_x86_avx512_mask_cmp_pd_256 : Intrinsic<[llvm_v4i1_ty], [llvm_v4f64_ty, llvm_v4f64_ty, - llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<2>>]>; - def int_x86_avx512_cmp_ps_128 : + llvm_i32_ty, llvm_v4i1_ty], [IntrNoMem, ImmArg<ArgIndex<2>>]>; + def int_x86_avx512_mask_cmp_ps_128 : Intrinsic<[llvm_v4i1_ty], [llvm_v4f32_ty, llvm_v4f32_ty, - llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<2>>]>; - def int_x86_avx512_cmp_pd_128 : + llvm_i32_ty, llvm_v4i1_ty], [IntrNoMem, ImmArg<ArgIndex<2>>]>; + def int_x86_avx512_mask_cmp_pd_128 : Intrinsic<[llvm_v2i1_ty], [llvm_v2f64_ty, llvm_v2f64_ty, - llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<2>>]>; + llvm_i32_ty, llvm_v2i1_ty], [IntrNoMem, ImmArg<ArgIndex<2>>]>; def int_x86_avx512_mask_cmp_ss : GCCBuiltin<"__builtin_ia32_cmpss_mask">, @@ -4948,6 +4947,59 @@ let TargetPrefix = "x86" in { def int_x86_xresldtrk : GCCBuiltin<"__builtin_ia32_xresldtrk">, Intrinsic<[], [], []>; } + +//===----------------------------------------------------------------------===// +// Key Locker +let TargetPrefix = "x86" in { + def int_x86_loadiwkey : GCCBuiltin<"__builtin_ia32_loadiwkey">, + Intrinsic<[], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty], + []>; + def int_x86_encodekey128 : + Intrinsic<[llvm_i32_ty, llvm_v2i64_ty, llvm_v2i64_ty, + llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty], + [llvm_i32_ty, llvm_v2i64_ty], []>; + def int_x86_encodekey256 : + Intrinsic<[llvm_i32_ty, llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty, + llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty], + [llvm_i32_ty, llvm_v2i64_ty, llvm_v2i64_ty], []>; + def int_x86_aesenc128kl : + Intrinsic<[llvm_i8_ty, llvm_v2i64_ty], [llvm_v2i64_ty, llvm_ptr_ty], []>; + def int_x86_aesdec128kl : + Intrinsic<[llvm_i8_ty, llvm_v2i64_ty], [llvm_v2i64_ty, llvm_ptr_ty], []>; + def int_x86_aesenc256kl : + Intrinsic<[llvm_i8_ty, llvm_v2i64_ty], [llvm_v2i64_ty, llvm_ptr_ty], []>; + def int_x86_aesdec256kl : + Intrinsic<[llvm_i8_ty, llvm_v2i64_ty], [llvm_v2i64_ty, llvm_ptr_ty], []>; + def int_x86_aesencwide128kl : + Intrinsic<[llvm_i8_ty, llvm_v2i64_ty, llvm_v2i64_ty, + llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty, + llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty], + [llvm_ptr_ty, llvm_v2i64_ty, llvm_v2i64_ty, + llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty, + llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty], []>; + def int_x86_aesdecwide128kl : + Intrinsic<[llvm_i8_ty, llvm_v2i64_ty, llvm_v2i64_ty, + llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty, + llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty], + [llvm_ptr_ty, llvm_v2i64_ty, llvm_v2i64_ty, + llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty, + llvm_v2i64_ty, 
llvm_v2i64_ty, llvm_v2i64_ty], []>; + def int_x86_aesencwide256kl : + Intrinsic<[llvm_i8_ty, llvm_v2i64_ty, llvm_v2i64_ty, + llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty, + llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty], + [llvm_ptr_ty, llvm_v2i64_ty, llvm_v2i64_ty, + llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty, + llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty], []>; + def int_x86_aesdecwide256kl : + Intrinsic<[llvm_i8_ty, llvm_v2i64_ty, llvm_v2i64_ty, + llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty, + llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty], + [llvm_ptr_ty, llvm_v2i64_ty, llvm_v2i64_ty, + llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty, + llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty], []>; +} + //===----------------------------------------------------------------------===// // AMX - Intel AMX extensions @@ -4959,21 +5011,68 @@ let TargetPrefix = "x86" in { def int_x86_tilerelease : GCCBuiltin<"__builtin_ia32_tilerelease">, Intrinsic<[], [], []>; def int_x86_tilezero : GCCBuiltin<"__builtin_ia32_tilezero">, - Intrinsic<[], [llvm_i8_ty], []>; + Intrinsic<[], [llvm_i8_ty], [ImmArg<ArgIndex<0>>]>; def int_x86_tileloadd64 : GCCBuiltin<"__builtin_ia32_tileloadd64">, - Intrinsic<[], [llvm_i8_ty, llvm_ptr_ty, llvm_i64_ty], []>; + Intrinsic<[], [llvm_i8_ty, llvm_ptr_ty, llvm_i64_ty], + [ImmArg<ArgIndex<0>>]>; def int_x86_tileloaddt164 : GCCBuiltin<"__builtin_ia32_tileloaddt164">, - Intrinsic<[], [llvm_i8_ty, llvm_ptr_ty, llvm_i64_ty], []>; + Intrinsic<[], [llvm_i8_ty, llvm_ptr_ty, llvm_i64_ty], + [ImmArg<ArgIndex<0>>]>; def int_x86_tilestored64 : GCCBuiltin<"__builtin_ia32_tilestored64">, - Intrinsic<[], [llvm_i8_ty, llvm_ptr_ty, llvm_i64_ty], []>; + Intrinsic<[], [llvm_i8_ty, llvm_ptr_ty, llvm_i64_ty], + [ImmArg<ArgIndex<0>>]>; def int_x86_tdpbssd : GCCBuiltin<"__builtin_ia32_tdpbssd">, - Intrinsic<[], [llvm_i8_ty, llvm_i8_ty, llvm_i8_ty], []>; + Intrinsic<[], [llvm_i8_ty, llvm_i8_ty, llvm_i8_ty], + [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>, + ImmArg<ArgIndex<2>>]>; def int_x86_tdpbsud : GCCBuiltin<"__builtin_ia32_tdpbsud">, - Intrinsic<[], [llvm_i8_ty, llvm_i8_ty, llvm_i8_ty], []>; + Intrinsic<[], [llvm_i8_ty, llvm_i8_ty, llvm_i8_ty], + [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>, + ImmArg<ArgIndex<2>>]>; def int_x86_tdpbusd : GCCBuiltin<"__builtin_ia32_tdpbusd">, - Intrinsic<[], [llvm_i8_ty, llvm_i8_ty, llvm_i8_ty], []>; + Intrinsic<[], [llvm_i8_ty, llvm_i8_ty, llvm_i8_ty], + [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>, + ImmArg<ArgIndex<2>>]>; def int_x86_tdpbuud : GCCBuiltin<"__builtin_ia32_tdpbuud">, - Intrinsic<[], [llvm_i8_ty, llvm_i8_ty, llvm_i8_ty], []>; + Intrinsic<[], [llvm_i8_ty, llvm_i8_ty, llvm_i8_ty], + [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>, + ImmArg<ArgIndex<2>>]>; def int_x86_tdpbf16ps : GCCBuiltin<"__builtin_ia32_tdpbf16ps">, - Intrinsic<[], [llvm_i8_ty, llvm_i8_ty, llvm_i8_ty], []>; + Intrinsic<[], [llvm_i8_ty, llvm_i8_ty, llvm_i8_ty], + [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>, + ImmArg<ArgIndex<2>>]>; + // AMX - internal intrinsics + def int_x86_tileloadd64_internal : + GCCBuiltin<"__builtin_ia32_tileloadd64_internal">, + Intrinsic<[llvm_x86amx_ty], + [llvm_i16_ty, llvm_i16_ty, llvm_ptr_ty, llvm_i64_ty], + []>; + def int_x86_tdpbssd_internal : + GCCBuiltin<"__builtin_ia32_tdpbssd_internal">, + Intrinsic<[llvm_x86amx_ty], + [llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, + llvm_x86amx_ty, llvm_x86amx_ty, + llvm_x86amx_ty], []>; + def int_x86_tilestored64_internal : + GCCBuiltin<"__builtin_ia32_tilestored64_internal">, + Intrinsic<[], [llvm_i16_ty, llvm_i16_ty, llvm_ptr_ty, + llvm_i64_ty, 
llvm_x86amx_ty], []>; + def int_x86_tilezero_internal : + GCCBuiltin<"__builtin_ia32_tilezero_internal">, + Intrinsic<[llvm_x86amx_ty], [llvm_i16_ty, llvm_i16_ty], + []>; +} + +//===----------------------------------------------------------------------===// +// UINTR - User Level Interrupt + +let TargetPrefix = "x86" in { + def int_x86_clui : GCCBuiltin<"__builtin_ia32_clui">, + Intrinsic<[], [], []>; + def int_x86_stui : GCCBuiltin<"__builtin_ia32_stui">, + Intrinsic<[], [], []>; + def int_x86_testui : GCCBuiltin<"__builtin_ia32_testui">, + Intrinsic<[llvm_i8_ty], [], []>; + def int_x86_senduipi : GCCBuiltin<"__builtin_ia32_senduipi">, + Intrinsic<[], [llvm_i64_ty], []>; } diff --git a/llvm/include/llvm/IR/LLVMContext.h b/llvm/include/llvm/IR/LLVMContext.h index c465e02c2fc5..8f8a35d07c64 100644 --- a/llvm/include/llvm/IR/LLVMContext.h +++ b/llvm/include/llvm/IR/LLVMContext.h @@ -222,13 +222,23 @@ public: void setDiagnosticsHotnessRequested(bool Requested); /// Return the minimum hotness value a diagnostic would need in order - /// to be included in optimization diagnostics. If there is no minimum, this - /// returns None. + /// to be included in optimization diagnostics. + /// + /// Three possible return values: + /// 0 - threshold is disabled. Everything will be printed out. + /// positive int - threshold is set. + /// UINT64_MAX - threshold is not yet set, and needs to be synced from + /// profile summary. Note that in case of missing profile + /// summary, threshold will be kept at "MAX", effectively + /// suppresses all remarks output. uint64_t getDiagnosticsHotnessThreshold() const; /// Set the minimum hotness value a diagnostic needs in order to be /// included in optimization diagnostics. - void setDiagnosticsHotnessThreshold(uint64_t Threshold); + void setDiagnosticsHotnessThreshold(Optional<uint64_t> Threshold); + + /// Return if hotness threshold is requested from PSI. + bool isDiagnosticsHotnessThresholdSetFromPSI() const; /// The "main remark streamer" used by all the specialized remark streamers. /// This streamer keeps generic remark metadata in memory throughout the life diff --git a/llvm/include/llvm/IR/LLVMRemarkStreamer.h b/llvm/include/llvm/IR/LLVMRemarkStreamer.h index 97082a44e62f..e7627e993370 100644 --- a/llvm/include/llvm/IR/LLVMRemarkStreamer.h +++ b/llvm/include/llvm/IR/LLVMRemarkStreamer.h @@ -79,16 +79,15 @@ Expected<std::unique_ptr<ToolOutputFile>> setupLLVMOptimizationRemarks(LLVMContext &Context, StringRef RemarksFilename, StringRef RemarksPasses, StringRef RemarksFormat, bool RemarksWithHotness, - unsigned RemarksHotnessThreshold = 0); + Optional<uint64_t> RemarksHotnessThreshold = 0); /// Setup optimization remarks that output directly to a raw_ostream. /// \p OS is managed by the caller and should be open for writing as long as \p /// Context is streaming remarks to it. 
-Error setupLLVMOptimizationRemarks(LLVMContext &Context, raw_ostream &OS, - StringRef RemarksPasses, - StringRef RemarksFormat, - bool RemarksWithHotness, - unsigned RemarksHotnessThreshold = 0); +Error setupLLVMOptimizationRemarks( + LLVMContext &Context, raw_ostream &OS, StringRef RemarksPasses, + StringRef RemarksFormat, bool RemarksWithHotness, + Optional<uint64_t> RemarksHotnessThreshold = 0); } // end namespace llvm diff --git a/llvm/include/llvm/IR/LegacyPassManagers.h b/llvm/include/llvm/IR/LegacyPassManagers.h index 6b1ddd4d79f8..f4fae184e428 100644 --- a/llvm/include/llvm/IR/LegacyPassManagers.h +++ b/llvm/include/llvm/IR/LegacyPassManagers.h @@ -88,7 +88,6 @@ namespace llvm { template <typename T> class ArrayRef; class Module; -class Pass; class StringRef; class Value; class Timer; @@ -231,11 +230,11 @@ private: // Map to keep track of last user of the analysis pass. // LastUser->second is the last user of Lastuser->first. + // This is kept in sync with InversedLastUser. DenseMap<Pass *, Pass *> LastUser; // Map to keep track of passes that are last used by a pass. - // This inverse map is initialized at PM->run() based on - // LastUser map. + // This is kept in sync with LastUser. DenseMap<Pass *, SmallPtrSet<Pass *, 8> > InversedLastUser; /// Immutable passes are managed by top level manager. diff --git a/llvm/include/llvm/IR/MDBuilder.h b/llvm/include/llvm/IR/MDBuilder.h index 11e2e2623257..51be8667f1c1 100644 --- a/llvm/include/llvm/IR/MDBuilder.h +++ b/llvm/include/llvm/IR/MDBuilder.h @@ -76,9 +76,8 @@ public: /// Return metadata containing the section prefix for a function. MDNode *createFunctionSectionPrefix(StringRef Prefix); - /// return metadata containing expected value - MDNode *createMisExpect(uint64_t Index, uint64_t LikelyWeight, - uint64_t UnlikelyWeight); + /// Return metadata containing the pseudo probe descriptor for a function. + MDNode *createPseudoProbeDesc(uint64_t GUID, uint64_t Hash, Function *F); //===------------------------------------------------------------------===// // Range metadata. diff --git a/llvm/include/llvm/IR/MatrixBuilder.h b/llvm/include/llvm/IR/MatrixBuilder.h index 5d04b3563dd5..084b1d49569e 100644 --- a/llvm/include/llvm/IR/MatrixBuilder.h +++ b/llvm/include/llvm/IR/MatrixBuilder.h @@ -38,14 +38,19 @@ template <class IRBuilderTy> class MatrixBuilder { Value *RHS) { assert((LHS->getType()->isVectorTy() || RHS->getType()->isVectorTy()) && "One of the operands must be a matrix (embedded in a vector)"); - if (LHS->getType()->isVectorTy() && !RHS->getType()->isVectorTy()) + if (LHS->getType()->isVectorTy() && !RHS->getType()->isVectorTy()) { + assert(!isa<ScalableVectorType>(LHS->getType()) && + "LHS Assumed to be fixed width"); RHS = B.CreateVectorSplat( - cast<VectorType>(LHS->getType())->getNumElements(), RHS, + cast<VectorType>(LHS->getType())->getElementCount(), RHS, "scalar.splat"); - else if (!LHS->getType()->isVectorTy() && RHS->getType()->isVectorTy()) + } else if (!LHS->getType()->isVectorTy() && RHS->getType()->isVectorTy()) { + assert(!isa<ScalableVectorType>(RHS->getType()) && + "RHS Assumed to be fixed width"); LHS = B.CreateVectorSplat( - cast<VectorType>(RHS->getType())->getNumElements(), LHS, + cast<VectorType>(RHS->getType())->getElementCount(), LHS, "scalar.splat"); + } return {LHS, RHS}; } @@ -155,14 +160,19 @@ public: /// matrixes. 
Value *CreateAdd(Value *LHS, Value *RHS) { assert(LHS->getType()->isVectorTy() || RHS->getType()->isVectorTy()); - if (LHS->getType()->isVectorTy() && !RHS->getType()->isVectorTy()) + if (LHS->getType()->isVectorTy() && !RHS->getType()->isVectorTy()) { + assert(!isa<ScalableVectorType>(LHS->getType()) && + "LHS Assumed to be fixed width"); RHS = B.CreateVectorSplat( - cast<VectorType>(LHS->getType())->getNumElements(), RHS, + cast<VectorType>(LHS->getType())->getElementCount(), RHS, "scalar.splat"); - else if (!LHS->getType()->isVectorTy() && RHS->getType()->isVectorTy()) + } else if (!LHS->getType()->isVectorTy() && RHS->getType()->isVectorTy()) { + assert(!isa<ScalableVectorType>(RHS->getType()) && + "RHS Assumed to be fixed width"); LHS = B.CreateVectorSplat( - cast<VectorType>(RHS->getType())->getNumElements(), LHS, + cast<VectorType>(RHS->getType())->getElementCount(), LHS, "scalar.splat"); + } return cast<VectorType>(LHS->getType()) ->getElementType() @@ -175,14 +185,19 @@ public: /// point matrixes. Value *CreateSub(Value *LHS, Value *RHS) { assert(LHS->getType()->isVectorTy() || RHS->getType()->isVectorTy()); - if (LHS->getType()->isVectorTy() && !RHS->getType()->isVectorTy()) + if (LHS->getType()->isVectorTy() && !RHS->getType()->isVectorTy()) { + assert(!isa<ScalableVectorType>(LHS->getType()) && + "LHS Assumed to be fixed width"); RHS = B.CreateVectorSplat( - cast<VectorType>(LHS->getType())->getNumElements(), RHS, + cast<VectorType>(LHS->getType())->getElementCount(), RHS, "scalar.splat"); - else if (!LHS->getType()->isVectorTy() && RHS->getType()->isVectorTy()) + } else if (!LHS->getType()->isVectorTy() && RHS->getType()->isVectorTy()) { + assert(!isa<ScalableVectorType>(RHS->getType()) && + "RHS Assumed to be fixed width"); LHS = B.CreateVectorSplat( - cast<VectorType>(RHS->getType())->getNumElements(), LHS, + cast<VectorType>(RHS->getType())->getElementCount(), LHS, "scalar.splat"); + } return cast<VectorType>(LHS->getType()) ->getElementType() diff --git a/llvm/include/llvm/IR/Metadata.def b/llvm/include/llvm/IR/Metadata.def index 1df60cadac08..f31be8d1bc0c 100644 --- a/llvm/include/llvm/IR/Metadata.def +++ b/llvm/include/llvm/IR/Metadata.def @@ -114,6 +114,8 @@ HANDLE_SPECIALIZED_MDNODE_BRANCH(DIMacroNode) HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DIMacro) HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DIMacroFile) HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DICommonBlock) +HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DIStringType) +HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DIGenericSubrange) #undef HANDLE_METADATA #undef HANDLE_METADATA_LEAF diff --git a/llvm/include/llvm/IR/Metadata.h b/llvm/include/llvm/IR/Metadata.h index 46526c70ea3b..0b87416befe9 100644 --- a/llvm/include/llvm/IR/Metadata.h +++ b/llvm/include/llvm/IR/Metadata.h @@ -1128,8 +1128,7 @@ class MDTuple : public MDNode { StorageType Storage, bool ShouldCreate = true); TempMDTuple cloneImpl() const { - return getTemporary(getContext(), - SmallVector<Metadata *, 4>(op_begin(), op_end())); + return getTemporary(getContext(), SmallVector<Metadata *, 4>(operands())); } public: @@ -1190,6 +1189,33 @@ void TempMDNodeDeleter::operator()(MDNode *Node) const { MDNode::deleteTemporary(Node); } +/// This is a simple wrapper around an MDNode which provides a higher-level +/// interface by hiding the details of how alias analysis information is encoded +/// in its operands. 
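A usage sketch for the AliasScopeNode wrapper this comment introduces, walking an instruction's !alias.scope list; the function name is illustrative.

#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
using namespace llvm;

static void collectScopeNames(const Instruction &I,
                              SmallVectorImpl<StringRef> &Names) {
  MDNode *List = I.getMetadata(LLVMContext::MD_alias_scope);
  if (!List)
    return;
  for (const MDOperand &Op : List->operands()) {
    AliasScopeNode Scope(cast<MDNode>(Op.get()));
    if (Scope.getDomain())              // well-formed scopes carry a domain
      Names.push_back(Scope.getName()); // empty for unnamed scopes
  }
}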
+class AliasScopeNode { + const MDNode *Node = nullptr; + +public: + AliasScopeNode() = default; + explicit AliasScopeNode(const MDNode *N) : Node(N) {} + + /// Get the MDNode for this AliasScopeNode. + const MDNode *getNode() const { return Node; } + + /// Get the MDNode for this AliasScopeNode's domain. + const MDNode *getDomain() const { + if (Node->getNumOperands() < 2) + return nullptr; + return dyn_cast_or_null<MDNode>(Node->getOperand(1)); + } + StringRef getName() const { + if (Node->getNumOperands() > 2) + if (MDString *N = dyn_cast_or_null<MDString>(Node->getOperand(2))) + return N->getString(); + return StringRef(); + } +}; + /// Typed iterator through MDNode operands. /// /// An iterator that transforms an \a MDNode::iterator into an iterator over a diff --git a/llvm/include/llvm/IR/Module.h b/llvm/include/llvm/IR/Module.h index 3f97d048f862..3664b275114d 100644 --- a/llvm/include/llvm/IR/Module.h +++ b/llvm/include/llvm/IR/Module.h @@ -329,10 +329,6 @@ public: /// \see LLVMContext::getOperandBundleTagID void getOperandBundleTags(SmallVectorImpl<StringRef> &Result) const; - /// Return the type with the specified name, or null if there is none by that - /// name. - StructType *getTypeByName(StringRef Name) const; - std::vector<StructType *> getIdentifiedStructTypes() const; /// @} @@ -854,12 +850,11 @@ public: /// Returns profile summary metadata. When IsCS is true, use the context /// sensitive profile summary. - Metadata *getProfileSummary(bool IsCS); + Metadata *getProfileSummary(bool IsCS) const; /// @} /// Returns whether semantic interposition is to be respected. bool getSemanticInterposition() const; - bool noSemanticInterposition() const; /// Set whether semantic interposition is to be respected. void setSemanticInterposition(bool); diff --git a/llvm/include/llvm/IR/ModuleSummaryIndex.h b/llvm/include/llvm/IR/ModuleSummaryIndex.h index 12a829b14e36..d5a7ad63737a 100644 --- a/llvm/include/llvm/IR/ModuleSummaryIndex.h +++ b/llvm/include/llvm/IR/ModuleSummaryIndex.h @@ -562,12 +562,11 @@ public: /// offsets from the beginning of the value that are passed. struct Call { uint64_t ParamNo = 0; - GlobalValue::GUID Callee = 0; + ValueInfo Callee; ConstantRange Offsets{/*BitWidth=*/RangeWidth, /*isFullSet=*/true}; Call() = default; - Call(uint64_t ParamNo, GlobalValue::GUID Callee, - const ConstantRange &Offsets) + Call(uint64_t ParamNo, ValueInfo Callee, const ConstantRange &Offsets) : ParamNo(ParamNo), Callee(Callee), Offsets(Offsets) {} }; @@ -597,7 +596,7 @@ public: GlobalValue::LinkageTypes::AvailableExternallyLinkage, /*NotEligibleToImport=*/true, /*Live=*/true, /*IsLocal=*/false, /*CanAutoHide=*/false), - /*InsCount=*/0, FunctionSummary::FFlags{}, /*EntryCount=*/0, + /*NumInsts=*/0, FunctionSummary::FFlags{}, /*EntryCount=*/0, std::vector<ValueInfo>(), std::move(Edges), std::vector<GlobalValue::GUID>(), std::vector<FunctionSummary::VFuncId>(), @@ -1061,6 +1060,9 @@ private: // some were not. Set when the combined index is created during the thin link. bool PartiallySplitLTOUnits = false; + /// True if some of the FunctionSummary contains a ParamAccess. 
+ bool HasParamAccess = false; + std::set<std::string> CfiFunctionDefs; std::set<std::string> CfiFunctionDecls; @@ -1213,6 +1215,8 @@ public: bool partiallySplitLTOUnits() const { return PartiallySplitLTOUnits; } void setPartiallySplitLTOUnits() { PartiallySplitLTOUnits = true; } + bool hasParamAccess() const { return HasParamAccess; } + bool isGlobalValueLive(const GlobalValueSummary *GVS) const { return !WithGlobalValueDeadStripping || GVS->isLive(); } @@ -1284,6 +1288,8 @@ public: /// Add a global value summary for the given ValueInfo. void addGlobalValueSummary(ValueInfo VI, std::unique_ptr<GlobalValueSummary> Summary) { + if (const FunctionSummary *FS = dyn_cast<FunctionSummary>(Summary.get())) + HasParamAccess |= !FS->paramAccesses().empty(); addOriginalName(VI.getGUID(), Summary->getOriginalName()); // Here we have a notionally const VI, but the value it points to is owned // by the non-const *this. diff --git a/llvm/include/llvm/IR/OptBisect.h b/llvm/include/llvm/IR/OptBisect.h index 1b2b0bd7acaa..6c2a1b01d897 100644 --- a/llvm/include/llvm/IR/OptBisect.h +++ b/llvm/include/llvm/IR/OptBisect.h @@ -15,6 +15,7 @@ #define LLVM_IR_OPTBISECT_H #include "llvm/ADT/StringRef.h" +#include "llvm/Support/ManagedStatic.h" namespace llvm { @@ -32,7 +33,7 @@ public: return true; } - /// isEnabled should return true before calling shouldRunPass + /// isEnabled() should return true before calling shouldRunPass(). virtual bool isEnabled() const { return false; } }; @@ -55,6 +56,14 @@ public: /// Checks the bisect limit to determine if the specified pass should run. /// + /// This forwards to checkPass(). + bool shouldRunPass(const Pass *P, StringRef IRDescription) override; + + /// isEnabled() should return true before calling shouldRunPass(). + bool isEnabled() const override { return BisectEnabled; } + + /// Checks the bisect limit to determine if the specified pass should run. + /// /// If the bisect limit is set to -1, the function prints a message describing /// the pass and the bisect number assigned to it and return true. Otherwise, /// the function prints a message with the bisect number assigned to the @@ -64,17 +73,16 @@ public: /// Most passes should not call this routine directly. Instead, they are /// called through helper routines provided by the pass base classes. For /// instance, function passes should call FunctionPass::skipFunction(). - bool shouldRunPass(const Pass *P, StringRef IRDescription) override; - - /// isEnabled should return true before calling shouldRunPass - bool isEnabled() const override { return BisectEnabled; } -private: bool checkPass(const StringRef PassName, const StringRef TargetDesc); +private: bool BisectEnabled = false; unsigned LastBisectNum = 0; }; +/// Singleton instance of the OptBisect class, so multiple pass managers don't +/// need to coordinate their uses of OptBisect. +extern ManagedStatic<OptBisect> OptBisector; } // end namespace llvm #endif // LLVM_IR_OPTBISECT_H diff --git a/llvm/include/llvm/IR/PassInstrumentation.h b/llvm/include/llvm/IR/PassInstrumentation.h index bcc434548e67..291f324b159a 100644 --- a/llvm/include/llvm/IR/PassInstrumentation.h +++ b/llvm/include/llvm/IR/PassInstrumentation.h @@ -44,10 +44,6 @@ /// of a pass. For those callbacks returning false means pass will not be /// executed. /// -/// TODO: currently there is no way for a pass to opt-out of execution control -/// (e.g. become unskippable). PassManager is the only entity that determines -/// how pass instrumentation affects pass execution. 
-/// //===----------------------------------------------------------------------===// #ifndef LLVM_IR_PASSINSTRUMENTATION_H @@ -56,6 +52,7 @@ #include "llvm/ADT/Any.h" #include "llvm/ADT/FunctionExtras.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringMap.h" #include <type_traits> namespace llvm { @@ -71,13 +68,17 @@ public: // to take them as constant pointers, wrapped with llvm::Any. // For the case when IRUnit has been invalidated there is a different // callback to use - AfterPassInvalidated. + // We call all BeforePassFuncs to determine if a pass should run or not. + // BeforeNonSkippedPassFuncs are called only if the pass should run. // TODO: currently AfterPassInvalidated does not accept IRUnit, since passing - // already invalidated IRUnit is unsafe. There are ways to handle invalidated IRUnits - // in a safe way, and we might pursue that as soon as there is a useful instrumentation - // that needs it. + // already invalidated IRUnit is unsafe. There are ways to handle invalidated + // IRUnits in a safe way, and we might pursue that as soon as there is a + // useful instrumentation that needs it. using BeforePassFunc = bool(StringRef, Any); - using AfterPassFunc = void(StringRef, Any); - using AfterPassInvalidatedFunc = void(StringRef); + using BeforeSkippedPassFunc = void(StringRef, Any); + using BeforeNonSkippedPassFunc = void(StringRef, Any); + using AfterPassFunc = void(StringRef, Any, const PreservedAnalyses &); + using AfterPassInvalidatedFunc = void(StringRef, const PreservedAnalyses &); using BeforeAnalysisFunc = void(StringRef, Any); using AfterAnalysisFunc = void(StringRef, Any); @@ -88,8 +89,19 @@ public: PassInstrumentationCallbacks(const PassInstrumentationCallbacks &) = delete; void operator=(const PassInstrumentationCallbacks &) = delete; - template <typename CallableT> void registerBeforePassCallback(CallableT C) { - BeforePassCallbacks.emplace_back(std::move(C)); + template <typename CallableT> + void registerShouldRunOptionalPassCallback(CallableT C) { + ShouldRunOptionalPassCallbacks.emplace_back(std::move(C)); + } + + template <typename CallableT> + void registerBeforeSkippedPassCallback(CallableT C) { + BeforeSkippedPassCallbacks.emplace_back(std::move(C)); + } + + template <typename CallableT> + void registerBeforeNonSkippedPassCallback(CallableT C) { + BeforeNonSkippedPassCallbacks.emplace_back(std::move(C)); } template <typename CallableT> void registerAfterPassCallback(CallableT C) { @@ -111,17 +123,37 @@ public: AfterAnalysisCallbacks.emplace_back(std::move(C)); } + /// Add a class name to pass name mapping for use by pass instrumentation. + void addClassToPassName(StringRef ClassName, StringRef PassName); + /// Get the pass name for a given pass class name. + StringRef getPassNameForClassName(StringRef ClassName); + private: friend class PassInstrumentation; - SmallVector<llvm::unique_function<BeforePassFunc>, 4> BeforePassCallbacks; + /// These are only run on passes that are not required. They return false when + /// an optional pass should be skipped. + SmallVector<llvm::unique_function<BeforePassFunc>, 4> + ShouldRunOptionalPassCallbacks; + /// These are run on passes that are skipped. + SmallVector<llvm::unique_function<BeforeSkippedPassFunc>, 4> + BeforeSkippedPassCallbacks; + /// These are run on passes that are about to be run. + SmallVector<llvm::unique_function<BeforeNonSkippedPassFunc>, 4> + BeforeNonSkippedPassCallbacks; + /// These are run on passes that have just run. 
SmallVector<llvm::unique_function<AfterPassFunc>, 4> AfterPassCallbacks; + /// These are run passes that have just run on invalidated IR. SmallVector<llvm::unique_function<AfterPassInvalidatedFunc>, 4> AfterPassInvalidatedCallbacks; + /// These are run on analyses that are about to be run. SmallVector<llvm::unique_function<BeforeAnalysisFunc>, 4> BeforeAnalysisCallbacks; + /// These are run on analyses that have been run. SmallVector<llvm::unique_function<AfterAnalysisFunc>, 4> AfterAnalysisCallbacks; + + StringMap<std::string> ClassToPassName; }; /// This class provides instrumentation entry points for the Pass Manager, @@ -129,6 +161,26 @@ private: class PassInstrumentation { PassInstrumentationCallbacks *Callbacks; + // Template argument PassT of PassInstrumentation::runBeforePass could be two + // kinds: (1) a regular pass inherited from PassInfoMixin (happen when + // creating a adaptor pass for a regular pass); (2) a type-erased PassConcept + // created from (1). Here we want to make case (1) skippable unconditionally + // since they are regular passes. We call PassConcept::isRequired to decide + // for case (2). + template <typename PassT> + using has_required_t = decltype(std::declval<PassT &>().isRequired()); + + template <typename PassT> + static std::enable_if_t<is_detected<has_required_t, PassT>::value, bool> + isRequired(const PassT &Pass) { + return Pass.isRequired(); + } + template <typename PassT> + static std::enable_if_t<!is_detected<has_required_t, PassT>::value, bool> + isRequired(const PassT &Pass) { + return false; + } + public: /// Callbacks object is not owned by PassInstrumentation, its life-time /// should at least match the life-time of corresponding @@ -139,15 +191,28 @@ public: /// BeforePass instrumentation point - takes \p Pass instance to be executed /// and constant reference to IR it operates on. \Returns true if pass is - /// allowed to be executed. + /// allowed to be executed. These are only run on optional pass since required + /// passes must always be run. This allows these callbacks to print info when + /// they want to skip a pass. template <typename IRUnitT, typename PassT> bool runBeforePass(const PassT &Pass, const IRUnitT &IR) const { if (!Callbacks) return true; bool ShouldRun = true; - for (auto &C : Callbacks->BeforePassCallbacks) - ShouldRun &= C(Pass.name(), llvm::Any(&IR)); + if (!isRequired(Pass)) { + for (auto &C : Callbacks->ShouldRunOptionalPassCallbacks) + ShouldRun &= C(Pass.name(), llvm::Any(&IR)); + } + + if (ShouldRun) { + for (auto &C : Callbacks->BeforeNonSkippedPassCallbacks) + C(Pass.name(), llvm::Any(&IR)); + } else { + for (auto &C : Callbacks->BeforeSkippedPassCallbacks) + C(Pass.name(), llvm::Any(&IR)); + } + return ShouldRun; } @@ -155,20 +220,22 @@ public: /// just been executed and constant reference to \p IR it operates on. /// \p IR is guaranteed to be valid at this point. template <typename IRUnitT, typename PassT> - void runAfterPass(const PassT &Pass, const IRUnitT &IR) const { + void runAfterPass(const PassT &Pass, const IRUnitT &IR, + const PreservedAnalyses &PA) const { if (Callbacks) for (auto &C : Callbacks->AfterPassCallbacks) - C(Pass.name(), llvm::Any(&IR)); + C(Pass.name(), llvm::Any(&IR), PA); } /// AfterPassInvalidated instrumentation point - takes \p Pass instance /// that has just been executed. For use when IR has been invalidated /// by \p Pass execution. 
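A sketch of hooking the new callback flavours from a tool or plugin; the logging body is illustrative, while the signatures follow the typedefs above.

#include "llvm/ADT/Any.h"
#include "llvm/IR/PassInstrumentation.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

static void registerPassLogging(PassInstrumentationCallbacks &PIC) {
  PIC.registerBeforeNonSkippedPassCallback(
      [](StringRef PassID, Any) { errs() << "running: " << PassID << "\n"; });
  PIC.registerBeforeSkippedPassCallback(
      [](StringRef PassID, Any) { errs() << "skipped: " << PassID << "\n"; });
  PIC.registerAfterPassCallback(
      [](StringRef PassID, Any, const PreservedAnalyses &) {
        errs() << "done: " << PassID << "\n";
      });
}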
template <typename IRUnitT, typename PassT> - void runAfterPassInvalidated(const PassT &Pass) const { + void runAfterPassInvalidated(const PassT &Pass, + const PreservedAnalyses &PA) const { if (Callbacks) for (auto &C : Callbacks->AfterPassInvalidatedCallbacks) - C(Pass.name()); + C(Pass.name(), PA); } /// BeforeAnalysis instrumentation point - takes \p Analysis instance @@ -199,8 +266,20 @@ public: ExtraArgsT...) { return false; } + + template <typename CallableT> + void pushBeforeNonSkippedPassCallback(CallableT C) { + if (Callbacks) + Callbacks->BeforeNonSkippedPassCallbacks.emplace_back(std::move(C)); + } + void popBeforeNonSkippedPassCallback() { + if (Callbacks) + Callbacks->BeforeNonSkippedPassCallbacks.pop_back(); + } }; +bool isSpecialPass(StringRef PassID, const std::vector<StringRef> &Specials); + } // namespace llvm #endif diff --git a/llvm/include/llvm/IR/PassManager.h b/llvm/include/llvm/IR/PassManager.h index 4d5f292ba9a1..c669565aa33b 100644 --- a/llvm/include/llvm/IR/PassManager.h +++ b/llvm/include/llvm/IR/PassManager.h @@ -38,6 +38,7 @@ #define LLVM_IR_PASSMANAGER_H #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/TinyPtrVector.h" @@ -510,10 +511,6 @@ public: if (!PI.runBeforePass<IRUnitT>(*P, IR)) continue; - if (DebugLogging) - dbgs() << "Running pass: " << P->name() << " on " << IR.getName() - << "\n"; - PreservedAnalyses PassPA; { TimeTraceScope TimeScope(P->name(), IR.getName()); @@ -522,7 +519,7 @@ public: // Call onto PassInstrumentation's AfterPass callbacks immediately after // running the pass. - PI.runAfterPass<IRUnitT>(*P, IR); + PI.runAfterPass<IRUnitT>(*P, IR, PassPA); // Update the analysis manager as each pass runs and potentially // invalidates analyses. @@ -551,7 +548,9 @@ public: return PA; } - template <typename PassT> void addPass(PassT Pass) { + template <typename PassT> + std::enable_if_t<!std::is_same<PassT, PassManager>::value> + addPass(PassT Pass) { using PassModelT = detail::PassModel<IRUnitT, PassT, PreservedAnalyses, AnalysisManagerT, ExtraArgTs...>; @@ -559,7 +558,24 @@ public: Passes.emplace_back(new PassModelT(std::move(Pass))); } -private: + /// When adding a pass manager pass that has the same type as this pass + /// manager, simply move the passes over. This is because we don't have use + /// cases rely on executing nested pass managers. Doing this could reduce + /// implementation complexity and avoid potential invalidation issues that may + /// happen with nested pass managers of the same type. + template <typename PassT> + std::enable_if_t<std::is_same<PassT, PassManager>::value> + addPass(PassT &&Pass) { + for (auto &P : Pass.Passes) + Passes.emplace_back(std::move(P)); + } + + /// Returns if the pass manager contains any passes. + bool isEmpty() const { return Passes.empty(); } + + static bool isRequired() { return true; } + +protected: using PassConceptT = detail::PassConcept<IRUnitT, AnalysisManagerT, ExtraArgTs...>; @@ -649,7 +665,7 @@ public: /// when any of its embedded analysis results end up invalidated. We pass an /// \c Invalidator object as an argument to \c invalidate() in order to let /// the analysis results themselves define the dependency graph on the fly. - /// This lets us avoid building building an explicit representation of the + /// This lets us avoid building an explicit representation of the /// dependencies between analysis results. 
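An illustration of the same-type addPass overload just above: composing two FunctionPassManagers splices the passes rather than nesting managers (SimplifyCFGPass is an arbitrary stand-in).

#include "llvm/IR/PassManager.h"
#include "llvm/Transforms/Scalar/SimplifyCFG.h"
#include <utility>
using namespace llvm;

static FunctionPassManager buildCombinedFPM() {
  FunctionPassManager Inner;
  Inner.addPass(SimplifyCFGPass());
  FunctionPassManager Outer;
  // Same-type pass manager: Inner's passes are moved into Outer directly.
  Outer.addPass(std::move(Inner));
  return Outer;
}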
class Invalidator { public: @@ -844,7 +860,7 @@ public: return true; } - /// Invalidate a specific analysis pass for an IR module. + /// Invalidate a specific analysis pass for an IR unit. /// /// Note that the analysis result can disregard invalidation, if it determines /// it is in fact still valid. @@ -888,7 +904,7 @@ private: return RI == AnalysisResults.end() ? nullptr : &*RI->second->second; } - /// Invalidate a function pass result. + /// Invalidate a pass result for a IR unit. void invalidateImpl(AnalysisKey *ID, IRUnitT &IR) { typename AnalysisResultMapT::iterator RI = AnalysisResults.find({ID, &IR}); @@ -902,20 +918,20 @@ private: AnalysisResults.erase(RI); } - /// Map type from module analysis pass ID to pass concept pointer. + /// Map type from analysis pass ID to pass concept pointer. using AnalysisPassMapT = DenseMap<AnalysisKey *, std::unique_ptr<PassConceptT>>; - /// Collection of module analysis passes, indexed by ID. + /// Collection of analysis passes, indexed by ID. AnalysisPassMapT AnalysisPasses; - /// Map from function to a list of function analysis results. + /// Map from IR unit to a list of analysis results. /// - /// Provides linear time removal of all analysis results for a function and + /// Provides linear time removal of all analysis results for a IR unit and /// the ultimate storage for a particular cached analysis result. AnalysisResultListMapT AnalysisResultLists; - /// Map from an analysis ID and function to a particular cached + /// Map from an analysis ID and IR unit to a particular cached /// analysis result. AnalysisResultMapT AnalysisResults; @@ -1059,7 +1075,16 @@ extern template class InnerAnalysisManagerProxy<FunctionAnalysisManager, /// /// This proxy only exposes the const interface of the outer analysis manager, /// to indicate that you cannot cause an outer analysis to run from within an -/// inner pass. Instead, you must rely on the \c getCachedResult API. +/// inner pass. Instead, you must rely on the \c getCachedResult API. This is +/// due to keeping potential future concurrency in mind. To give an example, +/// running a module analysis before any function passes may give a different +/// result than running it in a function pass. Both may be valid, but it would +/// produce non-deterministic results. GlobalsAA is a good analysis example, +/// because the cached information has the mod/ref info for all memory for each +/// function at the time the analysis was computed. The information is still +/// valid after a function transformation, but it may be *different* if +/// recomputed after that transform. GlobalsAA is never invalidated. + /// /// This proxy doesn't manage invalidation in any way -- that is handled by the /// recursive return path of each layer of the pass manager. A consequence of @@ -1104,9 +1129,9 @@ public: for (auto &KeyValuePair : OuterAnalysisInvalidationMap) { AnalysisKey *OuterID = KeyValuePair.first; auto &InnerIDs = KeyValuePair.second; - InnerIDs.erase(llvm::remove_if(InnerIDs, [&](AnalysisKey *InnerID) { - return Inv.invalidate(InnerID, IRUnit, PA); }), - InnerIDs.end()); + llvm::erase_if(InnerIDs, [&](AnalysisKey *InnerID) { + return Inv.invalidate(InnerID, IRUnit, PA); + }); if (InnerIDs.empty()) DeadKeys.push_back(OuterID); } @@ -1130,9 +1155,7 @@ public: // analyses that all trigger invalidation on the same outer analysis, // this entire system should be changed to some other deterministic // data structure such as a `SetVector` of a pair of pointers. 
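A sketch of the getCachedResult pattern the proxy comment above describes, with ProfileSummaryAnalysis standing in for any module-level analysis; the pass itself is hypothetical.

#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/IR/PassManager.h"
using namespace llvm;

struct QueryPSIPass : PassInfoMixin<QueryPSIPass> {
  PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM) {
    const auto &MAMProxy =
        FAM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
    // A function pass may only consult *cached* module-level results.
    if (ProfileSummaryInfo *PSI =
            MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent()))
      (void)PSI->hasProfileSummary();
    return PreservedAnalyses::all();
  }
};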
- auto InvalidatedIt = std::find(InvalidatedIDList.begin(), - InvalidatedIDList.end(), InvalidatedID); - if (InvalidatedIt == InvalidatedIDList.end()) + if (!llvm::is_contained(InvalidatedIDList, InvalidatedID)) InvalidatedIDList.push_back(InvalidatedID); } @@ -1205,71 +1228,34 @@ using ModuleAnalysisManagerFunctionProxy = /// Note that although function passes can access module analyses, module /// analyses are not invalidated while the function passes are running, so they /// may be stale. Function analyses will not be stale. -template <typename FunctionPassT> class ModuleToFunctionPassAdaptor - : public PassInfoMixin<ModuleToFunctionPassAdaptor<FunctionPassT>> { + : public PassInfoMixin<ModuleToFunctionPassAdaptor> { public: - explicit ModuleToFunctionPassAdaptor(FunctionPassT Pass) + using PassConceptT = detail::PassConcept<Function, FunctionAnalysisManager>; + + explicit ModuleToFunctionPassAdaptor(std::unique_ptr<PassConceptT> Pass) : Pass(std::move(Pass)) {} /// Runs the function pass across every function in the module. - PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM) { - FunctionAnalysisManager &FAM = - AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); - - // Request PassInstrumentation from analysis manager, will use it to run - // instrumenting callbacks for the passes later. - PassInstrumentation PI = AM.getResult<PassInstrumentationAnalysis>(M); - - PreservedAnalyses PA = PreservedAnalyses::all(); - for (Function &F : M) { - if (F.isDeclaration()) - continue; + PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); - // Check the PassInstrumentation's BeforePass callbacks before running the - // pass, skip its execution completely if asked to (callback returns - // false). - if (!PI.runBeforePass<Function>(Pass, F)) - continue; - - PreservedAnalyses PassPA; - { - TimeTraceScope TimeScope(Pass.name(), F.getName()); - PassPA = Pass.run(F, FAM); - } - - PI.runAfterPass(Pass, F); - - // We know that the function pass couldn't have invalidated any other - // function's analyses (that's the contract of a function pass), so - // directly handle the function analysis manager's invalidation here. - FAM.invalidate(F, PassPA); - - // Then intersect the preserved set so that invalidation of module - // analyses will eventually occur when the module pass completes. - PA.intersect(std::move(PassPA)); - } - - // The FunctionAnalysisManagerModuleProxy is preserved because (we assume) - // the function passes we ran didn't add or remove any functions. - // - // We also preserve all analyses on Functions, because we did all the - // invalidation we needed to do above. - PA.preserveSet<AllAnalysesOn<Function>>(); - PA.preserve<FunctionAnalysisManagerModuleProxy>(); - return PA; - } + static bool isRequired() { return true; } private: - FunctionPassT Pass; + std::unique_ptr<PassConceptT> Pass; }; /// A function to deduce a function pass type and wrap it in the /// templated adaptor. template <typename FunctionPassT> -ModuleToFunctionPassAdaptor<FunctionPassT> +ModuleToFunctionPassAdaptor createModuleToFunctionPassAdaptor(FunctionPassT Pass) { - return ModuleToFunctionPassAdaptor<FunctionPassT>(std::move(Pass)); + using PassModelT = + detail::PassModel<Function, FunctionPassT, PreservedAnalyses, + FunctionAnalysisManager>; + + return ModuleToFunctionPassAdaptor( + std::make_unique<PassModelT>(std::move(Pass))); } /// A utility pass template to force an analysis result to be available. 
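A sketch of the type-erased adaptor in use; as shown above, createModuleToFunctionPassAdaptor now wraps the pass in a PassModel rather than instantiating a templated adaptor (the pipeline contents are arbitrary).

#include "llvm/IR/PassManager.h"
#include "llvm/Transforms/Scalar/SimplifyCFG.h"
#include <utility>
using namespace llvm;

static void addFunctionPipeline(ModulePassManager &MPM) {
  FunctionPassManager FPM;
  FPM.addPass(SimplifyCFGPass());
  // Runs the function pipeline across the module's functions via the
  // non-templated ModuleToFunctionPassAdaptor.
  MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
}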
@@ -1300,6 +1286,7 @@ struct RequireAnalysisPass return PreservedAnalyses::all(); } + static bool isRequired() { return true; } }; /// A no-op pass template which simply forces a specific analysis result @@ -1360,8 +1347,9 @@ public: // false). if (!PI.runBeforePass<IRUnitT>(P, IR)) continue; - PA.intersect(P.run(IR, AM, std::forward<Ts>(Args)...)); - PI.runAfterPass(P, IR); + PreservedAnalyses IterPA = P.run(IR, AM, std::forward<Ts>(Args)...); + PA.intersect(IterPA); + PI.runAfterPass(P, IR, IterPA); } return PA; } diff --git a/llvm/include/llvm/IR/PassManagerImpl.h b/llvm/include/llvm/IR/PassManagerImpl.h index 978655ac69c4..71a86d1efb15 100644 --- a/llvm/include/llvm/IR/PassManagerImpl.h +++ b/llvm/include/llvm/IR/PassManagerImpl.h @@ -64,9 +64,6 @@ AnalysisManager<IRUnitT, ExtraArgTs...>::getResultImpl( // run it to produce a result, which we then add to the cache. if (Inserted) { auto &P = this->lookUpPass(ID); - if (DebugLogging) - dbgs() << "Running analysis: " << P.name() << " on " << IR.getName() - << "\n"; PassInstrumentation PI; if (ID != PassInstrumentationAnalysis::ID()) { @@ -97,10 +94,6 @@ inline void AnalysisManager<IRUnitT, ExtraArgTs...>::invalidate( if (PA.allAnalysesInSetPreserved<AllAnalysesOn<IRUnitT>>()) return; - if (DebugLogging) - dbgs() << "Invalidating all non-preserved analyses for: " << IR.getName() - << "\n"; - // Track whether each analysis's result is invalidated in // IsResultInvalidated. SmallDenseMap<AnalysisKey *, bool, 8> IsResultInvalidated; diff --git a/llvm/include/llvm/IR/PassManagerInternal.h b/llvm/include/llvm/IR/PassManagerInternal.h index c602c0b5cc20..986ed0b5a7ac 100644 --- a/llvm/include/llvm/IR/PassManagerInternal.h +++ b/llvm/include/llvm/IR/PassManagerInternal.h @@ -48,6 +48,12 @@ struct PassConcept { /// Polymorphic method to access the name of a pass. virtual StringRef name() const = 0; + + /// Polymorphic method to to let a pass optionally exempted from skipping by + /// PassInstrumentation. + /// To opt-in, pass should implement `static bool isRequired()`. It's no-op + /// to have `isRequired` always return false since that is the default. + virtual bool isRequired() const = 0; }; /// A template wrapper used to implement the polymorphic API. @@ -81,6 +87,22 @@ struct PassModel : PassConcept<IRUnitT, AnalysisManagerT, ExtraArgTs...> { StringRef name() const override { return PassT::name(); } + template <typename T> + using has_required_t = decltype(std::declval<T &>().isRequired()); + + template <typename T> + static std::enable_if_t<is_detected<has_required_t, T>::value, bool> + passIsRequiredImpl() { + return T::isRequired(); + } + template <typename T> + static std::enable_if_t<!is_detected<has_required_t, T>::value, bool> + passIsRequiredImpl() { + return false; + } + + bool isRequired() const override { return passIsRequiredImpl<PassT>(); } + PassT Pass; }; diff --git a/llvm/include/llvm/IR/PassTimingInfo.h b/llvm/include/llvm/IR/PassTimingInfo.h index b70850fd64d7..e44321b4af66 100644 --- a/llvm/include/llvm/IR/PassTimingInfo.h +++ b/llvm/include/llvm/IR/PassTimingInfo.h @@ -17,11 +17,13 @@ #include "llvm/ADT/Any.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/Timer.h" -#include "llvm/Support/TypeName.h" #include <memory> +#include <utility> + namespace llvm { class Pass; @@ -36,11 +38,6 @@ void reportAndResetTimings(raw_ostream *OutStream = nullptr); /// Request the timer for this legacy-pass-manager's pass instance. 
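A sketch of how a pass would opt out of skipping via the isRequired() detection added above; the pass is hypothetical and does nothing.

#include "llvm/IR/PassManager.h"
using namespace llvm;

struct AlwaysRunPass : PassInfoMixin<AlwaysRunPass> {
  PreservedAnalyses run(Function &, FunctionAnalysisManager &) {
    return PreservedAnalyses::all();
  }
  // Detected by PassModel::passIsRequiredImpl; instrumentation (for example
  // bisection) will then never skip this pass.
  static bool isRequired() { return true; }
};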
Timer *getPassTimer(Pass *); -/// If the user specifies the -time-passes argument on an LLVM tool command line -/// then the value of this boolean will be true, otherwise false. -/// This is the storage for the -time-passes option. -extern bool TimePassesIsEnabled; - /// This class implements -time-passes functionality for new pass manager. /// It provides the pass-instrumentation callbacks that measure the pass /// execution time. They collect timing info into individual timers as @@ -68,9 +65,11 @@ class TimePassesHandler { raw_ostream *OutStream = nullptr; bool Enabled; + bool PerRun; public: - TimePassesHandler(bool Enabled = TimePassesIsEnabled); + TimePassesHandler(); + TimePassesHandler(bool Enabled, bool PerRun = false); /// Destructor handles the print action if it has not been handled before. ~TimePassesHandler() { print(); } @@ -98,7 +97,7 @@ private: void stopTimer(StringRef PassID); // Implementation of pass instrumentation callbacks. - bool runBeforePass(StringRef PassID); + void runBeforePass(StringRef PassID); void runAfterPass(StringRef PassID); }; diff --git a/llvm/include/llvm/IR/PatternMatch.h b/llvm/include/llvm/IR/PatternMatch.h index 4c11bc82510b..166ad23de969 100644 --- a/llvm/include/llvm/IR/PatternMatch.h +++ b/llvm/include/llvm/IR/PatternMatch.h @@ -88,16 +88,29 @@ inline class_match<BinaryOperator> m_BinOp() { /// Matches any compare instruction and ignore it. inline class_match<CmpInst> m_Cmp() { return class_match<CmpInst>(); } +/// Match an arbitrary undef constant. +inline class_match<UndefValue> m_Undef() { return class_match<UndefValue>(); } + +/// Match an arbitrary poison constant. +inline class_match<PoisonValue> m_Poison() { return class_match<PoisonValue>(); } + +/// Match an arbitrary Constant and ignore it. +inline class_match<Constant> m_Constant() { return class_match<Constant>(); } + /// Match an arbitrary ConstantInt and ignore it. inline class_match<ConstantInt> m_ConstantInt() { return class_match<ConstantInt>(); } -/// Match an arbitrary undef constant. -inline class_match<UndefValue> m_Undef() { return class_match<UndefValue>(); } +/// Match an arbitrary ConstantFP and ignore it. +inline class_match<ConstantFP> m_ConstantFP() { + return class_match<ConstantFP>(); +} -/// Match an arbitrary Constant and ignore it. -inline class_match<Constant> m_Constant() { return class_match<Constant>(); } +/// Match an arbitrary ConstantExpr and ignore it. +inline class_match<ConstantExpr> m_ConstantExpr() { + return class_match<ConstantExpr>(); +} /// Match an arbitrary basic block value and ignore it. inline class_match<BasicBlock> m_BasicBlock() { @@ -335,6 +348,33 @@ template <typename Predicate> struct api_pred_ty : public Predicate { } }; +/// This helper class is used to match scalar and vector constants that +/// satisfy a specified predicate, and bind them to an APFloat. +/// Undefs are allowed in splat vector constants. 
+template <typename Predicate> struct apf_pred_ty : public Predicate { + const APFloat *&Res; + + apf_pred_ty(const APFloat *&R) : Res(R) {} + + template <typename ITy> bool match(ITy *V) { + if (const auto *CI = dyn_cast<ConstantFP>(V)) + if (this->isValue(CI->getValue())) { + Res = &CI->getValue(); + return true; + } + if (V->getType()->isVectorTy()) + if (const auto *C = dyn_cast<Constant>(V)) + if (auto *CI = dyn_cast_or_null<ConstantFP>( + C->getSplatValue(/* AllowUndef */ true))) + if (this->isValue(CI->getValue())) { + Res = &CI->getValue(); + return true; + } + + return false; + } +}; + /////////////////////////////////////////////////////////////////////////////// // // Encapsulate constant value queries for use in templated predicate matchers. @@ -555,6 +595,15 @@ inline cstfp_pred_ty<is_nan> m_NaN() { return cstfp_pred_ty<is_nan>(); } +struct is_nonnan { + bool isValue(const APFloat &C) { return !C.isNaN(); } +}; +/// Match a non-NaN FP constant. +/// For vectors, this includes constants with undefined elements. +inline cstfp_pred_ty<is_nonnan> m_NonNaN() { + return cstfp_pred_ty<is_nonnan>(); +} + struct is_inf { bool isValue(const APFloat &C) { return C.isInfinity(); } }; @@ -564,6 +613,37 @@ inline cstfp_pred_ty<is_inf> m_Inf() { return cstfp_pred_ty<is_inf>(); } +struct is_noninf { + bool isValue(const APFloat &C) { return !C.isInfinity(); } +}; +/// Match a non-infinity FP constant, i.e. finite or NaN. +/// For vectors, this includes constants with undefined elements. +inline cstfp_pred_ty<is_noninf> m_NonInf() { + return cstfp_pred_ty<is_noninf>(); +} + +struct is_finite { + bool isValue(const APFloat &C) { return C.isFinite(); } +}; +/// Match a finite FP constant, i.e. not infinity or NaN. +/// For vectors, this includes constants with undefined elements. +inline cstfp_pred_ty<is_finite> m_Finite() { + return cstfp_pred_ty<is_finite>(); +} +inline apf_pred_ty<is_finite> m_Finite(const APFloat *&V) { return V; } + +struct is_finitenonzero { + bool isValue(const APFloat &C) { return C.isFiniteNonZero(); } +}; +/// Match a finite non-zero FP constant. +/// For vectors, this includes constants with undefined elements. +inline cstfp_pred_ty<is_finitenonzero> m_FiniteNonZero() { + return cstfp_pred_ty<is_finitenonzero>(); +} +inline apf_pred_ty<is_finitenonzero> m_FiniteNonZero(const APFloat *&V) { + return V; +} + struct is_any_zero_fp { bool isValue(const APFloat &C) { return C.isZero(); } }; @@ -591,6 +671,15 @@ inline cstfp_pred_ty<is_neg_zero_fp> m_NegZeroFP() { return cstfp_pred_ty<is_neg_zero_fp>(); } +struct is_non_zero_fp { + bool isValue(const APFloat &C) { return C.isNonZero(); } +}; +/// Match a floating-point non-zero. +/// For vectors, this includes constants with undefined elements. +inline cstfp_pred_ty<is_non_zero_fp> m_NonZeroFP() { + return cstfp_pred_ty<is_non_zero_fp>(); +} + /////////////////////////////////////////////////////////////////////////////// template <typename Class> struct bind_ty { @@ -620,21 +709,38 @@ inline bind_ty<BinaryOperator> m_BinOp(BinaryOperator *&I) { return I; } /// Match a with overflow intrinsic, capturing it if we match. inline bind_ty<WithOverflowInst> m_WithOverflowInst(WithOverflowInst *&I) { return I; } -/// Match a ConstantInt, capturing the value if we match. -inline bind_ty<ConstantInt> m_ConstantInt(ConstantInt *&CI) { return CI; } - /// Match a Constant, capturing the value if we match. inline bind_ty<Constant> m_Constant(Constant *&C) { return C; } +/// Match a ConstantInt, capturing the value if we match. 
+inline bind_ty<ConstantInt> m_ConstantInt(ConstantInt *&CI) { return CI; } + /// Match a ConstantFP, capturing the value if we match. inline bind_ty<ConstantFP> m_ConstantFP(ConstantFP *&C) { return C; } +/// Match a ConstantExpr, capturing the value if we match. +inline bind_ty<ConstantExpr> m_ConstantExpr(ConstantExpr *&C) { return C; } + /// Match a basic block value, capturing it if we match. inline bind_ty<BasicBlock> m_BasicBlock(BasicBlock *&V) { return V; } inline bind_ty<const BasicBlock> m_BasicBlock(const BasicBlock *&V) { return V; } +/// Match an arbitrary immediate Constant and ignore it. +inline match_combine_and<class_match<Constant>, + match_unless<class_match<ConstantExpr>>> +m_ImmConstant() { + return m_CombineAnd(m_Constant(), m_Unless(m_ConstantExpr())); +} + +/// Match an immediate Constant, capturing the value if we match. +inline match_combine_and<bind_ty<Constant>, + match_unless<class_match<ConstantExpr>>> +m_ImmConstant(Constant *&C) { + return m_CombineAnd(m_Constant(C), m_Unless(m_ConstantExpr())); +} + /// Match a specified Value*. struct specificval_ty { const Value *Val; @@ -705,6 +811,7 @@ struct bind_const_intval_ty { /// Match a specified integer value or vector of all elements of that /// value. +template <bool AllowUndefs> struct specific_intval { APInt Val; @@ -714,7 +821,7 @@ struct specific_intval { const auto *CI = dyn_cast<ConstantInt>(V); if (!CI && V->getType()->isVectorTy()) if (const auto *C = dyn_cast<Constant>(V)) - CI = dyn_cast_or_null<ConstantInt>(C->getSplatValue()); + CI = dyn_cast_or_null<ConstantInt>(C->getSplatValue(AllowUndefs)); return CI && APInt::isSameValue(CI->getValue(), Val); } @@ -722,14 +829,22 @@ struct specific_intval { /// Match a specific integer value or vector with all elements equal to /// the value. -inline specific_intval m_SpecificInt(APInt V) { - return specific_intval(std::move(V)); +inline specific_intval<false> m_SpecificInt(APInt V) { + return specific_intval<false>(std::move(V)); } -inline specific_intval m_SpecificInt(uint64_t V) { +inline specific_intval<false> m_SpecificInt(uint64_t V) { return m_SpecificInt(APInt(64, V)); } +inline specific_intval<true> m_SpecificIntAllowUndef(APInt V) { + return specific_intval<true>(std::move(V)); +} + +inline specific_intval<true> m_SpecificIntAllowUndef(uint64_t V) { + return m_SpecificIntAllowUndef(APInt(64, V)); +} + /// Match a ConstantInt and bind to its value. This does not match /// ConstantInts wider than 64-bits. inline bind_const_intval_ty m_ConstantInt(uint64_t &V) { return V; } @@ -1442,6 +1557,12 @@ inline CastClass_match<OpTy, Instruction::PtrToInt> m_PtrToInt(const OpTy &Op) { return CastClass_match<OpTy, Instruction::PtrToInt>(Op); } +/// Matches IntToPtr. +template <typename OpTy> +inline CastClass_match<OpTy, Instruction::IntToPtr> m_IntToPtr(const OpTy &Op) { + return CastClass_match<OpTy, Instruction::IntToPtr>(Op); +} + /// Matches Trunc. 
template <typename OpTy> inline CastClass_match<OpTy, Instruction::Trunc> m_Trunc(const OpTy &Op) { @@ -1590,6 +1711,17 @@ struct MaxMin_match { MaxMin_match(const LHS_t &LHS, const RHS_t &RHS) : L(LHS), R(RHS) {} template <typename OpTy> bool match(OpTy *V) { + if (auto *II = dyn_cast<IntrinsicInst>(V)) { + Intrinsic::ID IID = II->getIntrinsicID(); + if ((IID == Intrinsic::smax && Pred_t::match(ICmpInst::ICMP_SGT)) || + (IID == Intrinsic::smin && Pred_t::match(ICmpInst::ICMP_SLT)) || + (IID == Intrinsic::umax && Pred_t::match(ICmpInst::ICMP_UGT)) || + (IID == Intrinsic::umin && Pred_t::match(ICmpInst::ICMP_ULT))) { + Value *LHS = II->getOperand(0), *RHS = II->getOperand(1); + return (L.match(LHS) && R.match(RHS)) || + (Commutable && L.match(RHS) && R.match(LHS)); + } + } // Look for "(x pred y) ? x : y" or "(x pred y) ? y : x". auto *SI = dyn_cast<SelectInst>(V); if (!SI) @@ -1697,6 +1829,17 @@ inline MaxMin_match<ICmpInst, LHS, RHS, umin_pred_ty> m_UMin(const LHS &L, return MaxMin_match<ICmpInst, LHS, RHS, umin_pred_ty>(L, R); } +template <typename LHS, typename RHS> +inline match_combine_or< + match_combine_or<MaxMin_match<ICmpInst, LHS, RHS, smax_pred_ty>, + MaxMin_match<ICmpInst, LHS, RHS, smin_pred_ty>>, + match_combine_or<MaxMin_match<ICmpInst, LHS, RHS, umax_pred_ty>, + MaxMin_match<ICmpInst, LHS, RHS, umin_pred_ty>>> +m_MaxOrMin(const LHS &L, const RHS &R) { + return m_CombineOr(m_CombineOr(m_SMax(L, R), m_SMin(L, R)), + m_CombineOr(m_UMax(L, R), m_UMin(L, R))); +} + /// Match an 'ordered' floating point maximum function. /// Floating point has one special value 'NaN'. Therefore, there is no total /// order. However, if we can ignore the 'NaN' value (for example, because of a @@ -1987,6 +2130,18 @@ inline typename m_Intrinsic_Ty<Opnd0, Opnd1>::Ty m_FMax(const Opnd0 &Op0, return m_Intrinsic<Intrinsic::maxnum>(Op0, Op1); } +template <typename Opnd0, typename Opnd1, typename Opnd2> +inline typename m_Intrinsic_Ty<Opnd0, Opnd1, Opnd2>::Ty +m_FShl(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2) { + return m_Intrinsic<Intrinsic::fshl>(Op0, Op1, Op2); +} + +template <typename Opnd0, typename Opnd1, typename Opnd2> +inline typename m_Intrinsic_Ty<Opnd0, Opnd1, Opnd2>::Ty +m_FShr(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2) { + return m_Intrinsic<Intrinsic::fshr>(Op0, Op1, Op2); +} + //===----------------------------------------------------------------------===// // Matchers for two-operands operators with the operators in either order // @@ -2048,6 +2203,15 @@ m_Neg(const ValTy &V) { return m_Sub(m_ZeroInt(), V); } +/// Matches a 'Neg' as 'sub nsw 0, V'. +template <typename ValTy> +inline OverflowingBinaryOp_match<cst_pred_ty<is_zero_int>, ValTy, + Instruction::Sub, + OverflowingBinaryOperator::NoSignedWrap> +m_NSWNeg(const ValTy &V) { + return m_NSWSub(m_ZeroInt(), V); +} + /// Matches a 'Not' as 'xor V, -1' or 'xor -1, V'. 
template <typename ValTy> inline BinaryOp_match<ValTy, cst_pred_ty<is_all_ones>, Instruction::Xor, true> @@ -2080,6 +2244,17 @@ m_c_UMax(const LHS &L, const RHS &R) { return MaxMin_match<ICmpInst, LHS, RHS, umax_pred_ty, true>(L, R); } +template <typename LHS, typename RHS> +inline match_combine_or< + match_combine_or<MaxMin_match<ICmpInst, LHS, RHS, smax_pred_ty, true>, + MaxMin_match<ICmpInst, LHS, RHS, smin_pred_ty, true>>, + match_combine_or<MaxMin_match<ICmpInst, LHS, RHS, umax_pred_ty, true>, + MaxMin_match<ICmpInst, LHS, RHS, umin_pred_ty, true>>> +m_c_MaxOrMin(const LHS &L, const RHS &R) { + return m_CombineOr(m_CombineOr(m_c_SMax(L, R), m_c_SMin(L, R)), + m_CombineOr(m_c_UMax(L, R), m_c_UMin(L, R))); +} + /// Matches FAdd with LHS and RHS in either order. template <typename LHS, typename RHS> inline BinaryOp_match<LHS, RHS, Instruction::FAdd, true> @@ -2153,6 +2328,29 @@ inline ExtractValue_match<Ind, Val_t> m_ExtractValue(const Val_t &V) { return ExtractValue_match<Ind, Val_t>(V); } +/// Matcher for a single index InsertValue instruction. +template <int Ind, typename T0, typename T1> struct InsertValue_match { + T0 Op0; + T1 Op1; + + InsertValue_match(const T0 &Op0, const T1 &Op1) : Op0(Op0), Op1(Op1) {} + + template <typename OpTy> bool match(OpTy *V) { + if (auto *I = dyn_cast<InsertValueInst>(V)) { + return Op0.match(I->getOperand(0)) && Op1.match(I->getOperand(1)) && + I->getNumIndices() == 1 && Ind == I->getIndices()[0]; + } + return false; + } +}; + +/// Matches a single index InsertValue instruction. +template <int Ind, typename Val_t, typename Elt_t> +inline InsertValue_match<Ind, Val_t, Elt_t> m_InsertValue(const Val_t &Val, + const Elt_t &Elt) { + return InsertValue_match<Ind, Val_t, Elt_t>(Val, Elt); +} + /// Matches patterns for `vscale`. This can either be a call to `llvm.vscale` or /// the constant expression /// `ptrtoint(gep <vscale x 1 x i8>, <vscale x 1 x i8>* null, i32 1>` @@ -2189,6 +2387,58 @@ inline VScaleVal_match m_VScale(const DataLayout &DL) { return VScaleVal_match(DL); } +template <typename LHS, typename RHS, unsigned Opcode> +struct LogicalOp_match { + LHS L; + RHS R; + + LogicalOp_match(const LHS &L, const RHS &R) : L(L), R(R) {} + + template <typename T> bool match(T *V) { + if (auto *I = dyn_cast<Instruction>(V)) { + if (!I->getType()->isIntOrIntVectorTy(1)) + return false; + + if (I->getOpcode() == Opcode && L.match(I->getOperand(0)) && + R.match(I->getOperand(1))) + return true; + + if (auto *SI = dyn_cast<SelectInst>(I)) { + if (Opcode == Instruction::And) { + if (const auto *C = dyn_cast<Constant>(SI->getFalseValue())) + if (C->isNullValue() && L.match(SI->getCondition()) && + R.match(SI->getTrueValue())) + return true; + } else { + assert(Opcode == Instruction::Or); + if (const auto *C = dyn_cast<Constant>(SI->getTrueValue())) + if (C->isOneValue() && L.match(SI->getCondition()) && + R.match(SI->getFalseValue())) + return true; + } + } + } + + return false; + } +}; + +/// Matches L && R either in the form of L & R or L ? R : false. +/// Note that the latter form is poison-blocking. +template <typename LHS, typename RHS> +inline LogicalOp_match<LHS, RHS, Instruction::And> +m_LogicalAnd(const LHS &L, const RHS &R) { + return LogicalOp_match<LHS, RHS, Instruction::And>(L, R); +} + +/// Matches L || R either in the form of L | R or L ? true : R. +/// Note that the latter form is poison-blocking. 
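/// (Editorial usage sketch, not part of the imported diff.) Like the other
/// PatternMatch helpers, the new logical matchers are applied to some Value *V:
///   Value *A, *B;
///   if (match(V, m_LogicalOr(m_Value(A), m_Value(B))))
///     ; // matches `or i1 %a, %b` as well as `select i1 %a, i1 true, i1 %b`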
+template <typename LHS, typename RHS> +inline LogicalOp_match<LHS, RHS, Instruction::Or> +m_LogicalOr(const LHS &L, const RHS &R) { + return LogicalOp_match<LHS, RHS, Instruction::Or>(L, R); +} + } // end namespace PatternMatch } // end namespace llvm diff --git a/llvm/include/llvm/IR/PredIteratorCache.h b/llvm/include/llvm/IR/PredIteratorCache.h index cc835277910b..6bbd7e5e87a0 100644 --- a/llvm/include/llvm/IR/PredIteratorCache.h +++ b/llvm/include/llvm/IR/PredIteratorCache.h @@ -44,7 +44,7 @@ private: if (Entry) return Entry; - SmallVector<BasicBlock *, 32> PredCache(pred_begin(BB), pred_end(BB)); + SmallVector<BasicBlock *, 32> PredCache(predecessors(BB)); PredCache.push_back(nullptr); // null terminator. BlockToPredCountMap[BB] = PredCache.size() - 1; @@ -58,7 +58,7 @@ private: auto Result = BlockToPredCountMap.find(BB); if (Result != BlockToPredCountMap.end()) return Result->second; - return BlockToPredCountMap[BB] = std::distance(pred_begin(BB), pred_end(BB)); + return BlockToPredCountMap[BB] = pred_size(BB); } public: diff --git a/llvm/include/llvm/IR/PrintPasses.h b/llvm/include/llvm/IR/PrintPasses.h new file mode 100644 index 000000000000..1fa7c1893e20 --- /dev/null +++ b/llvm/include/llvm/IR/PrintPasses.h @@ -0,0 +1,44 @@ +//===- PrintPasses.h - Determining whether/when to print IR ---------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_IR_PRINTPASSES_H +#define LLVM_IR_PRINTPASSES_H + +#include "llvm/ADT/StringRef.h" +#include <vector> + +namespace llvm { + +// Returns true if printing before/after some pass is enabled, whether all +// passes or a specific pass. +bool shouldPrintBeforeSomePass(); +bool shouldPrintAfterSomePass(); + +// Returns true if we should print before/after a specific pass. The argument +// should be the pass ID, e.g. "instcombine". +bool shouldPrintBeforePass(StringRef PassID); +bool shouldPrintAfterPass(StringRef PassID); + +// Returns true if we should print before/after all passes. +bool shouldPrintBeforeAll(); +bool shouldPrintAfterAll(); + +// The list of passes to print before/after, if we only want to print +// before/after specific passes. +std::vector<std::string> printBeforePasses(); +std::vector<std::string> printAfterPasses(); + +// Returns true if we should always print the entire module. +bool forcePrintModuleIR(); + +// Returns true if we should print the function. +bool isFunctionInPrintList(StringRef FunctionName); + +} // namespace llvm + +#endif // LLVM_IR_PRINTPASSES_H diff --git a/llvm/include/llvm/IR/PseudoProbe.h b/llvm/include/llvm/IR/PseudoProbe.h new file mode 100644 index 000000000000..e0370c264102 --- /dev/null +++ b/llvm/include/llvm/IR/PseudoProbe.h @@ -0,0 +1,66 @@ +//===- PseudoProbe.h - Pseudo Probe IR Helpers ------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Pseudo probe IR intrinsic and dwarf discriminator manipulation routines. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_IR_PSEUDOPROBE_H +#define LLVM_IR_PSEUDOPROBE_H + +#include "llvm/ADT/Optional.h" +#include <cassert> +#include <cstdint> + +namespace llvm { + +class Instruction; + +constexpr const char *PseudoProbeDescMetadataName = "llvm.pseudo_probe_desc"; + +enum class PseudoProbeType { Block = 0, IndirectCall, DirectCall }; + +struct PseudoProbeDwarfDiscriminator { + // The following APIs encodes/decodes per-probe information to/from a + // 32-bit integer which is organized as: + // [2:0] - 0x7, this is reserved for regular discriminator, + // see DWARF discriminator encoding rule + // [18:3] - probe id + // [25:19] - reserved + // [28:26] - probe type, see PseudoProbeType + // [31:29] - reserved for probe attributes + static uint32_t packProbeData(uint32_t Index, uint32_t Type) { + assert(Index <= 0xFFFF && "Probe index too big to encode, exceeding 2^16"); + assert(Type <= 0x7 && "Probe type too big to encode, exceeding 7"); + return (Index << 3) | (Type << 26) | 0x7; + } + + static uint32_t extractProbeIndex(uint32_t Value) { + return (Value >> 3) & 0xFFFF; + } + + static uint32_t extractProbeType(uint32_t Value) { + return (Value >> 26) & 0x7; + } + + static uint32_t extractProbeAttributes(uint32_t Value) { + return (Value >> 29) & 0x7; + } +}; + +struct PseudoProbe { + uint32_t Id; + uint32_t Type; + uint32_t Attr; +}; + +Optional<PseudoProbe> extractProbe(const Instruction &Inst); + +} // end namespace llvm + +#endif // LLVM_IR_PSEUDOPROBE_H diff --git a/llvm/include/llvm/IR/ReplaceConstant.h b/llvm/include/llvm/IR/ReplaceConstant.h new file mode 100644 index 000000000000..753f6d558ef8 --- /dev/null +++ b/llvm/include/llvm/IR/ReplaceConstant.h @@ -0,0 +1,28 @@ +//===- ReplaceConstant.h - Replacing LLVM constant expressions --*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file declares the utility function for replacing LLVM constant +// expressions by instructions. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_IR_REPLACECONSTANT_H +#define LLVM_IR_REPLACECONSTANT_H + +#include "llvm/IR/Constants.h" +#include "llvm/IR/Instruction.h" + +namespace llvm { + +/// Create a replacement instruction for constant expression \p CE and insert +/// it before \p Instr. 
+Instruction *createReplacementInstr(ConstantExpr *CE, Instruction *Instr); + +} // end namespace llvm + +#endif // LLVM_IR_REPLACECONSTANT_H diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.def b/llvm/include/llvm/IR/RuntimeLibcalls.def index 903db6c70498..c73172612b1e 100644 --- a/llvm/include/llvm/IR/RuntimeLibcalls.def +++ b/llvm/include/llvm/IR/RuntimeLibcalls.def @@ -286,7 +286,9 @@ HANDLE_LIBCALL(FPEXT_F64_PPCF128, "__gcc_dtoq") HANDLE_LIBCALL(FPEXT_F80_F128, "__extendxftf2") HANDLE_LIBCALL(FPEXT_F64_F128, "__extenddftf2") HANDLE_LIBCALL(FPEXT_F32_F128, "__extendsftf2") +HANDLE_LIBCALL(FPEXT_F16_F128, "__extendhftf2") HANDLE_LIBCALL(FPEXT_F32_F64, "__extendsfdf2") +HANDLE_LIBCALL(FPEXT_F16_F64, "__extendhfdf2") HANDLE_LIBCALL(FPEXT_F16_F32, "__gnu_h2f_ieee") HANDLE_LIBCALL(FPROUND_F32_F16, "__gnu_f2h_ieee") HANDLE_LIBCALL(FPROUND_F64_F16, "__truncdfhf2") @@ -301,6 +303,9 @@ HANDLE_LIBCALL(FPROUND_F80_F64, "__truncxfdf2") HANDLE_LIBCALL(FPROUND_F128_F64, "__trunctfdf2") HANDLE_LIBCALL(FPROUND_PPCF128_F64, "__gcc_qtod") HANDLE_LIBCALL(FPROUND_F128_F80, "__trunctfxf2") +HANDLE_LIBCALL(FPTOSINT_F16_I32, "__fixhfsi") +HANDLE_LIBCALL(FPTOSINT_F16_I64, "__fixhfdi") +HANDLE_LIBCALL(FPTOSINT_F16_I128, "__fixhfti") HANDLE_LIBCALL(FPTOSINT_F32_I32, "__fixsfsi") HANDLE_LIBCALL(FPTOSINT_F32_I64, "__fixsfdi") HANDLE_LIBCALL(FPTOSINT_F32_I128, "__fixsfti") @@ -316,6 +321,9 @@ HANDLE_LIBCALL(FPTOSINT_F128_I128, "__fixtfti") HANDLE_LIBCALL(FPTOSINT_PPCF128_I32, "__gcc_qtou") HANDLE_LIBCALL(FPTOSINT_PPCF128_I64, "__fixtfdi") HANDLE_LIBCALL(FPTOSINT_PPCF128_I128, "__fixtfti") +HANDLE_LIBCALL(FPTOUINT_F16_I32, "__fixunshfsi") +HANDLE_LIBCALL(FPTOUINT_F16_I64, "__fixunshfdi") +HANDLE_LIBCALL(FPTOUINT_F16_I128, "__fixunshfti") HANDLE_LIBCALL(FPTOUINT_F32_I32, "__fixunssfsi") HANDLE_LIBCALL(FPTOUINT_F32_I64, "__fixunssfdi") HANDLE_LIBCALL(FPTOUINT_F32_I128, "__fixunssfti") @@ -331,31 +339,37 @@ HANDLE_LIBCALL(FPTOUINT_F128_I128, "__fixunstfti") HANDLE_LIBCALL(FPTOUINT_PPCF128_I32, "__fixunstfsi") HANDLE_LIBCALL(FPTOUINT_PPCF128_I64, "__fixunstfdi") HANDLE_LIBCALL(FPTOUINT_PPCF128_I128, "__fixunstfti") +HANDLE_LIBCALL(SINTTOFP_I32_F16, "__floatsihf") HANDLE_LIBCALL(SINTTOFP_I32_F32, "__floatsisf") HANDLE_LIBCALL(SINTTOFP_I32_F64, "__floatsidf") HANDLE_LIBCALL(SINTTOFP_I32_F80, "__floatsixf") HANDLE_LIBCALL(SINTTOFP_I32_F128, "__floatsitf") HANDLE_LIBCALL(SINTTOFP_I32_PPCF128, "__gcc_itoq") +HANDLE_LIBCALL(SINTTOFP_I64_F16, "__floatdihf") HANDLE_LIBCALL(SINTTOFP_I64_F32, "__floatdisf") HANDLE_LIBCALL(SINTTOFP_I64_F64, "__floatdidf") HANDLE_LIBCALL(SINTTOFP_I64_F80, "__floatdixf") HANDLE_LIBCALL(SINTTOFP_I64_F128, "__floatditf") HANDLE_LIBCALL(SINTTOFP_I64_PPCF128, "__floatditf") +HANDLE_LIBCALL(SINTTOFP_I128_F16, "__floattihf") HANDLE_LIBCALL(SINTTOFP_I128_F32, "__floattisf") HANDLE_LIBCALL(SINTTOFP_I128_F64, "__floattidf") HANDLE_LIBCALL(SINTTOFP_I128_F80, "__floattixf") HANDLE_LIBCALL(SINTTOFP_I128_F128, "__floattitf") HANDLE_LIBCALL(SINTTOFP_I128_PPCF128, "__floattitf") +HANDLE_LIBCALL(UINTTOFP_I32_F16, "__floatunsihf") HANDLE_LIBCALL(UINTTOFP_I32_F32, "__floatunsisf") HANDLE_LIBCALL(UINTTOFP_I32_F64, "__floatunsidf") HANDLE_LIBCALL(UINTTOFP_I32_F80, "__floatunsixf") HANDLE_LIBCALL(UINTTOFP_I32_F128, "__floatunsitf") HANDLE_LIBCALL(UINTTOFP_I32_PPCF128, "__gcc_utoq") +HANDLE_LIBCALL(UINTTOFP_I64_F16, "__floatundihf") HANDLE_LIBCALL(UINTTOFP_I64_F32, "__floatundisf") HANDLE_LIBCALL(UINTTOFP_I64_F64, "__floatundidf") HANDLE_LIBCALL(UINTTOFP_I64_F80, "__floatundixf") HANDLE_LIBCALL(UINTTOFP_I64_F128, 
"__floatunditf") HANDLE_LIBCALL(UINTTOFP_I64_PPCF128, "__floatunditf") +HANDLE_LIBCALL(UINTTOFP_I128_F16, "__floatuntihf") HANDLE_LIBCALL(UINTTOFP_I128_F32, "__floatuntisf") HANDLE_LIBCALL(UINTTOFP_I128_F64, "__floatuntidf") HANDLE_LIBCALL(UINTTOFP_I128_F80, "__floatuntixf") @@ -544,6 +558,23 @@ HANDLE_LIBCALL(ATOMIC_FETCH_NAND_4, "__atomic_fetch_nand_4") HANDLE_LIBCALL(ATOMIC_FETCH_NAND_8, "__atomic_fetch_nand_8") HANDLE_LIBCALL(ATOMIC_FETCH_NAND_16, "__atomic_fetch_nand_16") +// Out-of-line atomics libcalls +#define HLCALLS(A, N) \ + HANDLE_LIBCALL(A##N##_RELAX, nullptr) \ + HANDLE_LIBCALL(A##N##_ACQ, nullptr) \ + HANDLE_LIBCALL(A##N##_REL, nullptr) \ + HANDLE_LIBCALL(A##N##_ACQ_REL, nullptr) +#define HLCALL5(A) \ + HLCALLS(A, 1) HLCALLS(A, 2) HLCALLS(A, 4) HLCALLS(A, 8) HLCALLS(A, 16) +HLCALL5(OUTLINE_ATOMIC_CAS) +HLCALL5(OUTLINE_ATOMIC_SWP) +HLCALL5(OUTLINE_ATOMIC_LDADD) +HLCALL5(OUTLINE_ATOMIC_LDSET) +HLCALL5(OUTLINE_ATOMIC_LDCLR) +HLCALL5(OUTLINE_ATOMIC_LDEOR) +#undef HLCALLS +#undef HLCALL5 + // Stack Protector Fail HANDLE_LIBCALL(STACKPROTECTOR_CHECK_FAIL, "__stack_chk_fail") @@ -555,4 +586,3 @@ HANDLE_LIBCALL(RETURN_ADDRESS, nullptr) HANDLE_LIBCALL(UNKNOWN_LIBCALL, nullptr) -#undef HANDLE_LIBCALL diff --git a/llvm/include/llvm/IR/Statepoint.h b/llvm/include/llvm/IR/Statepoint.h index 1ace39c10701..6ce15839df46 100644 --- a/llvm/include/llvm/IR/Statepoint.h +++ b/llvm/include/llvm/IR/Statepoint.h @@ -136,7 +136,7 @@ public: /// Return an end iterator of the arguments to the underlying call const_op_iterator actual_arg_end() const { auto I = actual_arg_begin() + actual_arg_size(); - assert((arg_end() - I) >= 0); + assert((arg_end() - I) == 2); return I; } /// range adapter for actual call arguments @@ -147,16 +147,12 @@ public: const_op_iterator gc_transition_args_begin() const { if (auto Opt = getOperandBundle(LLVMContext::OB_gc_transition)) return Opt->Inputs.begin(); - auto I = actual_arg_end() + 1; - assert((arg_end() - I) >= 0); - return I; + return arg_end(); } const_op_iterator gc_transition_args_end() const { if (auto Opt = getOperandBundle(LLVMContext::OB_gc_transition)) return Opt->Inputs.end(); - auto I = gc_transition_args_begin() + getNumDeoptArgs(); - assert((arg_end() - I) >= 0); - return I; + return arg_end(); } /// range adapter for GC transition arguments @@ -167,19 +163,12 @@ public: const_op_iterator deopt_begin() const { if (auto Opt = getOperandBundle(LLVMContext::OB_deopt)) return Opt->Inputs.begin(); - // The current format has two length prefix bundles between call args and - // start of gc args. This will be removed in the near future. - uint64_t NumTrans = getNumGCTransitionArgs(); - const_op_iterator I = actual_arg_end() + 2 + NumTrans; - assert((arg_end() - I) >= 0); - return I; + return arg_end(); } const_op_iterator deopt_end() const { if (auto Opt = getOperandBundle(LLVMContext::OB_deopt)) return Opt->Inputs.end(); - auto I = deopt_begin() + getNumDeoptArgs(); - assert((arg_end() - I) >= 0); - return I; + return arg_end(); } /// range adapter for vm state arguments @@ -192,30 +181,16 @@ public: const_op_iterator gc_args_begin() const { if (auto Opt = getOperandBundle(LLVMContext::OB_gc_live)) return Opt->Inputs.begin(); - - // The current format has two length prefix bundles between call args and - // start of gc args. This will be removed in the near future. 
- uint64_t NumTrans = getNumGCTransitionArgs(); - uint64_t NumDeopt = getNumDeoptArgs(); - auto I = actual_arg_end() + 2 + NumTrans + NumDeopt; - assert((arg_end() - I) >= 0); - return I; + return arg_end(); } /// Return an end iterator for the gc argument range const_op_iterator gc_args_end() const { if (auto Opt = getOperandBundle(LLVMContext::OB_gc_live)) return Opt->Inputs.end(); - return arg_end(); } - /// Return the operand index at which the gc args begin - unsigned gcArgsStartIdx() const { - assert(!getOperandBundle(LLVMContext::OB_gc_live)); - return gc_args_begin() - op_begin(); - } - /// range adapter for gc arguments iterator_range<const_op_iterator> gc_args() const { return make_range(gc_args_begin(), gc_args_end()); @@ -236,19 +211,6 @@ public: return GRI; return nullptr; } - -private: - int getNumGCTransitionArgs() const { - const Value *NumGCTransitionArgs = *actual_arg_end(); - return cast<ConstantInt>(NumGCTransitionArgs)->getZExtValue(); - } - - int getNumDeoptArgs() const { - uint64_t NumTrans = getNumGCTransitionArgs(); - const_op_iterator trans_end = actual_arg_end() + 1 + NumTrans; - const Value *NumDeoptArgs = *trans_end; - return cast<ConstantInt>(NumDeoptArgs)->getZExtValue(); - } }; /// Common base class for representing values projected from a statepoint. diff --git a/llvm/include/llvm/IR/StructuralHash.h b/llvm/include/llvm/IR/StructuralHash.h new file mode 100644 index 000000000000..eb63a2140310 --- /dev/null +++ b/llvm/include/llvm/IR/StructuralHash.h @@ -0,0 +1,34 @@ +//===- llvm/IR/StructuralHash.h - IR Hash for expensive checks --*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file provides hashing of the LLVM IR structure to be used to check +// Passes modification status. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_IR_STRUCTURALHASH_H +#define LLVM_IR_STRUCTURALHASH_H + +#ifdef EXPENSIVE_CHECKS + +#include <cstdint> + +// This header is only meant to be used when -DEXPENSIVE_CHECKS is set +namespace llvm { + +class Function; +class Module; + +uint64_t StructuralHash(const Function &F); +uint64_t StructuralHash(const Module &M); + +} // end namespace llvm + +#endif + +#endif // LLVM_IR_STRUCTURALHASH_H diff --git a/llvm/include/llvm/IR/SymbolTableListTraits.h b/llvm/include/llvm/IR/SymbolTableListTraits.h index 5b793e5dbf28..8af712374bfa 100644 --- a/llvm/include/llvm/IR/SymbolTableListTraits.h +++ b/llvm/include/llvm/IR/SymbolTableListTraits.h @@ -76,9 +76,11 @@ private: /// getListOwner - Return the object that owns this list. If this is a list /// of instructions, it returns the BasicBlock that owns them. 
ItemParentClass *getListOwner() { - size_t Offset(size_t(&((ItemParentClass*)nullptr->*ItemParentClass:: - getSublistAccess(static_cast<ValueSubClass*>(nullptr))))); - ListTy *Anchor(static_cast<ListTy *>(this)); + size_t Offset = reinterpret_cast<size_t>( + &((ItemParentClass *)nullptr->*ItemParentClass::getSublistAccess( + static_cast<ValueSubClass *>( + nullptr)))); + ListTy *Anchor = static_cast<ListTy *>(this); return reinterpret_cast<ItemParentClass*>(reinterpret_cast<char*>(Anchor)- Offset); } diff --git a/llvm/include/llvm/IR/Type.h b/llvm/include/llvm/IR/Type.h index 1f546884b924..756c69dd6ae9 100644 --- a/llvm/include/llvm/IR/Type.h +++ b/llvm/include/llvm/IR/Type.h @@ -65,6 +65,7 @@ public: LabelTyID, ///< Labels MetadataTyID, ///< Metadata X86_MMXTyID, ///< MMX vectors (64 bits, X86 specific) + X86_AMXTyID, ///< AMX vectors (8192 bits, X86 specific) TokenTyID, ///< Tokens // Derived types... see DerivedTypes.h file. @@ -182,6 +183,9 @@ public: /// Return true if this is X86 MMX. bool isX86_MMXTy() const { return getTypeID() == X86_MMXTyID; } + /// Return true if this is X86 AMX. + bool isX86_AMXTy() const { return getTypeID() == X86_AMXTyID; } + /// Return true if this is a FP type or a vector of FP. bool isFPOrFPVectorTy() const { return getScalarType()->isFloatingPointTy(); } @@ -252,7 +256,7 @@ public: /// includes all first-class types except struct and array types. bool isSingleValueType() const { return isFloatingPointTy() || isX86_MMXTy() || isIntegerTy() || - isPointerTy() || isVectorTy(); + isPointerTy() || isVectorTy() || isX86_AMXTy(); } /// Return true if the type is an aggregate type. This means it is valid as @@ -268,8 +272,8 @@ public: bool isSized(SmallPtrSetImpl<Type*> *Visited = nullptr) const { // If it's a primitive, it is always sized. if (getTypeID() == IntegerTyID || isFloatingPointTy() || - getTypeID() == PointerTyID || - getTypeID() == X86_MMXTyID) + getTypeID() == PointerTyID || getTypeID() == X86_MMXTyID || + getTypeID() == X86_AMXTyID) return true; // If it is not something that can have a size (e.g. a function or label), // it doesn't have a size. 
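(Editorial aside, not part of the imported diff.) The Type.h hunks above and below add a first-class x86_amx type next to x86_mmx. A minimal sketch of client code querying it, assuming an existing LLVMContext named Ctx:

#include <cassert>
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Type.h"
using namespace llvm;

void checkAMX(LLVMContext &Ctx) {
  Type *AMXTy = Type::getX86_AMXTy(Ctx); // factory declared in the next hunk
  // Per the predicates added above, x86_amx is a sized, single-value type.
  assert(AMXTy->isX86_AMXTy() && AMXTy->isSingleValueType() && AMXTy->isSized());
}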
@@ -405,6 +409,7 @@ public: static Type *getFP128Ty(LLVMContext &C); static Type *getPPC_FP128Ty(LLVMContext &C); static Type *getX86_MMXTy(LLVMContext &C); + static Type *getX86_AMXTy(LLVMContext &C); static Type *getTokenTy(LLVMContext &C); static IntegerType *getIntNTy(LLVMContext &C, unsigned N); static IntegerType *getInt1Ty(LLVMContext &C); @@ -427,6 +432,26 @@ public: } llvm_unreachable("Unsupported type in Type::getScalarTy"); } + static Type *getFloatingPointTy(LLVMContext &C, const fltSemantics &S) { + Type *Ty; + if (&S == &APFloat::IEEEhalf()) + Ty = Type::getHalfTy(C); + else if (&S == &APFloat::BFloat()) + Ty = Type::getBFloatTy(C); + else if (&S == &APFloat::IEEEsingle()) + Ty = Type::getFloatTy(C); + else if (&S == &APFloat::IEEEdouble()) + Ty = Type::getDoubleTy(C); + else if (&S == &APFloat::x87DoubleExtended()) + Ty = Type::getX86_FP80Ty(C); + else if (&S == &APFloat::IEEEquad()) + Ty = Type::getFP128Ty(C); + else { + assert(&S == &APFloat::PPCDoubleDouble() && "Unknown FP format"); + Ty = Type::getPPC_FP128Ty(C); + } + return Ty; + } //===--------------------------------------------------------------------===// // Convenience methods for getting pointer types with one of the above builtin @@ -440,6 +465,7 @@ public: static PointerType *getFP128PtrTy(LLVMContext &C, unsigned AS = 0); static PointerType *getPPC_FP128PtrTy(LLVMContext &C, unsigned AS = 0); static PointerType *getX86_MMXPtrTy(LLVMContext &C, unsigned AS = 0); + static PointerType *getX86_AMXPtrTy(LLVMContext &C, unsigned AS = 0); static PointerType *getIntNPtrTy(LLVMContext &C, unsigned N, unsigned AS = 0); static PointerType *getInt1PtrTy(LLVMContext &C, unsigned AS = 0); static PointerType *getInt8PtrTy(LLVMContext &C, unsigned AS = 0); diff --git a/llvm/include/llvm/IR/User.h b/llvm/include/llvm/IR/User.h index ebfae1db2980..221bb5b2cb1c 100644 --- a/llvm/include/llvm/IR/User.h +++ b/llvm/include/llvm/IR/User.h @@ -45,7 +45,7 @@ class User : public Value { template <unsigned> friend struct HungoffOperandTraits; - LLVM_ATTRIBUTE_ALWAYS_INLINE inline static void * + LLVM_ATTRIBUTE_ALWAYS_INLINE static void * allocateFixedOperandUser(size_t, unsigned, unsigned); protected: diff --git a/llvm/include/llvm/IR/VPIntrinsics.def b/llvm/include/llvm/IR/VPIntrinsics.def index d3e1fc854373..981548c6dde9 100644 --- a/llvm/include/llvm/IR/VPIntrinsics.def +++ b/llvm/include/llvm/IR/VPIntrinsics.def @@ -17,68 +17,140 @@ // Provide definitions of macros so that users of this file do not have to // define everything to use it... // -#ifndef REGISTER_VP_INTRINSIC -#define REGISTER_VP_INTRINSIC(VPID, MASKPOS, VLENPOS) +// Register a VP intrinsic and begin its property scope. +// All VP intrinsic scopes are top level, ie it is illegal to place a +// BEGIN_REGISTER_VP_INTRINSIC within a VP intrinsic scope. +// \p VPID The VP intrinsic id. +// \p MASKPOS The mask operand position. +// \p EVLPOS The explicit vector length operand position. +#ifndef BEGIN_REGISTER_VP_INTRINSIC +#define BEGIN_REGISTER_VP_INTRINSIC(VPID, MASKPOS, EVLPOS) #endif -// Map this VP intrinsic to its functional Opcode -#ifndef HANDLE_VP_TO_OC -#define HANDLE_VP_TO_OC(VPID, OC) +// End the property scope of a VP intrinsic. +#ifndef END_REGISTER_VP_INTRINSIC +#define END_REGISTER_VP_INTRINSIC(VPID) #endif -///// Integer Arithmetic ///// +// Register a new VP SDNode and begin its property scope. +// When the SDNode scope is nested within a VP intrinsic scope, it is implicitly registered as the canonical SDNode for this VP intrinsic. 
+// There is one VP intrinsic that maps directly to one SDNode that goes by the +// same name. Since the operands are also the same, we open the property +// scopes for both the VPIntrinsic and the SDNode at once. +// \p SDOPC The SelectionDAG Node id (eg VP_ADD). +// \p LEGALPOS The operand position of the SDNode that is used for legalizing +// this SDNode. This can be `-1`, in which case the return type of +// the SDNode is used. +// \p TDNAME The name of the TableGen definition of this SDNode. +// \p MASKPOS The mask operand position. +// \p EVLPOS The explicit vector length operand position. +#ifndef BEGIN_REGISTER_VP_SDNODE +#define BEGIN_REGISTER_VP_SDNODE(SDOPC, LEGALPOS, TDNAME, MASKPOS, EVLPOS) +#endif + +// End the property scope of a new VP SDNode. +#ifndef END_REGISTER_VP_SDNODE +#define END_REGISTER_VP_SDNODE(SDOPC) +#endif + +// Helper macros for the common "1:1 - Intrinsic : SDNode" case. +// +// There is one VP intrinsic that maps directly to one SDNode that goes by the +// same name. Since the operands are also the same, we open the property +// scopes for both the VPIntrinsic and the SDNode at once. +// +// \p INTRIN The canonical name (eg `vp_add`, which at the same time is the +// name of the intrinsic and the TableGen def of the SDNode). +// \p MASKPOS The mask operand position. +// \p EVLPOS The explicit vector length operand position. +// \p SDOPC The SelectionDAG Node id (eg VP_ADD). +// \p LEGALPOS The operand position of the SDNode that is used for legalizing +// this SDNode. This can be `-1`, in which case the return type of +// the SDNode is used. +#define BEGIN_REGISTER_VP(INTRIN, MASKPOS, EVLPOS, SDOPC, LEGALPOS) \ +BEGIN_REGISTER_VP_INTRINSIC(INTRIN, MASKPOS, EVLPOS) \ +BEGIN_REGISTER_VP_SDNODE(SDOPC, LEGALPOS, INTRIN, MASKPOS, EVLPOS) + +#define END_REGISTER_VP(INTRIN, SDOPC) \ +END_REGISTER_VP_INTRINSIC(INTRIN) \ +END_REGISTER_VP_SDNODE(SDOPC) + + +// The following macros attach properties to the scope they are placed in. This +// assigns the property to the VP Intrinsic and/or SDNode that belongs to the +// scope. +// +// Property Macros { + +// The intrinsic and/or SDNode has the same function as this LLVM IR Opcode. +// \p OPC The standard IR opcode. +#ifndef HANDLE_VP_TO_OPC +#define HANDLE_VP_TO_OPC(OPC) +#endif + +/// } Property Macros + +///// Integer Arithmetic { + +// Specialized helper macro for integer binary operators (%x, %y, %mask, %evl). +#ifdef HELPER_REGISTER_BINARY_INT_VP +#error "The internal helper macro HELPER_REGISTER_BINARY_INT_VP is already defined!" 
+#endif +#define HELPER_REGISTER_BINARY_INT_VP(INTRIN, SDOPC, OPC) \ +BEGIN_REGISTER_VP(INTRIN, 2, 3, SDOPC, -1) \ +HANDLE_VP_TO_OPC(OPC) \ +END_REGISTER_VP(INTRIN, SDOPC) + + // llvm.vp.add(x,y,mask,vlen) -REGISTER_VP_INTRINSIC(vp_add, 2, 3) -HANDLE_VP_TO_OC(vp_add, Add) +HELPER_REGISTER_BINARY_INT_VP(vp_add, VP_ADD, Add) // llvm.vp.and(x,y,mask,vlen) -REGISTER_VP_INTRINSIC(vp_and, 2, 3) -HANDLE_VP_TO_OC(vp_and, And) +HELPER_REGISTER_BINARY_INT_VP(vp_and, VP_AND, And) // llvm.vp.ashr(x,y,mask,vlen) -REGISTER_VP_INTRINSIC(vp_ashr, 2, 3) -HANDLE_VP_TO_OC(vp_ashr, AShr) +HELPER_REGISTER_BINARY_INT_VP(vp_ashr, VP_ASHR, AShr) // llvm.vp.lshr(x,y,mask,vlen) -REGISTER_VP_INTRINSIC(vp_lshr, 2, 3) -HANDLE_VP_TO_OC(vp_lshr, LShr) +HELPER_REGISTER_BINARY_INT_VP(vp_lshr, VP_LSHR, LShr) // llvm.vp.mul(x,y,mask,vlen) -REGISTER_VP_INTRINSIC(vp_mul, 2, 3) -HANDLE_VP_TO_OC(vp_mul, Mul) +HELPER_REGISTER_BINARY_INT_VP(vp_mul, VP_MUL, Mul) // llvm.vp.or(x,y,mask,vlen) -REGISTER_VP_INTRINSIC(vp_or, 2, 3) -HANDLE_VP_TO_OC(vp_or, Or) +HELPER_REGISTER_BINARY_INT_VP(vp_or, VP_OR, Or) // llvm.vp.sdiv(x,y,mask,vlen) -REGISTER_VP_INTRINSIC(vp_sdiv, 2, 3) -HANDLE_VP_TO_OC(vp_sdiv, SDiv) +HELPER_REGISTER_BINARY_INT_VP(vp_sdiv, VP_SDIV, SDiv) // llvm.vp.shl(x,y,mask,vlen) -REGISTER_VP_INTRINSIC(vp_shl, 2, 3) -HANDLE_VP_TO_OC(vp_shl, Shl) +HELPER_REGISTER_BINARY_INT_VP(vp_shl, VP_SHL, Shl) // llvm.vp.srem(x,y,mask,vlen) -REGISTER_VP_INTRINSIC(vp_srem, 2, 3) -HANDLE_VP_TO_OC(vp_srem, SRem) +HELPER_REGISTER_BINARY_INT_VP(vp_srem, VP_SREM, SRem) // llvm.vp.sub(x,y,mask,vlen) -REGISTER_VP_INTRINSIC(vp_sub, 2, 3) -HANDLE_VP_TO_OC(vp_sub, Sub) +HELPER_REGISTER_BINARY_INT_VP(vp_sub, VP_SUB, Sub) // llvm.vp.udiv(x,y,mask,vlen) -REGISTER_VP_INTRINSIC(vp_udiv, 2, 3) -HANDLE_VP_TO_OC(vp_udiv, UDiv) +HELPER_REGISTER_BINARY_INT_VP(vp_udiv, VP_UDIV, UDiv) // llvm.vp.urem(x,y,mask,vlen) -REGISTER_VP_INTRINSIC(vp_urem, 2, 3) -HANDLE_VP_TO_OC(vp_urem, URem) +HELPER_REGISTER_BINARY_INT_VP(vp_urem, VP_UREM, URem) // llvm.vp.xor(x,y,mask,vlen) -REGISTER_VP_INTRINSIC(vp_xor, 2, 3) -HANDLE_VP_TO_OC(vp_xor, Xor) +HELPER_REGISTER_BINARY_INT_VP(vp_xor, VP_XOR, Xor) + +#undef HELPER_REGISTER_BINARY_INT_VP + +///// } Integer Arithmetic + -#undef REGISTER_VP_INTRINSIC -#undef HANDLE_VP_TO_OC +#undef BEGIN_REGISTER_VP +#undef BEGIN_REGISTER_VP_INTRINSIC +#undef BEGIN_REGISTER_VP_SDNODE +#undef END_REGISTER_VP +#undef END_REGISTER_VP_INTRINSIC +#undef END_REGISTER_VP_SDNODE +#undef HANDLE_VP_TO_OPC diff --git a/llvm/include/llvm/IR/Value.def b/llvm/include/llvm/IR/Value.def index aaf1651979a9..0a0125d319c3 100644 --- a/llvm/include/llvm/IR/Value.def +++ b/llvm/include/llvm/IR/Value.def @@ -23,6 +23,11 @@ #error "Missing macro definition of HANDLE_VALUE*" #endif +// If the LLVM_C_API macro is set, then values handled via HANDLE_*_EXCLUDE_LLVM_C_API will not be expanded in areas the HANDLE_* macro is used. If it is not set, then HANDLE_*_EXCLUDE_LLVM_C_API values are handled normally as their HANDLE_* counterparts. 
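(Editorial aside, not part of the imported diff.) As a sketch of the guard described in the comment above: a hypothetical consumer that only wants the values visible through the C API defines LLVM_C_API before including the .def file, and entries registered via HANDLE_CONSTANT_EXCLUDE_LLVM_C_API (such as DSOLocalEquivalent below) are then skipped:

// Hypothetical Value.def consumer; handleValueKind is an illustrative name.
#define LLVM_C_API 1
#define HANDLE_VALUE(Name) handleValueKind(#Name);
#include "llvm/IR/Value.def"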
+#ifndef LLVM_C_API +#define LLVM_C_API 0 +#endif + #ifndef HANDLE_MEMORY_VALUE #define HANDLE_MEMORY_VALUE(ValueName) HANDLE_VALUE(ValueName) #endif @@ -55,6 +60,15 @@ #define HANDLE_CONSTANT_MARKER(MarkerName, ValueName) #endif +#ifndef HANDLE_CONSTANT_EXCLUDE_LLVM_C_API +#define HANDLE_CONSTANT_EXCLUDE_LLVM_C_API(ValueName) HANDLE_CONSTANT(ValueName) +#endif + +#if LLVM_C_API +#undef HANDLE_CONSTANT_EXCLUDE_LLVM_C_API +#define HANDLE_CONSTANT_EXCLUDE_LLVM_C_API(ValueName) +#endif + // Having constant first makes the range check for isa<Constant> faster // and smaller by one operation. @@ -65,6 +79,7 @@ HANDLE_GLOBAL_VALUE(GlobalIFunc) HANDLE_GLOBAL_VALUE(GlobalVariable) HANDLE_CONSTANT(BlockAddress) HANDLE_CONSTANT(ConstantExpr) +HANDLE_CONSTANT_EXCLUDE_LLVM_C_API(DSOLocalEquivalent) // ConstantAggregate. HANDLE_CONSTANT(ConstantArray) @@ -73,6 +88,7 @@ HANDLE_CONSTANT(ConstantVector) // ConstantData. HANDLE_CONSTANT(UndefValue) +HANDLE_CONSTANT(PoisonValue) HANDLE_CONSTANT(ConstantAggregateZero) HANDLE_CONSTANT(ConstantDataArray) HANDLE_CONSTANT(ConstantDataVector) @@ -114,3 +130,5 @@ HANDLE_INSTRUCTION(Instruction) #undef HANDLE_INLINE_ASM_VALUE #undef HANDLE_VALUE #undef HANDLE_CONSTANT_MARKER +#undef HANDLE_CONSTANT_EXCLUDE_LLVM_C_API +#undef LLVM_C_API diff --git a/llvm/include/llvm/IR/Value.h b/llvm/include/llvm/IR/Value.h index 04ca68274626..2a9912d46c89 100644 --- a/llvm/include/llvm/IR/Value.h +++ b/llvm/include/llvm/IR/Value.h @@ -15,6 +15,7 @@ #include "llvm-c/Types.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringRef.h" #include "llvm/ADT/iterator_range.h" #include "llvm/IR/Use.h" #include "llvm/Support/Alignment.h" @@ -43,11 +44,11 @@ class GlobalVariable; class InlineAsm; class Instruction; class LLVMContext; +class MDNode; class Module; class ModuleSlotTracker; class raw_ostream; template<typename ValueTy> class StringMapEntry; -class StringRef; class Twine; class Type; class User; @@ -110,12 +111,13 @@ protected: /// /// Note, this should *NOT* be used directly by any class other than User. /// User uses this value to find the Use list. - enum : unsigned { NumUserOperandsBits = 28 }; + enum : unsigned { NumUserOperandsBits = 27 }; unsigned NumUserOperands : NumUserOperandsBits; // Use the same type as the bitfield above so that MSVC will pack them. unsigned IsUsedByMD : 1; unsigned HasName : 1; + unsigned HasMetadata : 1; // Has metadata attached to this? unsigned HasHungOffUses : 1; unsigned HasDescriptor : 1; @@ -279,6 +281,10 @@ public: /// \note It is an error to call V->takeName(V). void takeName(Value *V); +#ifndef NDEBUG + std::string getNameOrAsOperand() const; +#endif + /// Change all uses of this to point to a new Value. /// /// Go through the uses list for this definition and make each use point to @@ -424,25 +430,31 @@ public: return materialized_users(); } - /// Return true if there is exactly one user of this value. + /// Return true if there is exactly one use of this value. /// /// This is specialized because it is a common request and does not require /// traversing the whole use list. - bool hasOneUse() const { - const_use_iterator I = use_begin(), E = use_end(); - if (I == E) return false; - return ++I == E; - } + bool hasOneUse() const { return hasSingleElement(uses()); } - /// Return true if this Value has exactly N users. + /// Return true if this Value has exactly N uses. bool hasNUses(unsigned N) const; - /// Return true if this value has N users or more. + /// Return true if this value has N uses or more. 
/// /// This is logically equivalent to getNumUses() >= N. bool hasNUsesOrMore(unsigned N) const; - /// Return true if there is exactly one user of this value that cannot be + /// Return true if there is exactly one user of this value. + /// + /// Note that this is not the same as "has one use". If a value has one use, + /// then there certainly is a single user. But if value has several uses, + /// it is possible that all uses are in a single user, or not. + /// + /// This check is potentially costly, since it requires traversing, + /// in the worst case, the whole use list of a value. + bool hasOneUser() const; + + /// Return true if there is exactly one use of this value that cannot be /// dropped. /// /// This is specialized because it is a common request and does not require @@ -455,7 +467,7 @@ public: /// traversing the whole use list. bool hasNUndroppableUses(unsigned N) const; - /// Return true if this value has N users or more. + /// Return true if this value has N uses or more. /// /// This is logically equivalent to getNumUses() >= N. bool hasNUndroppableUsesOrMore(unsigned N) const; @@ -470,6 +482,12 @@ public: void dropDroppableUses(llvm::function_ref<bool(const Use *)> ShouldDrop = [](const Use *) { return true; }); + /// Remove every use of this value in \p User that can safely be removed. + void dropDroppableUsesIn(User &Usr); + + /// Remove the droppable use \p U. + static void dropDroppableUse(Use &U); + /// Check if this value is used in the specified basic block. bool isUsedInBasicBlock(const BasicBlock *BB) const; @@ -534,6 +552,68 @@ public: /// Return true if there is metadata referencing this value. bool isUsedByMetadata() const { return IsUsedByMD; } +protected: + /// Get the current metadata attachments for the given kind, if any. + /// + /// These functions require that the value have at most a single attachment + /// of the given kind, and return \c nullptr if such an attachment is missing. + /// @{ + MDNode *getMetadata(unsigned KindID) const; + MDNode *getMetadata(StringRef Kind) const; + /// @} + + /// Appends all attachments with the given ID to \c MDs in insertion order. + /// If the Value has no attachments with the given ID, or if ID is invalid, + /// leaves MDs unchanged. + /// @{ + void getMetadata(unsigned KindID, SmallVectorImpl<MDNode *> &MDs) const; + void getMetadata(StringRef Kind, SmallVectorImpl<MDNode *> &MDs) const; + /// @} + + /// Appends all metadata attached to this value to \c MDs, sorting by + /// KindID. The first element of each pair returned is the KindID, the second + /// element is the metadata value. Attachments with the same ID appear in + /// insertion order. + void + getAllMetadata(SmallVectorImpl<std::pair<unsigned, MDNode *>> &MDs) const; + + /// Return true if this value has any metadata attached to it. + bool hasMetadata() const { return (bool)HasMetadata; } + + /// Return true if this value has the given type of metadata attached. + /// @{ + bool hasMetadata(unsigned KindID) const { + return getMetadata(KindID) != nullptr; + } + bool hasMetadata(StringRef Kind) const { + return getMetadata(Kind) != nullptr; + } + /// @} + + /// Set a particular kind of metadata attachment. + /// + /// Sets the given attachment to \c MD, erasing it if \c MD is \c nullptr or + /// replacing it if it already exists. + /// @{ + void setMetadata(unsigned KindID, MDNode *Node); + void setMetadata(StringRef Kind, MDNode *Node); + /// @} + + /// Add a metadata attachment. 
+ /// @{ + void addMetadata(unsigned KindID, MDNode &MD); + void addMetadata(StringRef Kind, MDNode &MD); + /// @} + + /// Erase all metadata attachments with the given kind. + /// + /// \returns true if any metadata was removed. + bool eraseMetadata(unsigned KindID); + + /// Erase all metadata attached to this Value. + void clearMetadata(); + +public: /// Return true if this value is a swifterror value. /// /// swifterror values can be either a function argument or an alloca with a diff --git a/llvm/include/llvm/IR/ValueHandle.h b/llvm/include/llvm/IR/ValueHandle.h index badc1ca8d1f6..29560815ea55 100644 --- a/llvm/include/llvm/IR/ValueHandle.h +++ b/llvm/include/llvm/IR/ValueHandle.h @@ -258,13 +258,13 @@ template <> struct simplify_type<const WeakTrackingVH> { /// class turns into a trivial wrapper around a pointer. template <typename ValueTy> class AssertingVH -#ifndef NDEBUG - : public ValueHandleBase +#if LLVM_ENABLE_ABI_BREAKING_CHECKS + : public ValueHandleBase #endif - { +{ friend struct DenseMapInfo<AssertingVH<ValueTy>>; -#ifndef NDEBUG +#if LLVM_ENABLE_ABI_BREAKING_CHECKS Value *getRawValPtr() const { return ValueHandleBase::getValPtr(); } void setRawValPtr(Value *P) { ValueHandleBase::operator=(P); } #else @@ -280,14 +280,14 @@ class AssertingVH void setValPtr(ValueTy *P) { setRawValPtr(GetAsValue(P)); } public: -#ifndef NDEBUG +#if LLVM_ENABLE_ABI_BREAKING_CHECKS AssertingVH() : ValueHandleBase(Assert) {} AssertingVH(ValueTy *P) : ValueHandleBase(Assert, GetAsValue(P)) {} AssertingVH(const AssertingVH &RHS) : ValueHandleBase(Assert, RHS) {} #else AssertingVH() : ThePtr(nullptr) {} AssertingVH(ValueTy *P) : ThePtr(GetAsValue(P)) {} - AssertingVH(const AssertingVH<ValueTy> &) = default; + AssertingVH(const AssertingVH &) = default; #endif operator ValueTy*() const { @@ -442,9 +442,9 @@ public: /// PoisoningVH's as it moves. This is required because in non-assert mode this /// class turns into a trivial wrapper around a pointer. template <typename ValueTy> -class PoisoningVH -#ifndef NDEBUG - final : public CallbackVH +class PoisoningVH final +#if LLVM_ENABLE_ABI_BREAKING_CHECKS + : public CallbackVH #endif { friend struct DenseMapInfo<PoisoningVH<ValueTy>>; @@ -453,7 +453,7 @@ class PoisoningVH static Value *GetAsValue(Value *V) { return V; } static Value *GetAsValue(const Value *V) { return const_cast<Value *>(V); } -#ifndef NDEBUG +#if LLVM_ENABLE_ABI_BREAKING_CHECKS /// A flag tracking whether this value has been poisoned. 
/// /// On delete and RAUW, we leave the value pointer alone so that as a raw @@ -478,7 +478,7 @@ class PoisoningVH Poisoned = true; RemoveFromUseList(); } -#else // NDEBUG +#else // LLVM_ENABLE_ABI_BREAKING_CHECKS Value *ThePtr = nullptr; Value *getRawValPtr() const { return ThePtr; } @@ -486,14 +486,16 @@ class PoisoningVH #endif ValueTy *getValPtr() const { +#if LLVM_ENABLE_ABI_BREAKING_CHECKS assert(!Poisoned && "Accessed a poisoned value handle!"); +#endif return static_cast<ValueTy *>(getRawValPtr()); } void setValPtr(ValueTy *P) { setRawValPtr(GetAsValue(P)); } public: PoisoningVH() = default; -#ifndef NDEBUG +#if LLVM_ENABLE_ABI_BREAKING_CHECKS PoisoningVH(ValueTy *P) : CallbackVH(GetAsValue(P)) {} PoisoningVH(const PoisoningVH &RHS) : CallbackVH(RHS), Poisoned(RHS.Poisoned) {} diff --git a/llvm/include/llvm/IR/Verifier.h b/llvm/include/llvm/IR/Verifier.h index 62c33c8325eb..f4381d2ae4a9 100644 --- a/llvm/include/llvm/IR/Verifier.h +++ b/llvm/include/llvm/IR/Verifier.h @@ -116,6 +116,7 @@ public: Result run(Module &M, ModuleAnalysisManager &); Result run(Function &F, FunctionAnalysisManager &); + static bool isRequired() { return true; } }; /// Check a module for errors, but report debug info errors separately. @@ -141,6 +142,7 @@ public: PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + static bool isRequired() { return true; } }; } // end namespace llvm diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index 06e8507036ac..4f89179a03de 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -73,17 +73,19 @@ void initializeAlignmentFromAssumptionsPass(PassRegistry&); void initializeAlwaysInlinerLegacyPassPass(PassRegistry&); void initializeAssumeSimplifyPassLegacyPassPass(PassRegistry &); void initializeAssumeBuilderPassLegacyPassPass(PassRegistry &); +void initializeAnnotation2MetadataLegacyPass(PassRegistry &); +void initializeAnnotationRemarksLegacyPass(PassRegistry &); void initializeOpenMPOptLegacyPassPass(PassRegistry &); void initializeArgPromotionPass(PassRegistry&); void initializeAssumptionCacheTrackerPass(PassRegistry&); void initializeAtomicExpandPass(PassRegistry&); void initializeAttributorLegacyPassPass(PassRegistry&); void initializeAttributorCGSCCLegacyPassPass(PassRegistry &); -void initializeBBSectionsPreparePass(PassRegistry &); +void initializeBasicBlockSectionsPass(PassRegistry &); void initializeBDCELegacyPassPass(PassRegistry&); void initializeBarrierNoopPass(PassRegistry&); void initializeBasicAAWrapperPassPass(PassRegistry&); -void initializeBlockExtractorPass(PassRegistry &); +void initializeBlockExtractorLegacyPassPass(PassRegistry &); void initializeBlockFrequencyInfoWrapperPassPass(PassRegistry&); void initializeBoundsCheckingLegacyPassPass(PassRegistry&); void initializeBranchFolderPassPass(PassRegistry&); @@ -110,10 +112,11 @@ void initializeCallGraphViewerPass(PassRegistry&); void initializeCallGraphWrapperPassPass(PassRegistry&); void initializeCallSiteSplittingLegacyPassPass(PassRegistry&); void initializeCalledValuePropagationLegacyPassPass(PassRegistry &); +void initializeCheckDebugMachineModulePass(PassRegistry &); void initializeCodeGenPreparePass(PassRegistry&); void initializeConstantHoistingLegacyPassPass(PassRegistry&); void initializeConstantMergeLegacyPassPass(PassRegistry&); -void initializeConstantPropagationPass(PassRegistry&); +void 
initializeConstraintEliminationPass(PassRegistry &); void initializeControlHeightReductionLegacyPassPass(PassRegistry&); void initializeCorrelatedValuePropagationPass(PassRegistry&); void initializeCostModelAnalysisPass(PassRegistry&); @@ -122,8 +125,7 @@ void initializeDAEPass(PassRegistry&); void initializeDAHPass(PassRegistry&); void initializeDCELegacyPassPass(PassRegistry&); void initializeDSELegacyPassPass(PassRegistry&); -void initializeDataFlowSanitizerPass(PassRegistry&); -void initializeDeadInstEliminationPass(PassRegistry&); +void initializeDataFlowSanitizerLegacyPassPass(PassRegistry &); void initializeDeadMachineInstructionElimPass(PassRegistry&); void initializeDebugifyMachineModulePass(PassRegistry &); void initializeDelinearizationPass(PassRegistry&); @@ -138,7 +140,7 @@ void initializeDomPrinterPass(PassRegistry&); void initializeDomViewerPass(PassRegistry&); void initializeDominanceFrontierWrapperPassPass(PassRegistry&); void initializeDominatorTreeWrapperPassPass(PassRegistry&); -void initializeDwarfEHPreparePass(PassRegistry&); +void initializeDwarfEHPrepareLegacyPassPass(PassRegistry &); void initializeEarlyCSELegacyPassPass(PassRegistry&); void initializeEarlyCSEMemSSALegacyPassPass(PassRegistry&); void initializeEarlyIfConverterPass(PassRegistry&); @@ -177,11 +179,13 @@ void initializeGlobalSplitPass(PassRegistry&); void initializeGlobalsAAWrapperPassPass(PassRegistry&); void initializeGuardWideningLegacyPassPass(PassRegistry&); void initializeHardwareLoopsPass(PassRegistry&); +void initializeMemProfilerLegacyPassPass(PassRegistry &); void initializeHotColdSplittingLegacyPassPass(PassRegistry&); void initializeHWAddressSanitizerLegacyPassPass(PassRegistry &); -void initializeIPCPPass(PassRegistry&); void initializeIPSCCPLegacyPassPass(PassRegistry&); void initializeIRCELegacyPassPass(PassRegistry&); +void initializeIROutlinerLegacyPassPass(PassRegistry&); +void initializeIRSimilarityIdentifierWrapperPassPass(PassRegistry&); void initializeIRTranslatorPass(PassRegistry&); void initializeIVUsersWrapperPassPass(PassRegistry&); void initializeIfConverterPass(PassRegistry&); @@ -193,7 +197,7 @@ void initializeInferAddressSpacesPass(PassRegistry&); void initializeInferFunctionAttrsLegacyPassPass(PassRegistry&); void initializeInjectTLIMappingsLegacyPass(PassRegistry &); void initializeInlineCostAnalysisPass(PassRegistry&); -void initializeInstCountPass(PassRegistry&); +void initializeInstCountLegacyPassPass(PassRegistry &); void initializeInstNamerPass(PassRegistry&); void initializeInstSimplifyLegacyPassPass(PassRegistry &); void initializeInstrProfilingLegacyPassPass(PassRegistry&); @@ -219,7 +223,7 @@ void initializeLegalizerPass(PassRegistry&); void initializeGISelCSEAnalysisWrapperPassPass(PassRegistry &); void initializeGISelKnownBitsAnalysisPass(PassRegistry &); void initializeLibCallsShrinkWrapLegacyPassPass(PassRegistry&); -void initializeLintPass(PassRegistry&); +void initializeLintLegacyPassPass(PassRegistry &); void initializeLiveDebugValuesPass(PassRegistry&); void initializeLiveDebugVariablesPass(PassRegistry&); void initializeLiveIntervalsPass(PassRegistry&); @@ -235,17 +239,18 @@ void initializeLoopAccessLegacyAnalysisPass(PassRegistry&); void initializeLoopDataPrefetchLegacyPassPass(PassRegistry&); void initializeLoopDeletionLegacyPassPass(PassRegistry&); void initializeLoopDistributeLegacyPass(PassRegistry&); -void initializeLoopExtractorPass(PassRegistry&); +void initializeLoopExtractorLegacyPassPass(PassRegistry &); void 
initializeLoopGuardWideningLegacyPassPass(PassRegistry&); void initializeLoopFuseLegacyPass(PassRegistry&); void initializeLoopIdiomRecognizeLegacyPassPass(PassRegistry&); void initializeLoopInfoWrapperPassPass(PassRegistry&); void initializeLoopInstSimplifyLegacyPassPass(PassRegistry&); -void initializeLoopInterchangePass(PassRegistry&); +void initializeLoopInterchangeLegacyPassPass(PassRegistry &); +void initializeLoopFlattenLegacyPassPass(PassRegistry&); void initializeLoopLoadEliminationPass(PassRegistry&); void initializeLoopPassPass(PassRegistry&); void initializeLoopPredicationLegacyPassPass(PassRegistry&); -void initializeLoopRerollPass(PassRegistry&); +void initializeLoopRerollLegacyPassPass(PassRegistry &); void initializeLoopRotateLegacyPassPass(PassRegistry&); void initializeLoopSimplifyCFGLegacyPassPass(PassRegistry&); void initializeLoopSimplifyPass(PassRegistry&); @@ -254,8 +259,8 @@ void initializeLoopUnrollAndJamPass(PassRegistry&); void initializeLoopUnrollPass(PassRegistry&); void initializeLoopUnswitchPass(PassRegistry&); void initializeLoopVectorizePass(PassRegistry&); -void initializeLoopVersioningLICMPass(PassRegistry&); -void initializeLoopVersioningPassPass(PassRegistry&); +void initializeLoopVersioningLICMLegacyPassPass(PassRegistry &); +void initializeLoopVersioningLegacyPassPass(PassRegistry &); void initializeLowerAtomicLegacyPassPass(PassRegistry&); void initializeLowerConstantIntrinsicsPass(PassRegistry&); void initializeLowerEmuTLSPass(PassRegistry&); @@ -264,9 +269,10 @@ void initializeLowerGuardIntrinsicLegacyPassPass(PassRegistry&); void initializeLowerWidenableConditionLegacyPassPass(PassRegistry&); void initializeLowerIntrinsicsPass(PassRegistry&); void initializeLowerInvokeLegacyPassPass(PassRegistry&); -void initializeLowerSwitchPass(PassRegistry&); +void initializeLowerSwitchLegacyPassPass(PassRegistry &); void initializeLowerTypeTestsPass(PassRegistry&); void initializeLowerMatrixIntrinsicsLegacyPassPass(PassRegistry &); +void initializeLowerMatrixIntrinsicsMinimalLegacyPassPass(PassRegistry &); void initializeMIRCanonicalizerPass(PassRegistry &); void initializeMIRNamerPass(PassRegistry &); void initializeMIRPrintingPassPass(PassRegistry&); @@ -280,6 +286,7 @@ void initializeMachineCopyPropagationPass(PassRegistry&); void initializeMachineDominanceFrontierPass(PassRegistry&); void initializeMachineDominatorTreePass(PassRegistry&); void initializeMachineFunctionPrinterPassPass(PassRegistry&); +void initializeMachineFunctionSplitterPass(PassRegistry &); void initializeMachineLICMPass(PassRegistry&); void initializeMachineLoopInfoPass(PassRegistry&); void initializeMachineModuleInfoWrapperPassPass(PassRegistry &); @@ -303,7 +310,8 @@ void initializeMergeFunctionsLegacyPassPass(PassRegistry&); void initializeMergeICmpsLegacyPassPass(PassRegistry &); void initializeMergedLoadStoreMotionLegacyPassPass(PassRegistry&); void initializeMetaRenamerPass(PassRegistry&); -void initializeModuleDebugInfoPrinterPass(PassRegistry&); +void initializeModuleDebugInfoLegacyPrinterPass(PassRegistry &); +void initializeModuleMemProfilerLegacyPassPass(PassRegistry &); void initializeModuleSummaryIndexWrapperPassPass(PassRegistry&); void initializeModuloScheduleTestPass(PassRegistry&); void initializeMustExecutePrinterPass(PassRegistry&); @@ -314,9 +322,9 @@ void initializeNaryReassociateLegacyPassPass(PassRegistry&); void initializeNewGVNLegacyPassPass(PassRegistry&); void initializeObjCARCAAWrapperPassPass(PassRegistry&); void initializeObjCARCAPElimPass(PassRegistry&); 
-void initializeObjCARCContractPass(PassRegistry&); +void initializeObjCARCContractLegacyPassPass(PassRegistry &); void initializeObjCARCExpandPass(PassRegistry&); -void initializeObjCARCOptPass(PassRegistry&); +void initializeObjCARCOptLegacyPassPass(PassRegistry &); void initializeOptimizationRemarkEmitterWrapperPassPass(PassRegistry&); void initializeOptimizePHIsPass(PassRegistry&); void initializePAEvalPass(PassRegistry&); @@ -355,13 +363,14 @@ void initializeProfileSummaryInfoWrapperPassPass(PassRegistry&); void initializePromoteLegacyPassPass(PassRegistry&); void initializePruneEHPass(PassRegistry&); void initializeRABasicPass(PassRegistry&); +void initializePseudoProbeInserterPass(PassRegistry &); void initializeRAGreedyPass(PassRegistry&); void initializeReachingDefAnalysisPass(PassRegistry&); void initializeReassociateLegacyPassPass(PassRegistry&); void initializeRedundantDbgInstEliminationPass(PassRegistry&); void initializeRegAllocFastPass(PassRegistry&); void initializeRegBankSelectPass(PassRegistry&); -void initializeRegToMemPass(PassRegistry&); +void initializeRegToMemLegacyPass(PassRegistry&); void initializeRegUsageInfoCollectorPass(PassRegistry&); void initializeRegUsageInfoPropagationPass(PassRegistry&); void initializeRegionInfoPassPass(PassRegistry&); @@ -384,11 +393,11 @@ void initializeSafepointIRVerifierPass(PassRegistry&); void initializeSampleProfileLoaderLegacyPassPass(PassRegistry&); void initializeModuleSanitizerCoverageLegacyPassPass(PassRegistry &); void initializeScalarEvolutionWrapperPassPass(PassRegistry&); -void initializeScalarizeMaskedMemIntrinPass(PassRegistry&); +void initializeScalarizeMaskedMemIntrinLegacyPassPass(PassRegistry &); void initializeScalarizerLegacyPassPass(PassRegistry&); void initializeScavengerTestPass(PassRegistry&); void initializeScopedNoAliasAAWrapperPassPass(PassRegistry&); -void initializeSeparateConstOffsetFromGEPPass(PassRegistry&); +void initializeSeparateConstOffsetFromGEPLegacyPassPass(PassRegistry &); void initializeShadowStackGCLoweringPass(PassRegistry&); void initializeShrinkWrapPass(PassRegistry&); void initializeSimpleInlinerPass(PassRegistry&); @@ -405,16 +414,16 @@ void initializeStackProtectorPass(PassRegistry&); void initializeStackSafetyGlobalInfoWrapperPassPass(PassRegistry &); void initializeStackSafetyInfoWrapperPassPass(PassRegistry &); void initializeStackSlotColoringPass(PassRegistry&); -void initializeStraightLineStrengthReducePass(PassRegistry&); +void initializeStraightLineStrengthReduceLegacyPassPass(PassRegistry &); void initializeStripDeadDebugInfoPass(PassRegistry&); void initializeStripDeadPrototypesLegacyPassPass(PassRegistry&); void initializeStripDebugDeclarePass(PassRegistry&); void initializeStripDebugMachineModulePass(PassRegistry &); -void initializeStripGCRelocatesPass(PassRegistry&); +void initializeStripGCRelocatesLegacyPass(PassRegistry &); void initializeStripNonDebugSymbolsPass(PassRegistry&); -void initializeStripNonLineTableDebugInfoPass(PassRegistry&); +void initializeStripNonLineTableDebugLegacyPassPass(PassRegistry &); void initializeStripSymbolsPass(PassRegistry&); -void initializeStructurizeCFGPass(PassRegistry&); +void initializeStructurizeCFGLegacyPassPass(PassRegistry &); void initializeTailCallElimPass(PassRegistry&); void initializeTailDuplicatePass(PassRegistry&); void initializeTargetLibraryInfoWrapperPassPass(PassRegistry&); @@ -424,8 +433,8 @@ void initializeThreadSanitizerLegacyPassPass(PassRegistry&); void initializeTwoAddressInstructionPassPass(PassRegistry&); void 
initializeTypeBasedAAWrapperPassPass(PassRegistry&); void initializeTypePromotionPass(PassRegistry&); -void initializeUnifyFunctionExitNodesPass(PassRegistry&); -void initializeUnifyLoopExitsPass(PassRegistry &); +void initializeUnifyFunctionExitNodesLegacyPassPass(PassRegistry &); +void initializeUnifyLoopExitsLegacyPassPass(PassRegistry &); void initializeUnpackMachineBundlesPass(PassRegistry&); void initializeUnreachableBlockElimLegacyPassPass(PassRegistry&); void initializeUnreachableMachineBlockElimPass(PassRegistry&); diff --git a/llvm/include/llvm/InterfaceStub/ELFObjHandler.h b/llvm/include/llvm/InterfaceStub/ELFObjHandler.h new file mode 100644 index 000000000000..4ec158c1405f --- /dev/null +++ b/llvm/include/llvm/InterfaceStub/ELFObjHandler.h @@ -0,0 +1,47 @@ +//===- ELFObjHandler.h ------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===-----------------------------------------------------------------------===/ +/// +/// This supports reading and writing of elf dynamic shared objects. +/// +//===-----------------------------------------------------------------------===/ + +#ifndef LLVM_TOOLS_ELFABI_ELFOBJHANDLER_H +#define LLVM_TOOLS_ELFABI_ELFOBJHANDLER_H + +#include "llvm/InterfaceStub/ELFStub.h" +#include "llvm/Object/ELFObjectFile.h" +#include "llvm/Object/ELFTypes.h" +#include "llvm/Support/FileSystem.h" + +namespace llvm { + +class MemoryBuffer; + +namespace elfabi { + +enum class ELFTarget { ELF32LE, ELF32BE, ELF64LE, ELF64BE }; + +/// Attempt to read a binary ELF file from a MemoryBuffer. +Expected<std::unique_ptr<ELFStub>> readELFFile(MemoryBufferRef Buf); + +/// Attempt to write a binary ELF stub. +/// This function determines appropriate ELFType using the passed ELFTarget and +/// then writes a binary ELF stub to a specified file path. +/// +/// @param FilePath File path for writing the ELF binary. +/// @param Stub Source ELFStub to generate a binary ELF stub from. +/// @param OutputFormat Target ELFType to write binary as. +/// @param WriteIfChanged Whether or not to preserve timestamp if +/// the output stays the same. +Error writeBinaryStub(StringRef FilePath, const ELFStub &Stub, + ELFTarget OutputFormat, bool WriteIfChanged = false); + +} // end namespace elfabi +} // end namespace llvm + +#endif // LLVM_TOOLS_ELFABI_ELFOBJHANDLER_H diff --git a/llvm/include/llvm/TextAPI/ELF/ELFStub.h b/llvm/include/llvm/InterfaceStub/ELFStub.h index 76b2af121662..7832c1c7413b 100644 --- a/llvm/include/llvm/TextAPI/ELF/ELFStub.h +++ b/llvm/include/llvm/InterfaceStub/ELFStub.h @@ -16,8 +16,8 @@ #include "llvm/BinaryFormat/ELF.h" #include "llvm/Support/VersionTuple.h" -#include <vector> #include <set> +#include <vector> namespace llvm { namespace elfabi { @@ -42,15 +42,13 @@ struct ELFSymbol { bool Undefined; bool Weak; Optional<std::string> Warning; - bool operator<(const ELFSymbol &RHS) const { - return Name < RHS.Name; - } + bool operator<(const ELFSymbol &RHS) const { return Name < RHS.Name; } }; // A cumulative representation of ELF stubs. // Both textual and binary stubs will read into and write from this object. class ELFStub { -// TODO: Add support for symbol versioning. + // TODO: Add support for symbol versioning. 
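As a rough illustration of the new llvm::elfabi entry points added in ELFObjHandler.h, the sketch below reads a shared object and rewrites it as a binary stub; the function name convertToStub and the choice of ELF64LE are assumptions, and error handling is kept minimal.

    #include "llvm/InterfaceStub/ELFObjHandler.h"
    #include "llvm/Support/Error.h"
    #include "llvm/Support/MemoryBuffer.h"
    using namespace llvm;

    // Hypothetical driver around readELFFile()/writeBinaryStub().
    Error convertToStub(StringRef InPath, StringRef OutPath) {
      auto BufOrErr = MemoryBuffer::getFile(InPath);
      if (!BufOrErr)
        return errorCodeToError(BufOrErr.getError());
      Expected<std::unique_ptr<elfabi::ELFStub>> Stub =
          elfabi::readELFFile((*BufOrErr)->getMemBufferRef());
      if (!Stub)
        return Stub.takeError();
      return elfabi::writeBinaryStub(OutPath, **Stub,
                                     elfabi::ELFTarget::ELF64LE);
    }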
public: VersionTuple TbeVersion; Optional<std::string> SoName; diff --git a/llvm/include/llvm/TextAPI/ELF/TBEHandler.h b/llvm/include/llvm/InterfaceStub/TBEHandler.h index 76484410987f..5c523eba037e 100644 --- a/llvm/include/llvm/TextAPI/ELF/TBEHandler.h +++ b/llvm/include/llvm/InterfaceStub/TBEHandler.h @@ -15,8 +15,8 @@ #ifndef LLVM_TEXTAPI_ELF_TBEHANDLER_H #define LLVM_TEXTAPI_ELF_TBEHANDLER_H -#include "llvm/Support/VersionTuple.h" #include "llvm/Support/Error.h" +#include "llvm/Support/VersionTuple.h" #include <memory> namespace llvm { diff --git a/llvm/include/llvm/LTO/Config.h b/llvm/include/llvm/LTO/Config.h index 0a3e52316460..88c1452e5aa9 100644 --- a/llvm/include/llvm/LTO/Config.h +++ b/llvm/include/llvm/LTO/Config.h @@ -1,4 +1,4 @@ -//===-Config.h - LLVM Link Time Optimizer Configuration -------------------===// +//===-Config.h - LLVM Link Time Optimizer Configuration ---------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -15,9 +15,11 @@ #define LLVM_LTO_CONFIG_H #include "llvm/ADT/DenseSet.h" +#include "llvm/Config/llvm-config.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/IR/LegacyPassManager.h" #include "llvm/Passes/PassBuilder.h" #include "llvm/Support/CodeGen.h" #include "llvm/Target/TargetOptions.h" @@ -42,6 +44,8 @@ struct Config { TargetOptions Options; std::vector<std::string> MAttrs; std::vector<std::string> PassPlugins; + /// For adding passes that run right before codegen. + std::function<void(legacy::PassManager &)> PreCodeGenPassesHook; Optional<Reloc::Model> RelocModel = Reloc::PIC_; Optional<CodeModel::Model> CodeModel = None; CodeGenOpt::Level CGOptLevel = CodeGenOpt::Default; @@ -50,7 +54,7 @@ struct Config { bool DisableVerify = false; /// Use the new pass manager - bool UseNewPM = false; + bool UseNewPM = LLVM_ENABLE_NEW_PASS_MANAGER; /// Flag to indicate that the optimizer should not assume builtins are present /// on the target. @@ -113,16 +117,31 @@ struct Config { std::string SplitDwarfOutput; /// Optimization remarks file path. - std::string RemarksFilename = ""; + std::string RemarksFilename; /// Optimization remarks pass filter. - std::string RemarksPasses = ""; + std::string RemarksPasses; /// Whether to emit optimization remarks with hotness informations. bool RemarksWithHotness = false; + /// The minimum hotness value a diagnostic needs in order to be included in + /// optimization diagnostics. + /// + /// The threshold is an Optional value, which maps to one of the 3 states: + /// 1. 0 => threshold disabled. All emarks will be printed. + /// 2. positive int => manual threshold by user. Remarks with hotness exceed + /// threshold will be printed. + /// 3. None => 'auto' threshold by user. The actual value is not + /// available at command line, but will be synced with + /// hotness threhold from profile summary during + /// compilation. + /// + /// If threshold option is not specified, it is disabled by default. + llvm::Optional<uint64_t> RemarksHotnessThreshold = 0; + /// The format used for serializing remarks (default: YAML). - std::string RemarksFormat = ""; + std::string RemarksFormat; /// Whether to emit the pass manager debuggging informations. 
bool DebugPassManager = false; diff --git a/llvm/include/llvm/LTO/LTO.h b/llvm/include/llvm/LTO/LTO.h index 93456c0ae7ae..4f169137ee85 100644 --- a/llvm/include/llvm/LTO/LTO.h +++ b/llvm/include/llvm/LTO/LTO.h @@ -82,15 +82,19 @@ std::string getThinLTOOutputFile(const std::string &Path, const std::string &NewPrefix); /// Setup optimization remarks. -Expected<std::unique_ptr<ToolOutputFile>> -setupLLVMOptimizationRemarks(LLVMContext &Context, StringRef RemarksFilename, - StringRef RemarksPasses, StringRef RemarksFormat, - bool RemarksWithHotness, int Count = -1); +Expected<std::unique_ptr<ToolOutputFile>> setupLLVMOptimizationRemarks( + LLVMContext &Context, StringRef RemarksFilename, StringRef RemarksPasses, + StringRef RemarksFormat, bool RemarksWithHotness, + Optional<uint64_t> RemarksHotnessThreshold = 0, int Count = -1); /// Setups the output file for saving statistics. Expected<std::unique_ptr<ToolOutputFile>> setupStatsFile(StringRef StatsFilename); +/// Produces a container ordering for optimal multi-threaded processing. Returns +/// ordered indices to elements in the input array. +std::vector<int> generateModulesOrdering(ArrayRef<BitcodeModule *> R); + class LTO; struct SymbolResolution; class ThinBackendProc; diff --git a/llvm/include/llvm/LTO/LTOBackend.h b/llvm/include/llvm/LTO/LTOBackend.h index 0226e4a3fbf5..824c7d143854 100644 --- a/llvm/include/llvm/LTO/LTOBackend.h +++ b/llvm/include/llvm/LTO/LTOBackend.h @@ -33,6 +33,12 @@ class Target; namespace lto { +/// Runs middle-end LTO optimizations on \p Mod. +bool opt(const Config &Conf, TargetMachine *TM, unsigned Task, Module &Mod, + bool IsThinLTO, ModuleSummaryIndex *ExportSummary, + const ModuleSummaryIndex *ImportSummary, + const std::vector<uint8_t> &CmdArgs); + /// Runs a regular LTO backend. The regular LTO backend can also act as the /// regular LTO phase of ThinLTO, which may need to access the combined index. Error backend(const Config &C, AddStreamFn AddStream, @@ -44,10 +50,27 @@ Error thinBackend(const Config &C, unsigned Task, AddStreamFn AddStream, Module &M, const ModuleSummaryIndex &CombinedIndex, const FunctionImporter::ImportMapTy &ImportList, const GVSummaryMapTy &DefinedGlobals, - MapVector<StringRef, BitcodeModule> &ModuleMap); + MapVector<StringRef, BitcodeModule> &ModuleMap, + const std::vector<uint8_t> &CmdArgs = std::vector<uint8_t>()); Error finalizeOptimizationRemarks( std::unique_ptr<ToolOutputFile> DiagOutputFile); + +/// Returns the BitcodeModule that is ThinLTO. +BitcodeModule *findThinLTOModule(MutableArrayRef<BitcodeModule> BMs); + +/// Variant of the above. +Expected<BitcodeModule> findThinLTOModule(MemoryBufferRef MBRef); + +/// Distributed ThinLTO: load the referenced modules, keeping their buffers +/// alive in the provided OwnedImportLifetimeManager. Returns false if the +/// operation failed. 
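A small hedged sketch of the two lto::Config additions above, the pre-codegen legacy-PM hook and the optional remarks hotness threshold; the helper name configureLTO is invented, and the verifier pass merely stands in for any legacy pass a client might queue.

    #include "llvm/IR/LegacyPassManager.h"
    #include "llvm/IR/Verifier.h"
    #include "llvm/LTO/Config.h"
    using namespace llvm;

    // Hypothetical setup routine for an LTO client.
    void configureLTO(lto::Config &Conf) {
      Conf.PreCodeGenPassesHook = [](legacy::PassManager &PM) {
        PM.add(createVerifierPass()); // runs right before codegen
      };
      Conf.RemarksWithHotness = true;
      Conf.RemarksHotnessThreshold = 100; // 0 keeps all remarks; None = auto
    }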
+bool loadReferencedModules( + const Module &M, const ModuleSummaryIndex &CombinedIndex, + FunctionImporter::ImportMapTy &ImportList, + MapVector<llvm::StringRef, llvm::BitcodeModule> &ModuleMap, + std::vector<std::unique_ptr<llvm::MemoryBuffer>> + &OwnedImportsLifetimeManager); } } diff --git a/llvm/include/llvm/LTO/legacy/LTOCodeGenerator.h b/llvm/include/llvm/LTO/legacy/LTOCodeGenerator.h index d7ccc0d5a6c5..fc7b8fc25bd9 100644 --- a/llvm/include/llvm/LTO/legacy/LTOCodeGenerator.h +++ b/llvm/include/llvm/LTO/legacy/LTOCodeGenerator.h @@ -93,7 +93,7 @@ struct LTOCodeGenerator { void setFileType(CodeGenFileType FT) { FileType = FT; } void setCpu(StringRef MCpu) { this->MCpu = std::string(MCpu); } - void setAttr(StringRef MAttr) { this->MAttr = std::string(MAttr); } + void setAttrs(std::vector<std::string> MAttrs) { this->MAttrs = MAttrs; } void setOptLevel(unsigned OptLevel); void setShouldInternalize(bool Value) { ShouldInternalize = Value; } @@ -145,9 +145,7 @@ struct LTOCodeGenerator { /// \note It is up to the linker to remove the intermediate output file. Do /// not try to remove the object file in LTOCodeGenerator's destructor as we /// don't who (LTOCodeGenerator or the output file) will last longer. - bool compile_to_file(const char **Name, bool DisableVerify, - bool DisableInline, bool DisableGVNLoadPRE, - bool DisableVectorization); + bool compile_to_file(const char **Name); /// As with compile_to_file(), this function compiles the merged module into /// single output file. Instead of returning the output file path to the @@ -155,15 +153,12 @@ struct LTOCodeGenerator { /// to the caller. This function should delete the intermediate file once /// its content is brought to memory. Return NULL if the compilation was not /// successful. - std::unique_ptr<MemoryBuffer> compile(bool DisableVerify, bool DisableInline, - bool DisableGVNLoadPRE, - bool DisableVectorization); + std::unique_ptr<MemoryBuffer> compile(); /// Optimizes the merged module. Returns true on success. /// /// Calls \a verifyMergedModuleOnce(). - bool optimize(bool DisableVerify, bool DisableInline, bool DisableGVNLoadPRE, - bool DisableVectorization); + bool optimize(); /// Compiles the merged optimized module into a single output file. It brings /// the output to a buffer, and returns the buffer to the caller. Return NULL @@ -183,6 +178,8 @@ struct LTOCodeGenerator { /// assume builtins are present on the target. 
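The legacy LTOCodeGenerator interface in this hunk loses its per-call boolean flags; below is a hedged sketch of the resulting flow. The helper name and feature string are illustrative, and verification is toggled through the new setter shown a little further below.

    #include "llvm/IR/LLVMContext.h"
    #include "llvm/LTO/legacy/LTOCodeGenerator.h"
    #include "llvm/Support/MemoryBuffer.h"
    using namespace llvm;

    // Hypothetical driver using the simplified compile()/optimize() signatures.
    std::unique_ptr<MemoryBuffer> runLegacyLTO(LLVMContext &Context) {
      LTOCodeGenerator CG(Context);
      CG.setAttrs({"+sse4.2"});   // replaces the old single-string setAttr()
      CG.setDisableVerify(false); // verification flag moved to a setter
      if (!CG.optimize())         // no DisableVerify/DisableInline/... arguments
        return nullptr;
      return CG.compile();        // likewise takes no flags now
    }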
void setFreestanding(bool Enabled) { Freestanding = Enabled; } + void setDisableVerify(bool Value) { DisableVerify = Value; } + void setDiagnosticHandler(lto_diagnostic_handler_t, void *); LLVMContext &getContext() { return Context; } @@ -228,7 +225,7 @@ private: std::vector<std::string> CodegenOptions; std::string FeatureStr; std::string MCpu; - std::string MAttr; + std::vector<std::string> MAttrs; std::string NativeObjectPath; TargetOptions Options; CodeGenOpt::Level CGOptLevel = CodeGenOpt::Default; @@ -244,6 +241,7 @@ private: std::unique_ptr<ToolOutputFile> DiagnosticOutputFile; bool Freestanding = false; std::unique_ptr<ToolOutputFile> StatsFile = nullptr; + bool DisableVerify = false; }; } #endif diff --git a/llvm/include/llvm/LTO/legacy/LTOModule.h b/llvm/include/llvm/LTO/legacy/LTOModule.h index 998a4557dd22..310447d615f9 100644 --- a/llvm/include/llvm/LTO/legacy/LTOModule.h +++ b/llvm/include/llvm/LTO/legacy/LTOModule.h @@ -48,8 +48,6 @@ private: std::string LinkerOpts; - std::string DependentLibraries; - std::unique_ptr<Module> Mod; MemoryBufferRef MBRef; ModuleSymbolTable SymTab; diff --git a/llvm/include/llvm/LinkAllPasses.h b/llvm/include/llvm/LinkAllPasses.h index 90e2e24294d4..891d534b4fa6 100644 --- a/llvm/include/llvm/LinkAllPasses.h +++ b/llvm/include/llvm/LinkAllPasses.h @@ -89,12 +89,10 @@ namespace { (void) llvm::createLibCallsShrinkWrapPass(); (void) llvm::createCalledValuePropagationPass(); (void) llvm::createConstantMergePass(); - (void) llvm::createConstantPropagationPass(); (void) llvm::createControlHeightReductionLegacyPass(); (void) llvm::createCostModelAnalysisPass(); (void) llvm::createDeadArgEliminationPass(); (void) llvm::createDeadCodeEliminationPass(); - (void) llvm::createDeadInstEliminationPass(); (void) llvm::createDeadStoreEliminationPass(); (void) llvm::createDependenceAnalysisWrapperPass(); (void) llvm::createDomOnlyPrinterPass(); @@ -116,7 +114,6 @@ namespace { (void) llvm::createGlobalsAAWrapperPass(); (void) llvm::createGuardWideningPass(); (void) llvm::createLoopGuardWideningPass(); - (void) llvm::createIPConstantPropagationPass(); (void) llvm::createIPSCCPPass(); (void) llvm::createInductiveRangeCheckEliminationPass(); (void) llvm::createIndVarSimplifyPass(); @@ -130,6 +127,7 @@ namespace { (void) llvm::createLazyValueInfoPass(); (void) llvm::createLoopExtractorPass(); (void) llvm::createLoopInterchangePass(); + (void) llvm::createLoopFlattenPass(); (void) llvm::createLoopPredicationPass(); (void) llvm::createLoopSimplifyPass(); (void) llvm::createLoopSimplifyCFGPass(); @@ -205,7 +203,7 @@ namespace { (void) llvm::createPrintFunctionPass(os); (void) llvm::createModuleDebugInfoPrinterPass(); (void) llvm::createPartialInliningPass(); - (void) llvm::createLintPass(); + (void) llvm::createLintLegacyPassPass(); (void) llvm::createSinkingPass(); (void) llvm::createLowerAtomicPass(); (void) llvm::createCorrelatedValuePropagationPass(); @@ -226,7 +224,7 @@ namespace { (void) llvm::createMustBeExecutedContextPrinter(); (void) llvm::createFloat2IntPass(); (void) llvm::createEliminateAvailableExternallyPass(); - (void) llvm::createScalarizeMaskedMemIntrinPass(); + (void)llvm::createScalarizeMaskedMemIntrinLegacyPass(); (void) llvm::createWarnMissedTransformationsPass(); (void) llvm::createHardwareLoopsPass(); (void) llvm::createInjectTLIMappingsLegacyPass(); @@ -241,7 +239,7 @@ namespace { llvm::TargetLibraryInfo TLI(TLII); llvm::AliasAnalysis AA(TLI); llvm::AliasSetTracker X(AA); - X.add(nullptr, llvm::LocationSize::unknown(), + X.add(nullptr, 
llvm::LocationSize::beforeOrAfterPointer(), llvm::AAMDNodes()); // for -print-alias-sets (void) llvm::AreStatisticsEnabled(); (void) llvm::sys::RunningOnValgrind(); diff --git a/llvm/include/llvm/MC/MCAsmBackend.h b/llvm/include/llvm/MC/MCAsmBackend.h index cc9f42023bc2..94ed3d27e785 100644 --- a/llvm/include/llvm/MC/MCAsmBackend.h +++ b/llvm/include/llvm/MC/MCAsmBackend.h @@ -144,7 +144,9 @@ public: /// \param STI - The MCSubtargetInfo in effect when the instruction was /// encoded. virtual bool mayNeedRelaxation(const MCInst &Inst, - const MCSubtargetInfo &STI) const = 0; + const MCSubtargetInfo &STI) const { + return false; + } /// Target specific predicate for whether a given fixup requires the /// associated instruction to be relaxed. @@ -175,6 +177,10 @@ public: /// virtual unsigned getMinimumNopSize() const { return 1; } + /// Returns the maximum size of a nop in bytes on this target. + /// + virtual unsigned getMaximumNopSize() const { return 0; } + /// Write an (optimal) nop sequence of Count bytes to the given output. If the /// target cannot generate such a sequence, it should return an error. /// diff --git a/llvm/include/llvm/MC/MCAsmInfo.h b/llvm/include/llvm/MC/MCAsmInfo.h index 46c5a111c891..9b2ac558756e 100644 --- a/llvm/include/llvm/MC/MCAsmInfo.h +++ b/llvm/include/llvm/MC/MCAsmInfo.h @@ -54,6 +54,15 @@ enum LCOMMType { NoAlignment, ByteAlignment, Log2Alignment }; /// This class is intended to be used as a base class for asm /// properties and features specific to the target. class MCAsmInfo { +public: + /// Assembly character literal syntax types. + enum AsmCharLiteralSyntax { + ACLS_Unknown, /// Unknown; character literals not used by LLVM for this + /// target. + ACLS_SingleQuotePrefix, /// The desired character is prefixed by a single + /// quote, e.g., `'A`. + }; + protected: //===------------------------------------------------------------------===// // Properties to be set by the target writer, used to configure asm printer. @@ -177,6 +186,9 @@ protected: /// alignment is supported. bool UseDotAlignForAlignment = false; + /// True if the target supports LEB128 directives. + bool HasLEB128Directives = true; + //===--- Data Emission Directives -------------------------------------===// /// This should be set to the directive used to get some number of zero (and @@ -200,6 +212,16 @@ protected: /// doesn't support this, it can be set to null. Defaults to "\t.asciz\t" const char *AscizDirective; + /// This directive accepts a comma-separated list of bytes for emission as a + /// string of bytes. For targets that do not support this, it shall be set to + /// null. Defaults to null. + const char *ByteListDirective = nullptr; + + /// Form used for character literals in the assembly syntax. Useful for + /// producing strings as byte lists. If a target does not use or support + /// this, it shall be set to ACLS_Unknown. Defaults to ACLS_Unknown. + AsmCharLiteralSyntax CharacterLiteralSyntax = ACLS_Unknown; + /// These directives are used to output some unit of integer data to the /// current section. If a data directive is set to null, smaller data /// directives will be used to emit the large sizes. Defaults to "\t.byte\t", @@ -209,6 +231,9 @@ protected: const char *Data32bitsDirective; const char *Data64bitsDirective; + /// True if data directives support signed values + bool SupportsSignedData = true; + /// If non-null, a directive that is used to emit a word which should be /// relocated as a 64-bit GP-relative offset, e.g. .gpdword on Mips. Defaults /// to nullptr. 
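One hedged sketch of how a printer could consult the new MCAsmInfo byte-list and character-literal hooks described above; this is not the in-tree AsmPrinter logic, and emitStringAsByteList is a made-up helper.

    #include "llvm/ADT/StringExtras.h"
    #include "llvm/MC/MCAsmInfo.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    // Hypothetical emitter: print a string as a comma-separated byte list.
    void emitStringAsByteList(const MCAsmInfo &MAI, raw_ostream &OS, StringRef Str) {
      const char *Directive = MAI.getByteListDirective();
      if (!Directive)
        return; // target does not support byte lists
      OS << Directive;
      for (size_t I = 0, E = Str.size(); I != E; ++I) {
        if (I)
          OS << ',';
        char C = Str[I];
        if (MAI.characterLiteralSyntax() == MCAsmInfo::ACLS_SingleQuotePrefix &&
            isPrint(C))
          OS << '\'' << C;                  // e.g. 'A
        else
          OS << (unsigned)(unsigned char)C; // plain numeric byte otherwise
      }
      OS << '\n';
    }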
@@ -381,6 +406,12 @@ protected: //===--- Integrated Assembler Information ----------------------------===// + // Generated object files can use all ELF features supported by GNU ld of + // this binutils version and later. INT_MAX means all features can be used, + // regardless of GNU ld support. The default value is referenced by + // clang/Driver/Options.td. + std::pair<int, int> BinutilsVersion = {2, 26}; + /// Should we use the integrated assembler? /// The integrated assembler should be enabled by default (by the /// constructors) when failing to parse a valid piece of assembly (inline @@ -436,6 +467,7 @@ public: const char *getData16bitsDirective() const { return Data16bitsDirective; } const char *getData32bitsDirective() const { return Data32bitsDirective; } const char *getData64bitsDirective() const { return Data64bitsDirective; } + bool supportsSignedData() const { return SupportsSignedData; } const char *getGPRel64Directive() const { return GPRel64Directive; } const char *getGPRel32Directive() const { return GPRel32Directive; } const char *getDTPRel64Directive() const { return DTPRel64Directive; } @@ -552,12 +584,18 @@ public: return UseDotAlignForAlignment; } + bool hasLEB128Directives() const { return HasLEB128Directives; } + const char *getZeroDirective() const { return ZeroDirective; } bool doesZeroDirectiveSupportNonZeroValue() const { return ZeroDirectiveSupportsNonZeroValue; } const char *getAsciiDirective() const { return AsciiDirective; } const char *getAscizDirective() const { return AscizDirective; } + const char *getByteListDirective() const { return ByteListDirective; } + AsmCharLiteralSyntax characterLiteralSyntax() const { + return CharacterLiteralSyntax; + } bool getAlignmentIsInBytes() const { return AlignmentIsInBytes; } unsigned getTextAlignFillValue() const { return TextAlignFillValue; } const char *getGlobalDirective() const { return GlobalDirective; } @@ -604,10 +642,6 @@ public: bool doesSupportDebugInformation() const { return SupportsDebugInformation; } - bool doesSupportExceptionHandling() const { - return ExceptionsType != ExceptionHandling::None; - } - ExceptionHandling getExceptionHandlingType() const { return ExceptionsType; } WinEH::EncodingType getWinEHEncodingType() const { return WinEHEncodingType; } @@ -645,9 +679,17 @@ public: return InitialFrameState; } + void setBinutilsVersion(std::pair<int, int> Value) { + BinutilsVersion = Value; + } + /// Return true if assembly (inline or otherwise) should be parsed. bool useIntegratedAssembler() const { return UseIntegratedAssembler; } + bool binutilsIsAtLeast(int Major, int Minor) const { + return BinutilsVersion >= std::make_pair(Major, Minor); + } + /// Set whether assembly (inline or otherwise) should be parsed. 
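A tiny hedged example of the binutils-version gate added above; the function name and the versions used are illustrative only.

    #include "llvm/MC/MCAsmInfo.h"
    using namespace llvm;

    // Hypothetical check: can the output rely on a feature GNU ld 2.30 understands?
    bool canUseNewerFeature(MCAsmInfo &MAI) {
      MAI.setBinutilsVersion({2, 35});     // e.g. taken from a driver option
      return MAI.binutilsIsAtLeast(2, 30); // true here; INT_MAX means "no limit"
    }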
virtual void setUseIntegratedAssembler(bool Value) { UseIntegratedAssembler = Value; diff --git a/llvm/include/llvm/MC/MCAsmMacro.h b/llvm/include/llvm/MC/MCAsmMacro.h index 7eecce0faf64..e3d6a858132d 100644 --- a/llvm/include/llvm/MC/MCAsmMacro.h +++ b/llvm/include/llvm/MC/MCAsmMacro.h @@ -143,10 +143,16 @@ struct MCAsmMacro { StringRef Name; StringRef Body; MCAsmMacroParameters Parameters; + std::vector<std::string> Locals; + bool IsFunction = false; public: MCAsmMacro(StringRef N, StringRef B, MCAsmMacroParameters P) : Name(N), Body(B), Parameters(std::move(P)) {} + MCAsmMacro(StringRef N, StringRef B, MCAsmMacroParameters P, + std::vector<std::string> L, bool F) + : Name(N), Body(B), Parameters(std::move(P)), Locals(std::move(L)), + IsFunction(F) {} #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void dump() const { dump(dbgs()); } diff --git a/llvm/include/llvm/MC/MCAssembler.h b/llvm/include/llvm/MC/MCAssembler.h index b57439f02ca5..1b76559d33b3 100644 --- a/llvm/include/llvm/MC/MCAssembler.h +++ b/llvm/include/llvm/MC/MCAssembler.h @@ -202,6 +202,7 @@ private: bool relaxCVInlineLineTable(MCAsmLayout &Layout, MCCVInlineLineTableFragment &DF); bool relaxCVDefRange(MCAsmLayout &Layout, MCCVDefRangeFragment &DF); + bool relaxPseudoProbeAddr(MCAsmLayout &Layout, MCPseudoProbeAddrFragment &DF); /// finishLayout - Finalize a layout, including fragment lowering. void finishLayout(MCAsmLayout &Layout); @@ -210,7 +211,12 @@ private: handleFixup(const MCAsmLayout &Layout, MCFragment &F, const MCFixup &Fixup); public: - std::vector<std::pair<StringRef, const MCSymbol *>> Symvers; + struct Symver { + StringRef Name; + const MCSymbol *Sym; + SMLoc Loc; + }; + std::vector<Symver> Symvers; /// Construct a new assembler instance. // diff --git a/llvm/include/llvm/MC/MCCodeView.h b/llvm/include/llvm/MC/MCCodeView.h index 2126354cded6..5770f370341d 100644 --- a/llvm/include/llvm/MC/MCCodeView.h +++ b/llvm/include/llvm/MC/MCCodeView.h @@ -166,8 +166,6 @@ public: unsigned FileNo, unsigned Line, unsigned Column, bool PrologueEnd, bool IsStmt); - bool isValidCVFileNumber(unsigned FileNumber); - /// Add a line entry. void addLineEntry(const MCCVLoc &LineEntry); diff --git a/llvm/include/llvm/MC/MCContext.h b/llvm/include/llvm/MC/MCContext.h index 45be9bb3d225..49ab0ce8d6fd 100644 --- a/llvm/include/llvm/MC/MCContext.h +++ b/llvm/include/llvm/MC/MCContext.h @@ -22,6 +22,7 @@ #include "llvm/BinaryFormat/XCOFF.h" #include "llvm/MC/MCAsmMacro.h" #include "llvm/MC/MCDwarf.h" +#include "llvm/MC/MCPseudoProbe.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCTargetOptions.h" #include "llvm/MC/SectionKind.h" @@ -97,6 +98,7 @@ namespace llvm { SpecificBumpPtrAllocator<MCSectionMachO> MachOAllocator; SpecificBumpPtrAllocator<MCSectionWasm> WasmAllocator; SpecificBumpPtrAllocator<MCSectionXCOFF> XCOFFAllocator; + SpecificBumpPtrAllocator<MCInst> MCInstAllocator; /// Bindings of names to symbols. SymbolTable Symbols; @@ -198,6 +200,9 @@ namespace llvm { /// The Compile Unit ID that we are currently processing. unsigned DwarfCompileUnitID = 0; + /// A collection of MCPseudoProbe in the current module + MCPseudoProbeTable PseudoProbeTable; + // Sections are differentiated by the quadruple (section_name, group_name, // unique_id, link_to_symbol_name). Sections sharing the same quadruple are // combined into one section. @@ -380,6 +385,11 @@ namespace llvm { /// @} + /// \name McInst Management + + /// Create and return a new MC instruction. 
+ MCInst *createMCInst(); + /// \name Symbol Management /// @{ @@ -387,12 +397,16 @@ namespace llvm { /// unspecified name. MCSymbol *createLinkerPrivateTempSymbol(); - /// Create and return a new assembler temporary symbol with a unique but - /// unspecified name. - MCSymbol *createTempSymbol(bool CanBeUnnamed = true); + /// Create a temporary symbol with a unique name. The name will be omitted + /// in the symbol table if UseNamesOnTempLabels is false (default except + /// MCAsmStreamer). The overload without Name uses an unspecified name. + MCSymbol *createTempSymbol(); + MCSymbol *createTempSymbol(const Twine &Name, bool AlwaysAddSuffix = true); - MCSymbol *createTempSymbol(const Twine &Name, bool AlwaysAddSuffix, - bool CanBeUnnamed = true); + /// Create a temporary symbol with a unique name whose name cannot be + /// omitted in the symbol table. This is rarely used. + MCSymbol *createNamedTempSymbol(); + MCSymbol *createNamedTempSymbol(const Twine &Name); /// Create the definition of a directional local symbol for numbered label /// (used for "1:" definitions). @@ -558,9 +572,8 @@ namespace llvm { MCSectionXCOFF *getXCOFFSection(StringRef Section, XCOFF::StorageMappingClass MappingClass, - XCOFF::SymbolType CSectType, - XCOFF::StorageClass StorageClass, - SectionKind K, + XCOFF::SymbolType CSectType, SectionKind K, + bool MultiSymbolsAllowed = false, const char *BeginSymName = nullptr); // Create and save a copy of STI and return a reference to the copy. @@ -744,6 +757,8 @@ namespace llvm { } void undefineMacro(StringRef Name) { MacroMap.erase(Name); } + + MCPseudoProbeTable &getMCPseudoProbeTable() { return PseudoProbeTable; } }; } // end namespace llvm diff --git a/llvm/include/llvm/MC/MCDwarf.h b/llvm/include/llvm/MC/MCDwarf.h index e3cea0ae64cf..5bf6496806d8 100644 --- a/llvm/include/llvm/MC/MCDwarf.h +++ b/llvm/include/llvm/MC/MCDwarf.h @@ -25,6 +25,7 @@ #include <cassert> #include <cstdint> #include <string> +#include <tuple> #include <utility> #include <vector> @@ -387,11 +388,11 @@ public: int64_t LineDelta, uint64_t AddrDelta, raw_ostream &OS); /// Utility function to encode a Dwarf pair of LineDelta and AddrDeltas using - /// fixed length operands. - static bool FixedEncode(MCContext &Context, - MCDwarfLineTableParams Params, - int64_t LineDelta, uint64_t AddrDelta, - raw_ostream &OS, uint32_t *Offset, uint32_t *Size); + /// fixed length operands. Returns (Offset, Size, SetDelta). + static std::tuple<uint32_t, uint32_t, bool> fixedEncode(MCContext &Context, + int64_t LineDelta, + uint64_t AddrDelta, + raw_ostream &OS); /// Utility function to emit the encoding to a streamer. static void Emit(MCStreamer *MCOS, MCDwarfLineTableParams Params, @@ -467,10 +468,12 @@ private: unsigned Register2; }; std::vector<char> Values; + std::string Comment; - MCCFIInstruction(OpType Op, MCSymbol *L, unsigned R, int O, StringRef V) + MCCFIInstruction(OpType Op, MCSymbol *L, unsigned R, int O, StringRef V, + StringRef Comment = "") : Operation(Op), Label(L), Register(R), Offset(O), - Values(V.begin(), V.end()) { + Values(V.begin(), V.end()), Comment(Comment) { assert(Op != OpRegister); } @@ -570,8 +573,9 @@ public: /// .cfi_escape Allows the user to add arbitrary bytes to the unwind /// info. 
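For reference, a minimal sketch of the reworked MCContext helpers shown above (createTempSymbol, createNamedTempSymbol, and the new createMCInst); the wrapper function and the symbol name are invented for illustration.

    #include "llvm/MC/MCContext.h"
    #include "llvm/MC/MCInst.h"
    using namespace llvm;

    // Hypothetical use of the context-owned allocation helpers.
    void makeContextObjects(MCContext &Ctx) {
      MCSymbol *Tmp   = Ctx.createTempSymbol();               // name may be dropped from the symtab
      MCSymbol *Named = Ctx.createNamedTempSymbol("cfi_end"); // name is always kept
      MCInst *Inst    = Ctx.createMCInst();                   // lives in the context's allocator
      (void)Tmp; (void)Named; (void)Inst;
    }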
- static MCCFIInstruction createEscape(MCSymbol *L, StringRef Vals) { - return MCCFIInstruction(OpEscape, L, 0, 0, Vals); + static MCCFIInstruction createEscape(MCSymbol *L, StringRef Vals, + StringRef Comment = "") { + return MCCFIInstruction(OpEscape, L, 0, 0, Vals, Comment); } /// A special wrapper for .cfi_escape that indicates GNU_ARGS_SIZE @@ -606,6 +610,10 @@ public: assert(Operation == OpEscape); return StringRef(&Values[0], Values.size()); } + + StringRef getComment() const { + return Comment; + } }; struct MCDwarfFrameInfo { diff --git a/llvm/include/llvm/MC/MCELFObjectWriter.h b/llvm/include/llvm/MC/MCELFObjectWriter.h index 8f78b99d3794..5d99c494b11e 100644 --- a/llvm/include/llvm/MC/MCELFObjectWriter.h +++ b/llvm/include/llvm/MC/MCELFObjectWriter.h @@ -23,7 +23,6 @@ namespace llvm { class MCAssembler; class MCContext; class MCFixup; -class MCObjectWriter; class MCSymbol; class MCSymbolELF; class MCValue; diff --git a/llvm/include/llvm/MC/MCExpr.h b/llvm/include/llvm/MC/MCExpr.h index 803c0d443bee..3ffc845e75c0 100644 --- a/llvm/include/llvm/MC/MCExpr.h +++ b/llvm/include/llvm/MC/MCExpr.h @@ -224,6 +224,7 @@ public: VK_WEAKREF, // The link between the symbols in .weakref foo, bar VK_X86_ABS8, + VK_X86_PLTOFF, VK_ARM_NONE, VK_ARM_GOT_PREL, @@ -299,9 +300,14 @@ public: VK_PPC_GOT_TLSLD_HI, // symbol@got@tlsld@h VK_PPC_GOT_TLSLD_HA, // symbol@got@tlsld@ha VK_PPC_GOT_PCREL, // symbol@got@pcrel + VK_PPC_GOT_TLSGD_PCREL, // symbol@got@tlsgd@pcrel + VK_PPC_GOT_TLSLD_PCREL, // symbol@got@tlsld@pcrel + VK_PPC_GOT_TPREL_PCREL, // symbol@got@tprel@pcrel + VK_PPC_TLS_PCREL, // symbol@tls@pcrel VK_PPC_TLSLD, // symbol@tlsld VK_PPC_LOCAL, // symbol@local VK_PPC_NOTOC, // symbol@notoc + VK_PPC_PCREL_OPT, // .reloc expr, R_PPC64_PCREL_OPT, expr VK_COFF_IMGREL32, // symbol@imgrel (image-relative) @@ -316,8 +322,9 @@ public: VK_Hexagon_IE_GOT, VK_WASM_TYPEINDEX, // Reference to a symbol's type (signature) - VK_WASM_MBREL, // Memory address relative to memory base - VK_WASM_TBREL, // Table index relative to table bare + VK_WASM_TLSREL, // Memory address relative to __tls_base + VK_WASM_MBREL, // Memory address relative to __memory_base + VK_WASM_TBREL, // Table index relative to __table_base VK_AMDGPU_GOTPCREL32_LO, // symbol@gotpcrel32@lo VK_AMDGPU_GOTPCREL32_HI, // symbol@gotpcrel32@hi @@ -350,30 +357,20 @@ private: /// The symbol being referenced. const MCSymbol *Symbol; - // Subclass data stores VariantKind in bits 0..15, UseParensForSymbolVariant - // in bit 16 and HasSubsectionsViaSymbols in bit 17. + // Subclass data stores VariantKind in bits 0..15 and HasSubsectionsViaSymbols + // in bit 16. static const unsigned VariantKindBits = 16; static const unsigned VariantKindMask = (1 << VariantKindBits) - 1; - /// Specifies how the variant kind should be printed. - static const unsigned UseParensForSymbolVariantBit = 1 << VariantKindBits; - // FIXME: Remove this bit. - static const unsigned HasSubsectionsViaSymbolsBit = - 1 << (VariantKindBits + 1); + static const unsigned HasSubsectionsViaSymbolsBit = 1 << VariantKindBits; static unsigned encodeSubclassData(VariantKind Kind, - bool UseParensForSymbolVariant, - bool HasSubsectionsViaSymbols) { + bool HasSubsectionsViaSymbols) { return (unsigned)Kind | - (UseParensForSymbolVariant ? UseParensForSymbolVariantBit : 0) | (HasSubsectionsViaSymbols ? 
HasSubsectionsViaSymbolsBit : 0); } - bool useParensForSymbolVariant() const { - return (getSubclassData() & UseParensForSymbolVariantBit) != 0; - } - explicit MCSymbolRefExpr(const MCSymbol *Symbol, VariantKind Kind, const MCAsmInfo *MAI, SMLoc Loc = SMLoc()); @@ -400,8 +397,6 @@ public: return (VariantKind)(getSubclassData() & VariantKindMask); } - void printVariantKind(raw_ostream &OS) const; - bool hasSubsectionsViaSymbols() const { return (getSubclassData() & HasSubsectionsViaSymbolsBit) != 0; } @@ -499,6 +494,7 @@ public: Mul, ///< Multiplication. NE, ///< Inequality comparison. Or, ///< Bitwise or. + OrNot, ///< Bitwise or not. Shl, ///< Shift left. AShr, ///< Arithmetic shift right. LShr, ///< Logical shift right. diff --git a/llvm/include/llvm/MC/MCFixup.h b/llvm/include/llvm/MC/MCFixup.h index affc846cbdd4..b3a23911d636 100644 --- a/llvm/include/llvm/MC/MCFixup.h +++ b/llvm/include/llvm/MC/MCFixup.h @@ -9,7 +9,6 @@ #ifndef LLVM_MC_MCFIXUP_H #define LLVM_MC_MCFIXUP_H -#include "llvm/MC/MCExpr.h" #include "llvm/Support/DataTypes.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/SMLoc.h" diff --git a/llvm/include/llvm/MC/MCFragment.h b/llvm/include/llvm/MC/MCFragment.h index fb7166e82c09..000b0e33e117 100644 --- a/llvm/include/llvm/MC/MCFragment.h +++ b/llvm/include/llvm/MC/MCFragment.h @@ -37,6 +37,7 @@ public: FT_Data, FT_CompactEncodedInst, FT_Fill, + FT_Nops, FT_Relaxable, FT_Org, FT_Dwarf, @@ -46,6 +47,7 @@ public: FT_SymbolId, FT_CVInlineLines, FT_CVDefRange, + FT_PseudoProbe, FT_Dummy }; @@ -63,6 +65,10 @@ private: /// The layout order of this fragment. unsigned LayoutOrder; + /// The subsection this fragment belongs to. This is 0 if the fragment is not + // in any subsection. + unsigned SubsectionNumber = 0; + FragmentType Kind; /// Whether fragment is being laid out. @@ -101,6 +107,9 @@ public: bool hasInstructions() const { return HasInstructions; } void dump() const; + + void setSubsectionNumber(unsigned Value) { SubsectionNumber = Value; } + unsigned getSubsectionNumber() const { return SubsectionNumber; } }; class MCDummyFragment : public MCFragment { @@ -139,6 +148,7 @@ public: case MCFragment::FT_Data: case MCFragment::FT_Dwarf: case MCFragment::FT_DwarfFrame: + case MCFragment::FT_PseudoProbe: return true; } } @@ -350,6 +360,31 @@ public: } }; +class MCNopsFragment : public MCFragment { + /// The number of bytes to insert. + int64_t Size; + /// Maximum number of bytes allowed in each NOP instruction. + int64_t ControlledNopLength; + + /// Source location of the directive that this fragment was created for. + SMLoc Loc; + +public: + MCNopsFragment(int64_t NumBytes, int64_t ControlledNopLength, SMLoc L, + MCSection *Sec = nullptr) + : MCFragment(FT_Nops, false, Sec), Size(NumBytes), + ControlledNopLength(ControlledNopLength), Loc(L) {} + + int64_t getNumBytes() const { return Size; } + int64_t getControlledNopLength() const { return ControlledNopLength; } + + SMLoc getLoc() const { return Loc; } + + static bool classof(const MCFragment *F) { + return F->getKind() == MCFragment::FT_Nops; + } +}; + class MCOrgFragment : public MCFragment { /// Value to use for filling bytes. int8_t Value; @@ -558,6 +593,23 @@ public: return F->getKind() == MCFragment::FT_BoundaryAlign; } }; + +class MCPseudoProbeAddrFragment : public MCEncodedFragmentWithFixups<8, 1> { + /// The expression for the difference of the two symbols that + /// make up the address delta between two .pseudoprobe directives. 
+ const MCExpr *AddrDelta; + +public: + MCPseudoProbeAddrFragment(const MCExpr *AddrDelta, MCSection *Sec = nullptr) + : MCEncodedFragmentWithFixups<8, 1>(FT_PseudoProbe, false, Sec), + AddrDelta(AddrDelta) {} + + const MCExpr &getAddrDelta() const { return *AddrDelta; } + + static bool classof(const MCFragment *F) { + return F->getKind() == MCFragment::FT_PseudoProbe; + } +}; } // end namespace llvm #endif // LLVM_MC_MCFRAGMENT_H diff --git a/llvm/include/llvm/MC/MCInst.h b/llvm/include/llvm/MC/MCInst.h index 360dbda58fcb..2ce2ee063daa 100644 --- a/llvm/include/llvm/MC/MCInst.h +++ b/llvm/include/llvm/MC/MCInst.h @@ -181,7 +181,7 @@ public: MCOperand &getOperand(unsigned i) { return Operands[i]; } unsigned getNumOperands() const { return Operands.size(); } - void addOperand(const MCOperand &Op) { Operands.push_back(Op); } + void addOperand(const MCOperand Op) { Operands.push_back(Op); } using iterator = SmallVectorImpl<MCOperand>::iterator; using const_iterator = SmallVectorImpl<MCOperand>::const_iterator; diff --git a/llvm/include/llvm/MC/MCInstPrinter.h b/llvm/include/llvm/MC/MCInstPrinter.h index 71e049b92455..8b9ef178e33c 100644 --- a/llvm/include/llvm/MC/MCInstPrinter.h +++ b/llvm/include/llvm/MC/MCInstPrinter.h @@ -18,6 +18,7 @@ class MCAsmInfo; class MCInst; class MCOperand; class MCInstrInfo; +class MCInstrAnalysis; class MCRegisterInfo; class MCSubtargetInfo; class raw_ostream; @@ -48,6 +49,7 @@ protected: const MCAsmInfo &MAI; const MCInstrInfo &MII; const MCRegisterInfo &MRI; + const MCInstrAnalysis *MIA = nullptr; /// True if we are printing marked up assembly. bool UseMarkup = false; @@ -63,6 +65,9 @@ protected: /// (llvm-objdump -d). bool PrintBranchImmAsAddress = false; + /// If true, symbolize branch target and memory reference operands. + bool SymbolizeOperands = false; + /// Utility function for printing annotations. void printAnnotation(raw_ostream &OS, StringRef Annot); @@ -83,6 +88,10 @@ public: /// Specify a stream to emit comments to. void setCommentStream(raw_ostream &OS) { CommentStream = &OS; } + /// Returns a pair containing the mnemonic for \p MI and the number of bits + /// left for further processing by printInstruction (generated by tablegen). + virtual std::pair<const char *, uint64_t> getMnemonic(const MCInst *MI) = 0; + /// Print the specified MCInst to the specified raw_ostream. /// /// \p Address the address of current instruction on most targets, used to @@ -115,6 +124,9 @@ public: PrintBranchImmAsAddress = Value; } + void setSymbolizeOperands(bool Value) { SymbolizeOperands = Value; } + void setMCInstrAnalysis(const MCInstrAnalysis *Value) { MIA = Value; } + /// Utility function to print immediates in decimal or hex. format_object<int64_t> formatImm(int64_t Value) const { return PrintImmHex ? formatHex(Value) : formatDec(Value); diff --git a/llvm/include/llvm/MC/MCInstrDesc.h b/llvm/include/llvm/MC/MCInstrDesc.h index 17454e3134a2..cbb061fc6456 100644 --- a/llvm/include/llvm/MC/MCInstrDesc.h +++ b/llvm/include/llvm/MC/MCInstrDesc.h @@ -27,12 +27,22 @@ class MCInst; //===----------------------------------------------------------------------===// namespace MCOI { -// Operand constraints +/// Operand constraints. These are encoded in 16 bits with one of the +/// low-order 3 bits specifying that a constraint is present and the +/// corresponding high-order hex digit specifying the constraint value. +/// This allows for a maximum of 3 constraints. enum OperandConstraint { - TIED_TO = 0, // Must be allocated the same register as. 
- EARLY_CLOBBER // Operand is an early clobber register operand + TIED_TO = 0, // Must be allocated the same register as specified value. + EARLY_CLOBBER // If present, operand is an early clobber register. }; +// Define a macro to produce each constraint value. +#define MCOI_TIED_TO(op) \ + ((1 << MCOI::TIED_TO) | ((op) << (4 + MCOI::TIED_TO * 4))) + +#define MCOI_EARLY_CLOBBER \ + (1 << MCOI::EARLY_CLOBBER) + /// These are flags set on operands, but should be considered /// private, all access should go through the MCOperandInfo accessors. /// See the accessors for a description of what these are. @@ -84,10 +94,9 @@ public: /// Information about the type of the operand. uint8_t OperandType; - /// The lower 16 bits are used to specify which constraints are set. - /// The higher 16 bits are used to specify the value of constraints (4 bits - /// each). - uint32_t Constraints; + + /// Operand constraints (see OperandConstraint enum). + uint16_t Constraints; /// Set if this operand is a pointer value and it requires a callback /// to look up its register class. @@ -197,14 +206,14 @@ public: const MCPhysReg *ImplicitDefs; // Registers implicitly defined by this instr const MCOperandInfo *OpInfo; // 'NumOperands' entries about operands - /// Returns the value of the specific constraint if - /// it is set. Returns -1 if it is not set. + /// Returns the value of the specified operand constraint if + /// it is present. Returns -1 if it is not present. int getOperandConstraint(unsigned OpNum, MCOI::OperandConstraint Constraint) const { if (OpNum < NumOperands && (OpInfo[OpNum].Constraints & (1 << Constraint))) { - unsigned Pos = 16 + Constraint * 4; - return (int)(OpInfo[OpNum].Constraints >> Pos) & 0xf; + unsigned ValuePos = 4 + Constraint * 4; + return (int)(OpInfo[OpNum].Constraints >> ValuePos) & 0x0f; } return -1; } diff --git a/llvm/include/llvm/MC/MCMachObjectWriter.h b/llvm/include/llvm/MC/MCMachObjectWriter.h index 38ba68b78fe1..f4f9c474cdcd 100644 --- a/llvm/include/llvm/MC/MCMachObjectWriter.h +++ b/llvm/include/llvm/MC/MCMachObjectWriter.h @@ -114,7 +114,7 @@ class MachObjectWriter : public MCObjectWriter { /// \name Symbol Table Data /// @{ - StringTableBuilder StringTable{StringTableBuilder::MachO}; + StringTableBuilder StringTable; std::vector<MachSymbolData> LocalSymbolData; std::vector<MachSymbolData> ExternalSymbolData; std::vector<MachSymbolData> UndefinedSymbolData; @@ -129,6 +129,8 @@ public: MachObjectWriter(std::unique_ptr<MCMachObjectTargetWriter> MOTW, raw_pwrite_stream &OS, bool IsLittleEndian) : TargetObjectWriter(std::move(MOTW)), + StringTable(TargetObjectWriter->is64Bit() ? StringTableBuilder::MachO64 + : StringTableBuilder::MachO), W(OS, IsLittleEndian ? 
support::little : support::big) {} support::endian::Writer W; @@ -233,16 +235,6 @@ public: Relocations[Sec].push_back(P); } - void recordScatteredRelocation(const MCAssembler &Asm, - const MCAsmLayout &Layout, - const MCFragment *Fragment, - const MCFixup &Fixup, MCValue Target, - unsigned Log2Size, uint64_t &FixedValue); - - void recordTLVPRelocation(const MCAssembler &Asm, const MCAsmLayout &Layout, - const MCFragment *Fragment, const MCFixup &Fixup, - MCValue Target, uint64_t &FixedValue); - void recordRelocation(MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment, const MCFixup &Fixup, MCValue Target, uint64_t &FixedValue) override; diff --git a/llvm/include/llvm/MC/MCObjectFileInfo.h b/llvm/include/llvm/MC/MCObjectFileInfo.h index ca04d8e8d3b6..2e6a84b6861f 100644 --- a/llvm/include/llvm/MC/MCObjectFileInfo.h +++ b/llvm/include/llvm/MC/MCObjectFileInfo.h @@ -174,6 +174,10 @@ protected: /// Section containing metadata on function stack sizes. MCSection *StackSizesSection = nullptr; + /// Section for pseudo probe information used by AutoFDO + MCSection *PseudoProbeSection = nullptr; + MCSection *PseudoProbeDescSection = nullptr; + // ELF specific sections. MCSection *DataRelROSection = nullptr; MCSection *MergeableConst4Section = nullptr; @@ -215,6 +219,7 @@ protected: MCSection *XDataSection = nullptr; MCSection *SXDataSection = nullptr; MCSection *GFIDsSection = nullptr; + MCSection *GIATsSection = nullptr; MCSection *GLJMPSection = nullptr; // XCOFF specific sections @@ -249,7 +254,6 @@ public: MCSection *getDataSection() const { return DataSection; } MCSection *getBSSSection() const { return BSSSection; } MCSection *getReadOnlySection() const { return ReadOnlySection; } - MCSection *getLSDASection() const { return LSDASection; } MCSection *getCompactUnwindSection() const { return CompactUnwindSection; } MCSection *getDwarfAbbrevSection() const { return DwarfAbbrevSection; } MCSection *getDwarfInfoSection() const { return DwarfInfoSection; } @@ -338,6 +342,12 @@ public: MCSection *getStackSizesSection(const MCSection &TextSec) const; + MCSection *getBBAddrMapSection(const MCSection &TextSec) const; + + MCSection *getPseudoProbeSection(const MCSection *TextSec) const; + + MCSection *getPseudoProbeDescSection(StringRef FuncName) const; + // ELF specific sections. 
MCSection *getDataRelROSection() const { return DataRelROSection; } const MCSection *getMergeableConst4Section() const { @@ -396,14 +406,13 @@ public: MCSection *getXDataSection() const { return XDataSection; } MCSection *getSXDataSection() const { return SXDataSection; } MCSection *getGFIDsSection() const { return GFIDsSection; } + MCSection *getGIATsSection() const { return GIATsSection; } MCSection *getGLJMPSection() const { return GLJMPSection; } // XCOFF specific sections MCSection *getTOCBaseSection() const { return TOCBaseSection; } - MCSection *getEHFrameSection() { - return EHFrameSection; - } + MCSection *getEHFrameSection() const { return EHFrameSection; } enum Environment { IsMachO, IsELF, IsCOFF, IsWasm, IsXCOFF }; Environment getObjectFileType() const { return Env; } diff --git a/llvm/include/llvm/MC/MCObjectStreamer.h b/llvm/include/llvm/MC/MCObjectStreamer.h index c3f3ae5de921..a00000bc11b6 100644 --- a/llvm/include/llvm/MC/MCObjectStreamer.h +++ b/llvm/include/llvm/MC/MCObjectStreamer.h @@ -179,6 +179,8 @@ public: SMLoc Loc = SMLoc()) override; void emitFill(const MCExpr &NumValues, int64_t Size, int64_t Expr, SMLoc Loc = SMLoc()) override; + void emitNops(int64_t NumBytes, int64_t ControlledNopLength, + SMLoc Loc) override; void emitFileDirective(StringRef Filename) override; void emitAddrsig() override; diff --git a/llvm/include/llvm/MC/MCParser/AsmLexer.h b/llvm/include/llvm/MC/MCParser/AsmLexer.h index 05b3695bc7a0..e187a28f267d 100644 --- a/llvm/include/llvm/MC/MCParser/AsmLexer.h +++ b/llvm/include/llvm/MC/MCParser/AsmLexer.h @@ -56,6 +56,7 @@ private: bool isAtStartOfComment(const char *Ptr); bool isAtStatementSeparator(const char *Ptr); int getNextChar(); + int peekNextChar(); AsmToken ReturnError(const char *Loc, const std::string &Msg); AsmToken LexIdentifier(); diff --git a/llvm/include/llvm/MC/MCParser/MCAsmLexer.h b/llvm/include/llvm/MC/MCParser/MCAsmLexer.h index e89abeaac94c..21966d1c742d 100644 --- a/llvm/include/llvm/MC/MCParser/MCAsmLexer.h +++ b/llvm/include/llvm/MC/MCParser/MCAsmLexer.h @@ -49,7 +49,11 @@ protected: // Can only create subclasses. bool SkipSpace = true; bool AllowAtInIdentifier; bool IsAtStartOfStatement = true; + bool LexMasmHexFloats = false; bool LexMasmIntegers = false; + bool LexMasmStrings = false; + bool UseMasmDefaultRadix = false; + unsigned DefaultRadix = 10; AsmCommentConsumer *CommentConsumer = nullptr; MCAsmLexer(); @@ -147,9 +151,23 @@ public: this->CommentConsumer = CommentConsumer; } - /// Set whether to lex masm-style binary and hex literals. They look like - /// 0b1101 and 0ABCh respectively. + /// Set whether to lex masm-style binary (e.g., 0b1101) and radix-specified + /// literals (e.g., 0ABCh [hex], 576t [decimal], 77o [octal], 1101y [binary]). void setLexMasmIntegers(bool V) { LexMasmIntegers = V; } + + /// Set whether to use masm-style default-radix integer literals. If disabled, + /// assume decimal unless prefixed (e.g., 0x2c [hex], 077 [octal]). + void useMasmDefaultRadix(bool V) { UseMasmDefaultRadix = V; } + + unsigned getMasmDefaultRadix() const { return DefaultRadix; } + void setMasmDefaultRadix(unsigned Radix) { DefaultRadix = Radix; } + + /// Set whether to lex masm-style hex float literals, such as 3f800000r. + void setLexMasmHexFloats(bool V) { LexMasmHexFloats = V; } + + /// Set whether to lex masm-style string literals, such as 'Can''t find file' + /// and "This ""value"" not found". 
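The MASM lexer knobs added in this hunk can be combined roughly as in the sketch below; configureForMasm is a hypothetical helper, and the literal forms in the comments mirror the documentation shown here.

    #include "llvm/MC/MCParser/MCAsmLexer.h"
    using namespace llvm;

    // Hypothetical setup for a MASM-flavoured lexer.
    void configureForMasm(MCAsmLexer &Lexer) {
      Lexer.setLexMasmIntegers(true);  // 0b1101, 0ABCh, 77o, 576t, 1101y
      Lexer.setLexMasmHexFloats(true); // 3f800000r
      Lexer.setLexMasmStrings(true);   // 'Can''t find file'
      Lexer.setMasmDefaultRadix(16);   // unsuffixed literals read as hex...
      Lexer.useMasmDefaultRadix(true); // ...once the default radix is enabled
    }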
+ void setLexMasmStrings(bool V) { LexMasmStrings = V; } }; } // end namespace llvm diff --git a/llvm/include/llvm/MC/MCParser/MCAsmParser.h b/llvm/include/llvm/MC/MCParser/MCAsmParser.h index a68066e0f50b..391a6b0b575e 100644 --- a/llvm/include/llvm/MC/MCParser/MCAsmParser.h +++ b/llvm/include/llvm/MC/MCParser/MCAsmParser.h @@ -90,6 +90,20 @@ private: IdKind Kind; }; +// Generic type information for an assembly object. +// All sizes measured in bytes. +struct AsmTypeInfo { + StringRef Name; + unsigned Size = 0; + unsigned ElementSize = 0; + unsigned Length = 0; +}; + +struct AsmFieldInfo { + AsmTypeInfo Type; + unsigned Offset = 0; +}; + /// Generic Sema callback for assembly parser. class MCAsmParserSemaCallback { public: @@ -170,12 +184,17 @@ public: virtual bool isParsingMasm() const { return false; } - virtual bool lookUpField(StringRef Name, StringRef &Type, - unsigned &Offset) const { + virtual bool defineMacro(StringRef Name, StringRef Value) { return true; } + + virtual bool lookUpField(StringRef Name, AsmFieldInfo &Info) const { + return true; + } + virtual bool lookUpField(StringRef Base, StringRef Member, + AsmFieldInfo &Info) const { return true; } - virtual bool lookUpField(StringRef Base, StringRef Member, StringRef &Type, - unsigned &Offset) const { + + virtual bool lookUpType(StringRef Name, AsmTypeInfo &Info) const { return true; } @@ -281,7 +300,8 @@ public: /// \param Res - The value of the expression. The result is undefined /// on error. /// \return - False on success. - virtual bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) = 0; + virtual bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc, + AsmTypeInfo *TypeInfo) = 0; /// Parse an arbitrary expression, assuming that an initial '(' has /// already been consumed. diff --git a/llvm/include/llvm/MC/MCParser/MCTargetAsmParser.h b/llvm/include/llvm/MC/MCParser/MCTargetAsmParser.h index 1d10c66b4201..0a1e50d501e9 100644 --- a/llvm/include/llvm/MC/MCParser/MCTargetAsmParser.h +++ b/llvm/include/llvm/MC/MCParser/MCTargetAsmParser.h @@ -24,7 +24,6 @@ namespace llvm { class MCInst; -class MCParsedAsmOperand; class MCStreamer; class MCSubtargetInfo; template <typename T> class SmallVectorImpl; @@ -370,7 +369,7 @@ public: // Target-specific parsing of expression. virtual bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) { - return getParser().parsePrimaryExpr(Res, EndLoc); + return getParser().parsePrimaryExpr(Res, EndLoc, nullptr); } virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, diff --git a/llvm/include/llvm/MC/MCPseudoProbe.h b/llvm/include/llvm/MC/MCPseudoProbe.h new file mode 100644 index 000000000000..b9a6196777de --- /dev/null +++ b/llvm/include/llvm/MC/MCPseudoProbe.h @@ -0,0 +1,178 @@ +//===- MCPseudoProbe.h - Pseudo probe encoding support ---------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains the declaration of the MCPseudoProbe to support the pseudo +// probe encoding for AutoFDO. Pseudo probes together with their inline context +// are encoded in a DFS recursive way in the .pseudoprobe sections. For each +// .pseudoprobe section, the encoded binary data consist of a single or mutiple +// function records each for one outlined function. 
A function record has the +// following format : +// +// FUNCTION BODY (one for each outlined function present in the text section) +// GUID (uint64) +// GUID of the function +// NPROBES (ULEB128) +// Number of probes originating from this function. +// NUM_INLINED_FUNCTIONS (ULEB128) +// Number of callees inlined into this function, aka number of +// first-level inlinees +// PROBE RECORDS +// A list of NPROBES entries. Each entry contains: +// INDEX (ULEB128) +// TYPE (uint4) +// 0 - block probe, 1 - indirect call, 2 - direct call +// ATTRIBUTE (uint3) +// reserved +// ADDRESS_TYPE (uint1) +// 0 - code address, 1 - address delta +// CODE_ADDRESS (uint64 or ULEB128) +// code address or address delta, depending on ADDRESS_TYPE +// INLINED FUNCTION RECORDS +// A list of NUM_INLINED_FUNCTIONS entries describing each of the inlined +// callees. Each record contains: +// INLINE SITE +// GUID of the inlinee (uint64) +// ID of the callsite probe (ULEB128) +// FUNCTION BODY +// A FUNCTION BODY entry describing the inlined function. +//===----------------------------------------------------------------------===// + +#ifndef LLVM_MC_MCPSEUDOPROBE_H +#define LLVM_MC_MCPSEUDOPROBE_H + +#include "llvm/ADT/MapVector.h" +#include "llvm/MC/MCSection.h" +#include <functional> +#include <map> +#include <vector> + +namespace llvm { + +class MCStreamer; +class MCSymbol; +class MCObjectStreamer; + +enum class MCPseudoProbeFlag { + // If set, indicates that the probe is encoded as an address delta + // instead of a real code address. + AddressDelta = 0x1, +}; + +/// Instances of this class represent a pseudo probe instance for a pseudo probe +/// table entry, which is created during a machine instruction is assembled and +/// uses an address from a temporary label created at the current address in the +/// current section. +class MCPseudoProbe { + MCSymbol *Label; + uint64_t Guid; + uint64_t Index; + uint8_t Type; + uint8_t Attributes; + +public: + MCPseudoProbe(MCSymbol *Label, uint64_t Guid, uint64_t Index, uint64_t Type, + uint64_t Attributes) + : Label(Label), Guid(Guid), Index(Index), Type(Type), + Attributes(Attributes) { + assert(Type <= 0xFF && "Probe type too big to encode, exceeding 2^8"); + assert(Attributes <= 0xFF && + "Probe attributes too big to encode, exceeding 2^16"); + } + + MCSymbol *getLabel() const { return Label; } + + uint64_t getGuid() const { return Guid; } + + uint64_t getIndex() const { return Index; } + + uint8_t getType() const { return Type; } + + uint8_t getAttributes() const { return Attributes; } + + void emit(MCObjectStreamer *MCOS, const MCPseudoProbe *LastProbe) const; +}; + +// An inline frame has the form <Guid, ProbeID> +using InlineSite = std::tuple<uint64_t, uint32_t>; +using MCPseudoProbeInlineStack = SmallVector<InlineSite, 8>; + +// A Tri-tree based data structure to group probes by inline stack. +// A tree is allocated for a standalone .text section. A fake +// instance is created as the root of a tree. +// A real instance of this class is created for each function, either an +// unlined function that has code in .text section or an inlined function. +class MCPseudoProbeInlineTree { + uint64_t Guid; + // Set of probes that come with the function. + std::vector<MCPseudoProbe> Probes; + // Use std::map for a deterministic output. + std::map<InlineSite, MCPseudoProbeInlineTree *> Inlinees; + + // Root node has a GUID 0. 
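To make the data flow concrete, here is a hedged sketch of recording a single block probe through the MCContext-owned table, which the streamer normally does while assembling a .pseudoprobe directive; the GUID and index values are purely illustrative.

    #include "llvm/MC/MCContext.h"
    #include "llvm/MC/MCPseudoProbe.h"
    using namespace llvm;

    // Hypothetical recording of one probe for the current text section.
    void recordBlockProbe(MCContext &Ctx, MCSection *Sec, MCSymbol *Label) {
      MCPseudoProbe Probe(Label, /*Guid=*/0x1234, /*Index=*/1,
                          /*Type=*/0 /*block probe*/, /*Attributes=*/0);
      MCPseudoProbeInlineStack InlineStack; // empty: probe is not from an inlinee
      Ctx.getMCPseudoProbeTable().getProbeSections().addPseudoProbe(Sec, Probe,
                                                                    InlineStack);
    }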
+ bool isRoot() { return Guid == 0; } + MCPseudoProbeInlineTree *getOrAddNode(InlineSite Site); + +public: + MCPseudoProbeInlineTree() = default; + MCPseudoProbeInlineTree(uint64_t Guid) : Guid(Guid) {} + ~MCPseudoProbeInlineTree(); + void addPseudoProbe(const MCPseudoProbe &Probe, + const MCPseudoProbeInlineStack &InlineStack); + void emit(MCObjectStreamer *MCOS, const MCPseudoProbe *&LastProbe); +}; + +/// Instances of this class represent the pseudo probes inserted into a compile +/// unit. +class MCPseudoProbeSection { +public: + void addPseudoProbe(MCSection *Sec, const MCPseudoProbe &Probe, + const MCPseudoProbeInlineStack &InlineStack) { + MCProbeDivisions[Sec].addPseudoProbe(Probe, InlineStack); + } + + // TODO: Sort by getOrdinal to ensure a determinstic section order + using MCProbeDivisionMap = std::map<MCSection *, MCPseudoProbeInlineTree>; + +private: + // A collection of MCPseudoProbe for each text section. The MCPseudoProbes + // are grouped by GUID of the functions where they are from and will be + // encoded by groups. In the comdat scenario where a text section really only + // contains the code of a function solely, the probes associated with a comdat + // function are still grouped by GUIDs due to inlining that can bring probes + // from different functions into one function. + MCProbeDivisionMap MCProbeDivisions; + +public: + const MCProbeDivisionMap &getMCProbes() const { return MCProbeDivisions; } + + bool empty() const { return MCProbeDivisions.empty(); } + + void emit(MCObjectStreamer *MCOS); +}; + +class MCPseudoProbeTable { + // A collection of MCPseudoProbe in the current module grouped by text + // sections. MCPseudoProbes will be encoded into a corresponding + // .pseudoprobe section. With functions emitted as separate comdats, + // a text section really only contains the code of a function solely, and the + // probes associated with the text section will be emitted into a standalone + // .pseudoprobe section that shares the same comdat group with the function. + MCPseudoProbeSection MCProbeSections; + +public: + static void emit(MCObjectStreamer *MCOS); + + MCPseudoProbeSection &getProbeSections() { return MCProbeSections; } + +#ifndef NDEBUG + static int DdgPrintIndent; +#endif +}; +} // end namespace llvm + +#endif // LLVM_MC_MCPSEUDOPROBE_H diff --git a/llvm/include/llvm/MC/MCRegister.h b/llvm/include/llvm/MC/MCRegister.h index 1f3c4b8494cc..8bbeab5bef43 100644 --- a/llvm/include/llvm/MC/MCRegister.h +++ b/llvm/include/llvm/MC/MCRegister.h @@ -20,6 +20,7 @@ using MCPhysReg = uint16_t; /// Wrapper class representing physical registers. Should be passed by value. class MCRegister { + friend hash_code hash_value(const MCRegister &); unsigned Reg; public: @@ -46,31 +47,26 @@ public: /// register. StackSlot values do not exist in the MC layer, see /// Register::isStackSlot() for the more information on them. /// - /// Note that isVirtualRegister() and isPhysicalRegister() cannot handle stack - /// slots, so if a variable may contains a stack slot, always check - /// isStackSlot() first. static bool isStackSlot(unsigned Reg) { - return !(Reg & VirtualRegFlag) && - uint32_t(Reg & ~VirtualRegFlag) >= FirstStackSlot; + return FirstStackSlot <= Reg && Reg < VirtualRegFlag; } /// Return true if the specified register number is in /// the physical register namespace. static bool isPhysicalRegister(unsigned Reg) { - assert(!isStackSlot(Reg) && "Not a register! 
Check isStackSlot() first."); - return Reg >= FirstPhysicalReg && !(Reg & VirtualRegFlag); - } - - /// Return true if the specified register number is in the physical register - /// namespace. - bool isPhysical() const { - return isPhysicalRegister(Reg); + return FirstPhysicalReg <= Reg && Reg < FirstStackSlot; } constexpr operator unsigned() const { return Reg; } + /// Check the provided unsigned value is a valid MCRegister. + static MCRegister from(unsigned Val) { + assert(Val == NoRegister || isPhysicalRegister(Val)); + return MCRegister(Val); + } + unsigned id() const { return Reg; } @@ -110,6 +106,9 @@ template<> struct DenseMapInfo<MCRegister> { } }; +inline hash_code hash_value(const MCRegister &Reg) { + return hash_value(Reg.id()); +} } #endif // ifndef LLVM_MC_REGISTER_H diff --git a/llvm/include/llvm/MC/MCRegisterInfo.h b/llvm/include/llvm/MC/MCRegisterInfo.h index 9864d95d19e0..0c1ac6254ec1 100644 --- a/llvm/include/llvm/MC/MCRegisterInfo.h +++ b/llvm/include/llvm/MC/MCRegisterInfo.h @@ -675,6 +675,7 @@ public: MCRegUnitIterator(MCRegister Reg, const MCRegisterInfo *MCRI) { assert(Reg && "Null register has no regunits"); + assert(MCRegister::isPhysicalRegister(Reg.id())); // Decode the RegUnits MCRegisterDesc field. unsigned RU = MCRI->get(Reg).RegUnits; unsigned Scale = RU & 15; diff --git a/llvm/include/llvm/MC/MCSchedule.h b/llvm/include/llvm/MC/MCSchedule.h index 66c5659af3a7..ee0e5b4df9f0 100644 --- a/llvm/include/llvm/MC/MCSchedule.h +++ b/llvm/include/llvm/MC/MCSchedule.h @@ -205,7 +205,7 @@ struct MCExtraProcessorInfo { /// subtargets can't be done. Nonetheless, the abstract model is /// useful. Futhermore, subtargets typically extend this model with processor /// specific resources to model any hardware features that can be exploited by -/// sceduling heuristics and aren't sufficiently represented in the abstract. +/// scheduling heuristics and aren't sufficiently represented in the abstract. /// /// The abstract pipeline is built around the notion of an "issue point". This /// is merely a reference point for counting machine cycles. 
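Taken together, the rewritten range checks in MCRegister above imply a simple partition of the raw unsigned register encoding. The sketch below restates that partition; the enumerator values are assumptions inferred from the comparisons in this header (FirstPhysicalReg <= physical < FirstStackSlot <= stack slot < VirtualRegFlag), not a copy of the real definitions.

// Illustrative partition of the unsigned register encoding; values assumed.
enum : unsigned {
  NoRegister = 0u,
  FirstPhysicalReg = 1u,
  FirstStackSlot = 1u << 30,
  VirtualRegFlag = 1u << 31,
};

static const char *classifyRegNumber(unsigned Reg) {
  if (Reg == NoRegister)
    return "no register";
  if (Reg < FirstStackSlot)
    return "physical register"; // FirstPhysicalReg <= Reg < FirstStackSlot
  if (Reg < VirtualRegFlag)
    return "stack slot";        // FirstStackSlot <= Reg < VirtualRegFlag
  return "virtual register";    // VirtualRegFlag is set
}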
The physical diff --git a/llvm/include/llvm/MC/MCSectionXCOFF.h b/llvm/include/llvm/MC/MCSectionXCOFF.h index eed6b9c2609c..aa39dff07180 100644 --- a/llvm/include/llvm/MC/MCSectionXCOFF.h +++ b/llvm/include/llvm/MC/MCSectionXCOFF.h @@ -34,22 +34,23 @@ class MCSectionXCOFF final : public MCSection { XCOFF::StorageMappingClass MappingClass; XCOFF::SymbolType Type; - XCOFF::StorageClass StorageClass; MCSymbolXCOFF *const QualName; StringRef SymbolTableName; + bool MultiSymbolsAllowed; static constexpr unsigned DefaultAlignVal = 4; MCSectionXCOFF(StringRef Name, XCOFF::StorageMappingClass SMC, - XCOFF::SymbolType ST, XCOFF::StorageClass SC, SectionKind K, - MCSymbolXCOFF *QualName, MCSymbol *Begin, - StringRef SymbolTableName) + XCOFF::SymbolType ST, SectionKind K, MCSymbolXCOFF *QualName, + MCSymbol *Begin, StringRef SymbolTableName, + bool MultiSymbolsAllowed) : MCSection(SV_XCOFF, Name, K, Begin), MappingClass(SMC), Type(ST), - StorageClass(SC), QualName(QualName), SymbolTableName(SymbolTableName) { + QualName(QualName), SymbolTableName(SymbolTableName), + MultiSymbolsAllowed(MultiSymbolsAllowed) { assert((ST == XCOFF::XTY_SD || ST == XCOFF::XTY_CM || ST == XCOFF::XTY_ER) && "Invalid or unhandled type for csect."); assert(QualName != nullptr && "QualName is needed."); - QualName->setStorageClass(SC); QualName->setRepresentedCsect(this); + QualName->setStorageClass(XCOFF::C_HIDEXT); // A csect is 4 byte aligned by default, except for undefined symbol csects. if (Type != XCOFF::XTY_ER) setAlignment(Align(DefaultAlignVal)); @@ -65,7 +66,9 @@ public: } XCOFF::StorageMappingClass getMappingClass() const { return MappingClass; } - XCOFF::StorageClass getStorageClass() const { return StorageClass; } + XCOFF::StorageClass getStorageClass() const { + return QualName->getStorageClass(); + } XCOFF::SymbolType getCSectType() const { return Type; } MCSymbolXCOFF *getQualNameSymbol() const { return QualName; } @@ -75,6 +78,7 @@ public: bool UseCodeAlign() const override; bool isVirtualSection() const override; StringRef getSymbolTableName() const { return SymbolTableName; } + bool isMultiSymbolsAllowed() const { return MultiSymbolsAllowed; } }; } // end namespace llvm diff --git a/llvm/include/llvm/MC/MCStreamer.h b/llvm/include/llvm/MC/MCStreamer.h index 484c62538366..cdc728f73772 100644 --- a/llvm/include/llvm/MC/MCStreamer.h +++ b/llvm/include/llvm/MC/MCStreamer.h @@ -13,6 +13,7 @@ #ifndef LLVM_MC_MCSTREAMER_H #define LLVM_MC_MCSTREAMER_H +#include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Optional.h" @@ -20,6 +21,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/MC/MCDirectives.h" #include "llvm/MC/MCLinkerOptimizationHint.h" +#include "llvm/MC/MCPseudoProbe.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCWinEH.h" #include "llvm/Support/Error.h" @@ -205,6 +207,7 @@ class MCStreamer { std::vector<std::unique_ptr<WinEH::FrameInfo>> WinFrameInfos; WinEH::FrameInfo *CurrentWinFrameInfo; + size_t CurrentProcWinFrameInfoStartIndex; /// Tracks an index to represent the order a symbol was emitted in. /// Zero means we did not emit that symbol. @@ -214,6 +217,10 @@ class MCStreamer { /// PushSection. SmallVector<std::pair<MCSectionSubPair, MCSectionSubPair>, 4> SectionStack; + /// Pointer to the parser's SMLoc if available. This is used to provide + /// locations for diagnostics. + const SMLoc *StartTokLocPtr = nullptr; + /// The next unique ID to use when creating a WinCFI-related section (.pdata /// or .xdata). 
This ID ensures that we have a one-to-one mapping from /// code section to unwind info section, which MSVC's incremental linker @@ -239,6 +246,8 @@ protected: return CurrentWinFrameInfo; } + virtual void EmitWindowsUnwindTables(WinEH::FrameInfo *Frame); + virtual void EmitWindowsUnwindTables(); virtual void emitRawTextImpl(StringRef String); @@ -258,6 +267,11 @@ public: TargetStreamer.reset(TS); } + void setStartTokLocPtr(const SMLoc *Loc) { StartTokLocPtr = Loc; } + SMLoc getStartTokLoc() const { + return StartTokLocPtr ? *StartTokLocPtr : SMLoc(); + } + /// State management /// virtual void reset(); @@ -442,6 +456,10 @@ public: /// so we can sort on them later. void AssignFragment(MCSymbol *Symbol, MCFragment *Fragment); + /// Returns the mnemonic for \p MI, if the streamer has access to a + /// instruction printer and returns an empty string otherwise. + virtual StringRef getMnemonic(MCInst &MI) { return ""; } + /// Emit a label for \p Symbol into the current section. /// /// This corresponds to an assembler statement such as: @@ -673,6 +691,7 @@ public: /// Special case of EmitValue that avoids the client having /// to pass in a MCExpr for constant integers. virtual void emitIntValue(uint64_t Value, unsigned Size); + virtual void emitIntValue(APInt Value); /// Special case of EmitValue that avoids the client having to pass /// in a MCExpr for constant integers & prints in Hex format for certain @@ -777,6 +796,9 @@ public: virtual void emitFill(const MCExpr &NumValues, int64_t Size, int64_t Expr, SMLoc Loc = SMLoc()); + virtual void emitNops(int64_t NumBytes, int64_t ControlledNopLength, + SMLoc Loc); + /// Emit NumBytes worth of zeros. /// This function properly handles data in virtual sections. void emitZeros(uint64_t NumBytes); @@ -1028,6 +1050,11 @@ public: /// Emit the given \p Instruction into the current section. virtual void emitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI); + /// Emit the a pseudo probe into the current section. + virtual void emitPseudoProbe(uint64_t Guid, uint64_t Index, uint64_t Type, + uint64_t Attr, + const MCPseudoProbeInlineStack &InlineStack); + /// Set the bundle alignment mode from now on in the section. /// The argument is the power of 2 to which the alignment is set. The /// value 0 means turn the bundle alignment off. @@ -1050,7 +1077,7 @@ public: /// Streamer specific finalization. virtual void finishImpl(); /// Finish emission of machine code. - void Finish(); + void Finish(SMLoc EndLoc = SMLoc()); virtual bool mayHaveInstructions(MCSection &Sec) const { return true; } }; @@ -1059,28 +1086,6 @@ public: /// timing the assembler front end. MCStreamer *createNullStreamer(MCContext &Ctx); -/// Create a machine code streamer which will print out assembly for the native -/// target, suitable for compiling with a native assembler. -/// -/// \param InstPrint - If given, the instruction printer to use. If not given -/// the MCInst representation will be printed. This method takes ownership of -/// InstPrint. -/// -/// \param CE - If given, a code emitter to use to show the instruction -/// encoding inline with the assembly. This method takes ownership of \p CE. -/// -/// \param TAB - If given, a target asm backend to use to show the fixup -/// information in conjunction with encoding information. This method takes -/// ownership of \p TAB. -/// -/// \param ShowInst - Whether to show the MCInst representation inline with -/// the assembly. 
-MCStreamer *createAsmStreamer(MCContext &Ctx, - std::unique_ptr<formatted_raw_ostream> OS, - bool isVerboseAsm, bool useDwarfDirectory, - MCInstPrinter *InstPrint, MCCodeEmitter *CE, - MCAsmBackend *TAB, bool ShowInst); - } // end namespace llvm #endif // LLVM_MC_MCSTREAMER_H diff --git a/llvm/include/llvm/MC/MCSubtargetInfo.h b/llvm/include/llvm/MC/MCSubtargetInfo.h index 61cbb842502e..2c1072d833fb 100644 --- a/llvm/include/llvm/MC/MCSubtargetInfo.h +++ b/llvm/include/llvm/MC/MCSubtargetInfo.h @@ -54,6 +54,7 @@ struct SubtargetFeatureKV { struct SubtargetSubTypeKV { const char *Key; ///< K-V key string FeatureBitArray Implies; ///< K-V bit mask + FeatureBitArray TuneImplies; ///< K-V bit mask const MCSchedModel *SchedModel; /// Compare routine for std::lower_bound @@ -74,6 +75,7 @@ struct SubtargetSubTypeKV { class MCSubtargetInfo { Triple TargetTriple; std::string CPU; // CPU being targeted. + std::string TuneCPU; // CPU being tuned for. ArrayRef<SubtargetFeatureKV> ProcFeatures; // Processor feature list ArrayRef<SubtargetSubTypeKV> ProcDesc; // Processor descriptions @@ -90,8 +92,8 @@ class MCSubtargetInfo { public: MCSubtargetInfo(const MCSubtargetInfo &) = default; - MCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS, - ArrayRef<SubtargetFeatureKV> PF, + MCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef TuneCPU, + StringRef FS, ArrayRef<SubtargetFeatureKV> PF, ArrayRef<SubtargetSubTypeKV> PD, const MCWriteProcResEntry *WPR, const MCWriteLatencyEntry *WL, const MCReadAdvanceEntry *RA, const InstrStage *IS, @@ -103,6 +105,7 @@ public: const Triple &getTargetTriple() const { return TargetTriple; } StringRef getCPU() const { return CPU; } + StringRef getTuneCPU() const { return TuneCPU; } const FeatureBitset& getFeatureBits() const { return FeatureBits; } void setFeatureBits(const FeatureBitset &FeatureBits_) { @@ -118,12 +121,12 @@ protected: /// /// FIXME: Find a way to stick this in the constructor, since it should only /// be called during initialization. - void InitMCProcessorInfo(StringRef CPU, StringRef FS); + void InitMCProcessorInfo(StringRef CPU, StringRef TuneCPU, StringRef FS); public: - /// Set the features to the default for the given CPU with an appended feature - /// string. - void setDefaultFeatures(StringRef CPU, StringRef FS); + /// Set the features to the default for the given CPU and TuneCPU, with ano + /// appended feature string. + void setDefaultFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS); /// Toggle a feature and return the re-computed feature bits. /// This version does not change the implied bits. @@ -210,15 +213,16 @@ public: void initInstrItins(InstrItineraryData &InstrItins) const; /// Resolve a variant scheduling class for the given MCInst and CPU. - virtual unsigned - resolveVariantSchedClass(unsigned SchedClass, const MCInst *MI, - unsigned CPUID) const { + virtual unsigned resolveVariantSchedClass(unsigned SchedClass, + const MCInst *MI, + const MCInstrInfo *MCII, + unsigned CPUID) const { return 0; } /// Check whether the CPU string is valid. 
bool isCPUStringValid(StringRef CPU) const { - auto Found = std::lower_bound(ProcDesc.begin(), ProcDesc.end(), CPU); + auto Found = llvm::lower_bound(ProcDesc, CPU); return Found != ProcDesc.end() && StringRef(Found->Key) == CPU; } diff --git a/llvm/include/llvm/MC/MCSymbol.h b/llvm/include/llvm/MC/MCSymbol.h index 84263bf94035..a83781f5c586 100644 --- a/llvm/include/llvm/MC/MCSymbol.h +++ b/llvm/include/llvm/MC/MCSymbol.h @@ -16,6 +16,7 @@ #include "llvm/ADT/PointerIntPair.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" +#include "llvm/MC/MCExpr.h" #include "llvm/MC/MCFragment.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" @@ -27,7 +28,6 @@ namespace llvm { class MCAsmInfo; class MCContext; -class MCExpr; class MCSection; class raw_ostream; @@ -94,7 +94,8 @@ protected: mutable unsigned IsRegistered : 1; - /// This symbol is visible outside this translation unit. + /// True if this symbol is visible outside this translation unit. Note: ELF + /// uses binding instead of this bit. mutable unsigned IsExternal : 1; /// This symbol is private extern. diff --git a/llvm/include/llvm/MC/MCSymbolWasm.h b/llvm/include/llvm/MC/MCSymbolWasm.h index ffd8a7aad312..ae512fd27be2 100644 --- a/llvm/include/llvm/MC/MCSymbolWasm.h +++ b/llvm/include/llvm/MC/MCSymbolWasm.h @@ -25,6 +25,7 @@ class MCSymbolWasm : public MCSymbol { Optional<StringRef> ExportName; wasm::WasmSignature *Signature = nullptr; Optional<wasm::WasmGlobalType> GlobalType; + Optional<wasm::ValType> TableType; Optional<wasm::WasmEventType> EventType; /// An expression describing how to calculate the size of a symbol. If a @@ -42,6 +43,7 @@ public: bool isFunction() const { return Type == wasm::WASM_SYMBOL_TYPE_FUNCTION; } bool isData() const { return Type == wasm::WASM_SYMBOL_TYPE_DATA; } bool isGlobal() const { return Type == wasm::WASM_SYMBOL_TYPE_GLOBAL; } + bool isTable() const { return Type == wasm::WASM_SYMBOL_TYPE_TABLE; } bool isSection() const { return Type == wasm::WASM_SYMBOL_TYPE_SECTION; } bool isEvent() const { return Type == wasm::WASM_SYMBOL_TYPE_EVENT; } wasm::WasmSymbolType getType() const { return Type; } @@ -94,6 +96,15 @@ public: StringRef getExportName() const { return ExportName.getValue(); } void setExportName(StringRef Name) { ExportName = Name; } + bool isFunctionTable() const { + return isTable() && hasTableType() && + getTableType() == wasm::ValType::FUNCREF; + } + void setFunctionTable() { + setType(wasm::WASM_SYMBOL_TYPE_TABLE); + setTableType(wasm::ValType::FUNCREF); + } + void setUsedInGOT() const { IsUsedInGOT = true; } bool isUsedInGOT() const { return IsUsedInGOT; } @@ -109,6 +120,13 @@ public: } void setGlobalType(wasm::WasmGlobalType GT) { GlobalType = GT; } + bool hasTableType() const { return TableType.hasValue(); } + wasm::ValType getTableType() const { + assert(hasTableType()); + return TableType.getValue(); + } + void setTableType(wasm::ValType TT) { TableType = TT; } + const wasm::WasmEventType &getEventType() const { assert(EventType.hasValue()); return EventType.getValue(); diff --git a/llvm/include/llvm/MC/MCSymbolXCOFF.h b/llvm/include/llvm/MC/MCSymbolXCOFF.h index d0379ec08b7d..752e1e7bba0f 100644 --- a/llvm/include/llvm/MC/MCSymbolXCOFF.h +++ b/llvm/include/llvm/MC/MCSymbolXCOFF.h @@ -35,8 +35,6 @@ public: } void setStorageClass(XCOFF::StorageClass SC) { - assert((!StorageClass.hasValue() || StorageClass.getValue() == SC) && - "Redefining StorageClass of XCOFF MCSymbol."); StorageClass = SC; }; @@ -48,8 +46,6 @@ public: StringRef 
getUnqualifiedName() const { return getUnqualifiedName(getName()); } - bool hasRepresentedCsectSet() const { return RepresentedCsect != nullptr; } - MCSectionXCOFF *getRepresentedCsect() const; void setRepresentedCsect(MCSectionXCOFF *C); diff --git a/llvm/include/llvm/MC/MCTargetOptions.h b/llvm/include/llvm/MC/MCTargetOptions.h index 4b786751dbd1..d29a74905ebf 100644 --- a/llvm/include/llvm/MC/MCTargetOptions.h +++ b/llvm/include/llvm/MC/MCTargetOptions.h @@ -22,6 +22,7 @@ enum class ExceptionHandling { ARM, ///< ARM EHABI WinEH, ///< Windows Exception Handling Wasm, ///< WebAssembly Exception Handling + AIX, ///< AIX Exception Handling }; enum class DebugCompressionType { diff --git a/llvm/include/llvm/MC/MCWasmObjectWriter.h b/llvm/include/llvm/MC/MCWasmObjectWriter.h index 382818ad6867..00da632bbcc6 100644 --- a/llvm/include/llvm/MC/MCWasmObjectWriter.h +++ b/llvm/include/llvm/MC/MCWasmObjectWriter.h @@ -52,6 +52,10 @@ std::unique_ptr<MCObjectWriter> createWasmObjectWriter(std::unique_ptr<MCWasmObjectTargetWriter> MOTW, raw_pwrite_stream &OS); +std::unique_ptr<MCObjectWriter> +createWasmDwoObjectWriter(std::unique_ptr<MCWasmObjectTargetWriter> MOTW, + raw_pwrite_stream &OS, raw_pwrite_stream &DwoOS); + } // namespace llvm #endif diff --git a/llvm/include/llvm/MC/MCWasmStreamer.h b/llvm/include/llvm/MC/MCWasmStreamer.h index 61075e7a5732..6651f071f799 100644 --- a/llvm/include/llvm/MC/MCWasmStreamer.h +++ b/llvm/include/llvm/MC/MCWasmStreamer.h @@ -59,13 +59,9 @@ public: SMLoc Loc = SMLoc()) override; void emitTBSSSymbol(MCSection *Section, MCSymbol *Symbol, uint64_t Size, unsigned ByteAlignment = 0) override; - void emitValueImpl(const MCExpr *Value, unsigned Size, - SMLoc Loc = SMLoc()) override; void emitIdent(StringRef IdentString) override; - void emitValueToAlignment(unsigned, int64_t, unsigned, unsigned) override; - void finishImpl() override; private: diff --git a/llvm/include/llvm/MC/MCWin64EH.h b/llvm/include/llvm/MC/MCWin64EH.h index 60ec06e61b7c..065161d1759e 100644 --- a/llvm/include/llvm/MC/MCWin64EH.h +++ b/llvm/include/llvm/MC/MCWin64EH.h @@ -53,14 +53,15 @@ struct Instruction { class UnwindEmitter : public WinEH::UnwindEmitter { public: void Emit(MCStreamer &Streamer) const override; - void EmitUnwindInfo(MCStreamer &Streamer, WinEH::FrameInfo *FI) const override; + void EmitUnwindInfo(MCStreamer &Streamer, WinEH::FrameInfo *FI, + bool HandlerData) const override; }; class ARM64UnwindEmitter : public WinEH::UnwindEmitter { public: void Emit(MCStreamer &Streamer) const override; - void EmitUnwindInfo(MCStreamer &Streamer, - WinEH::FrameInfo *FI) const override; + void EmitUnwindInfo(MCStreamer &Streamer, WinEH::FrameInfo *FI, + bool HandlerData) const override; }; } diff --git a/llvm/include/llvm/MC/MCWinCOFFStreamer.h b/llvm/include/llvm/MC/MCWinCOFFStreamer.h index 1236304b9e5d..53b2ef0bd96e 100644 --- a/llvm/include/llvm/MC/MCWinCOFFStreamer.h +++ b/llvm/include/llvm/MC/MCWinCOFFStreamer.h @@ -58,6 +58,7 @@ public: unsigned ByteAlignment) override; void emitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size, unsigned ByteAlignment) override; + void emitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol) override; void emitZerofill(MCSection *Section, MCSymbol *Symbol, uint64_t Size, unsigned ByteAlignment, SMLoc Loc = SMLoc()) override; void emitTBSSSymbol(MCSection *Section, MCSymbol *Symbol, uint64_t Size, diff --git a/llvm/include/llvm/MC/MCWinEH.h b/llvm/include/llvm/MC/MCWinEH.h index b1c28c0ecae7..5688255810d0 100644 --- a/llvm/include/llvm/MC/MCWinEH.h +++ 
b/llvm/include/llvm/MC/MCWinEH.h @@ -26,6 +26,14 @@ struct Instruction { Instruction(unsigned Op, MCSymbol *L, unsigned Reg, unsigned Off) : Label(L), Offset(Off), Register(Reg), Operation(Op) {} + + bool operator==(const Instruction &I) const { + // Check whether two instructions refer to the same operation + // applied at a different spot (i.e. pointing at a different label). + return Offset == I.Offset && Register == I.Register && + Operation == I.Operation; + } + bool operator!=(const Instruction &I) const { return !(*this == I); } }; struct FrameInfo { @@ -36,10 +44,12 @@ struct FrameInfo { const MCSymbol *Function = nullptr; const MCSymbol *PrologEnd = nullptr; const MCSymbol *Symbol = nullptr; - const MCSection *TextSection = nullptr; + MCSection *TextSection = nullptr; + uint32_t PackedInfo = 0; bool HandlesUnwind = false; bool HandlesExceptions = false; + bool EmitAttempted = false; int LastFrameInst = -1; const FrameInfo *ChainedParent = nullptr; @@ -53,6 +63,15 @@ struct FrameInfo { const FrameInfo *ChainedParent) : Begin(BeginFuncEHLabel), Function(Function), ChainedParent(ChainedParent) {} + + bool empty() const { + if (!Instructions.empty()) + return false; + for (const auto &E : EpilogMap) + if (!E.second.empty()) + return false; + return true; + } }; class UnwindEmitter { @@ -61,7 +80,8 @@ public: /// This emits the unwind info sections (.pdata and .xdata in PE/COFF). virtual void Emit(MCStreamer &Streamer) const = 0; - virtual void EmitUnwindInfo(MCStreamer &Streamer, FrameInfo *FI) const = 0; + virtual void EmitUnwindInfo(MCStreamer &Streamer, FrameInfo *FI, + bool HandlerData) const = 0; }; } } diff --git a/llvm/include/llvm/MC/StringTableBuilder.h b/llvm/include/llvm/MC/StringTableBuilder.h index d8bfac03f7f2..3f9c91be05d3 100644 --- a/llvm/include/llvm/MC/StringTableBuilder.h +++ b/llvm/include/llvm/MC/StringTableBuilder.h @@ -22,7 +22,17 @@ class raw_ostream; /// Utility for building string tables with deduplicated suffixes. class StringTableBuilder { public: - enum Kind { ELF, WinCOFF, MachO, RAW, DWARF, XCOFF }; + enum Kind { + ELF, + WinCOFF, + MachO, + MachO64, + MachOLinked, + MachO64Linked, + RAW, + DWARF, + XCOFF + }; private: DenseMap<CachedHashStringRef, size_t> StringIndexMap; diff --git a/llvm/include/llvm/MC/SubtargetFeature.h b/llvm/include/llvm/MC/SubtargetFeature.h index 01ea794a4bc3..cc36b25a4965 100644 --- a/llvm/include/llvm/MC/SubtargetFeature.h +++ b/llvm/include/llvm/MC/SubtargetFeature.h @@ -30,7 +30,7 @@ namespace llvm { class raw_ostream; class Triple; -const unsigned MAX_SUBTARGET_WORDS = 3; +const unsigned MAX_SUBTARGET_WORDS = 4; const unsigned MAX_SUBTARGET_FEATURES = MAX_SUBTARGET_WORDS * 64; /// Container class for subtarget features. diff --git a/llvm/include/llvm/MCA/HardwareUnits/Scheduler.h b/llvm/include/llvm/MCA/HardwareUnits/Scheduler.h index 6c196757e571..0293364e26ef 100644 --- a/llvm/include/llvm/MCA/HardwareUnits/Scheduler.h +++ b/llvm/include/llvm/MCA/HardwareUnits/Scheduler.h @@ -267,9 +267,9 @@ public: // This routine performs a sanity check. This routine should only be called // when we know that 'IR' is not in the scheduler's instruction queues. 
void sanityCheck(const InstRef &IR) const { - assert(find(WaitSet, IR) == WaitSet.end() && "Already in the wait set!"); - assert(find(ReadySet, IR) == ReadySet.end() && "Already in the ready set!"); - assert(find(IssuedSet, IR) == IssuedSet.end() && "Already executing!"); + assert(!is_contained(WaitSet, IR) && "Already in the wait set!"); + assert(!is_contained(ReadySet, IR) && "Already in the ready set!"); + assert(!is_contained(IssuedSet, IR) && "Already executing!"); } #endif // !NDEBUG }; diff --git a/llvm/include/llvm/Object/ArchiveWriter.h b/llvm/include/llvm/Object/ArchiveWriter.h index 274ffd90c05a..7eaf13e8fb22 100644 --- a/llvm/include/llvm/Object/ArchiveWriter.h +++ b/llvm/include/llvm/Object/ArchiveWriter.h @@ -39,6 +39,12 @@ Error writeArchive(StringRef ArcName, ArrayRef<NewArchiveMember> NewMembers, bool WriteSymtab, object::Archive::Kind Kind, bool Deterministic, bool Thin, std::unique_ptr<MemoryBuffer> OldArchiveBuf = nullptr); + +// writeArchiveToBuffer is similar to writeArchive but returns the Archive in a +// buffer instead of writing it out to a file. +Expected<std::unique_ptr<MemoryBuffer>> +writeArchiveToBuffer(ArrayRef<NewArchiveMember> NewMembers, bool WriteSymtab, + object::Archive::Kind Kind, bool Deterministic, bool Thin); } #endif diff --git a/llvm/include/llvm/Object/Binary.h b/llvm/include/llvm/Object/Binary.h index e95516f30a40..dd98e1143e25 100644 --- a/llvm/include/llvm/Object/Binary.h +++ b/llvm/include/llvm/Object/Binary.h @@ -91,6 +91,8 @@ public: Binary(const Binary &other) = delete; virtual ~Binary(); + virtual Error initContent() { return Error::success(); }; + StringRef getData() const; StringRef getFileName() const; MemoryBufferRef getMemoryBufferRef() const; @@ -163,8 +165,8 @@ public: static Error checkOffset(MemoryBufferRef M, uintptr_t Addr, const uint64_t Size) { if (Addr + Size < Addr || Addr + Size < Size || - Addr + Size > uintptr_t(M.getBufferEnd()) || - Addr < uintptr_t(M.getBufferStart())) { + Addr + Size > reinterpret_cast<uintptr_t>(M.getBufferEnd()) || + Addr < reinterpret_cast<uintptr_t>(M.getBufferStart())) { return errorCodeToError(object_error::unexpected_eof); } return Error::success(); @@ -178,7 +180,8 @@ DEFINE_ISA_CONVERSION_FUNCTIONS(Binary, LLVMBinaryRef) /// /// @param Source The data to create the Binary from. Expected<std::unique_ptr<Binary>> createBinary(MemoryBufferRef Source, - LLVMContext *Context = nullptr); + LLVMContext *Context = nullptr, + bool InitContent = true); template <typename T> class OwningBinary { std::unique_ptr<T> Bin; @@ -228,7 +231,9 @@ template <typename T> const T* OwningBinary<T>::getBinary() const { return Bin.get(); } -Expected<OwningBinary<Binary>> createBinary(StringRef Path); +Expected<OwningBinary<Binary>> createBinary(StringRef Path, + LLVMContext *Context = nullptr, + bool InitContent = true); } // end namespace object diff --git a/llvm/include/llvm/Object/COFF.h b/llvm/include/llvm/Object/COFF.h index 8aef00a8809d..e7cf1b5495c6 100644 --- a/llvm/include/llvm/Object/COFF.h +++ b/llvm/include/llvm/Object/COFF.h @@ -576,11 +576,22 @@ struct coff_tls_directory { uint32_t getAlignment() const { // Bit [20:24] contains section alignment. 
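// (Worked example, editorial, not part of this header: with this encoding a
// 16-byte alignment is stored as Log2_32(16) + 1 == 5 in bits [20:24] by
// setAlignment() below, and getAlignment() reads Shift == 5 back as
// 1 << (5 - 1) == 16; a stored value of 0 means no alignment information.)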
- uint32_t Shift = (Characteristics & 0x00F00000) >> 20; + uint32_t Shift = (Characteristics & COFF::IMAGE_SCN_ALIGN_MASK) >> 20; if (Shift > 0) return 1U << (Shift - 1); return 0; } + + void setAlignment(uint32_t Align) { + uint32_t AlignBits = 0; + if (Align) { + assert(llvm::isPowerOf2_32(Align) && "alignment is not a power of 2"); + assert(llvm::Log2_32(Align) <= 13 && "alignment requested is too large"); + AlignBits = (llvm::Log2_32(Align) + 1) << 20; + } + Characteristics = + (Characteristics & ~COFF::IMAGE_SCN_ALIGN_MASK) | AlignBits; + } }; using coff_tls_directory32 = coff_tls_directory<support::little32_t>; @@ -786,6 +797,8 @@ private: const coff_base_reloc_block_header *BaseRelocEnd; const debug_directory *DebugDirectoryBegin; const debug_directory *DebugDirectoryEnd; + const coff_tls_directory32 *TLSDirectory32; + const coff_tls_directory64 *TLSDirectory64; // Either coff_load_configuration32 or coff_load_configuration64. const void *LoadConfig = nullptr; @@ -805,6 +818,7 @@ private: Error initExportTablePtr(); Error initBaseRelocPtr(); Error initDebugDirectoryPtr(); + Error initTLSDirectoryPtr(); Error initLoadConfigPtr(); public: @@ -976,6 +990,13 @@ public: return make_range(debug_directory_begin(), debug_directory_end()); } + const coff_tls_directory32 *getTLSDirectory32() const { + return TLSDirectory32; + } + const coff_tls_directory64 *getTLSDirectory64() const { + return TLSDirectory64; + } + const dos_header *getDOSHeader() const { if (!PE32Header && !PE32PlusHeader) return nullptr; diff --git a/llvm/include/llvm/Object/ELF.h b/llvm/include/llvm/Object/ELF.h index b44dd3f48661..447b4c25ce81 100644 --- a/llvm/include/llvm/Object/ELF.h +++ b/llvm/include/llvm/Object/ELF.h @@ -30,6 +30,43 @@ namespace llvm { namespace object { +struct VerdAux { + unsigned Offset; + std::string Name; +}; + +struct VerDef { + unsigned Offset; + unsigned Version; + unsigned Flags; + unsigned Ndx; + unsigned Cnt; + unsigned Hash; + std::string Name; + std::vector<VerdAux> AuxV; +}; + +struct VernAux { + unsigned Hash; + unsigned Flags; + unsigned Other; + unsigned Offset; + std::string Name; +}; + +struct VerNeed { + unsigned Version; + unsigned Cnt; + unsigned Offset; + std::string File; + std::vector<VernAux> AuxV; +}; + +struct VersionEntry { + std::string Name; + bool IsVerDef; +}; + StringRef getELFRelocationTypeName(uint32_t Machine, uint32_t Type); uint32_t getELFRelativeRelocationType(uint32_t Machine); StringRef getELFSectionTypeName(uint32_t Machine, uint32_t Type); @@ -48,14 +85,51 @@ static inline Error createError(const Twine &Err) { return make_error<StringError>(Err, object_error::parse_failed); } +enum PPCInstrMasks : uint64_t { + PADDI_R12_NO_DISP = 0x0610000039800000, + PLD_R12_NO_DISP = 0x04100000E5800000, + MTCTR_R12 = 0x7D8903A6, + BCTR = 0x4E800420, +}; + template <class ELFT> class ELFFile; +template <class T> struct DataRegion { + // This constructor is used when we know the start and the size of a data + // region. We assume that Arr does not go past the end of the file. + DataRegion(ArrayRef<T> Arr) : First(Arr.data()), Size(Arr.size()) {} + + // Sometimes we only know the start of a data region. We still don't want to + // read past the end of the file, so we provide the end of a buffer. 
+ DataRegion(const T *Data, const uint8_t *BufferEnd) + : First(Data), BufEnd(BufferEnd) {} + + Expected<T> operator[](uint64_t N) { + assert(Size || BufEnd); + if (Size) { + if (N >= *Size) + return createError( + "the index is greater than or equal to the number of entries (" + + Twine(*Size) + ")"); + } else { + const uint8_t *EntryStart = (const uint8_t *)First + N * sizeof(T); + if (EntryStart + sizeof(T) > BufEnd) + return createError("can't read past the end of the file"); + } + return *(First + N); + } + + const T *First; + Optional<uint64_t> Size = None; + const uint8_t *BufEnd = nullptr; +}; + template <class ELFT> -std::string getSecIndexForError(const ELFFile<ELFT> *Obj, - const typename ELFT::Shdr *Sec) { - auto TableOrErr = Obj->sections(); +std::string getSecIndexForError(const ELFFile<ELFT> &Obj, + const typename ELFT::Shdr &Sec) { + auto TableOrErr = Obj.sections(); if (TableOrErr) - return "[index " + std::to_string(Sec - &TableOrErr->front()) + "]"; + return "[index " + std::to_string(&Sec - &TableOrErr->front()) + "]"; // To make this helper be more convenient for error reporting purposes we // drop the error. But really it should never be triggered. Before this point, // our code should have called 'sections()' and reported a proper error on @@ -65,11 +139,21 @@ std::string getSecIndexForError(const ELFFile<ELFT> *Obj, } template <class ELFT> -std::string getPhdrIndexForError(const ELFFile<ELFT> *Obj, - const typename ELFT::Phdr *Phdr) { - auto Headers = Obj->program_headers(); +static std::string describe(const ELFFile<ELFT> &Obj, + const typename ELFT::Shdr &Sec) { + unsigned SecNdx = &Sec - &cantFail(Obj.sections()).front(); + return (object::getELFSectionTypeName(Obj.getHeader().e_machine, + Sec.sh_type) + + " section with index " + Twine(SecNdx)) + .str(); +} + +template <class ELFT> +std::string getPhdrIndexForError(const ELFFile<ELFT> &Obj, + const typename ELFT::Phdr &Phdr) { + auto Headers = Obj.program_headers(); if (Headers) - return ("[index " + Twine(Phdr - &Headers->front()) + "]").str(); + return ("[index " + Twine(&Phdr - &Headers->front()) + "]").str(); // See comment in the getSecIndexForError() above. 
llvm::consumeError(Headers.takeError()); return "[unknown index]"; @@ -83,32 +167,6 @@ template <class ELFT> class ELFFile { public: LLVM_ELF_IMPORT_TYPES_ELFT(ELFT) - using uintX_t = typename ELFT::uint; - using Elf_Ehdr = typename ELFT::Ehdr; - using Elf_Shdr = typename ELFT::Shdr; - using Elf_Sym = typename ELFT::Sym; - using Elf_Dyn = typename ELFT::Dyn; - using Elf_Phdr = typename ELFT::Phdr; - using Elf_Rel = typename ELFT::Rel; - using Elf_Rela = typename ELFT::Rela; - using Elf_Relr = typename ELFT::Relr; - using Elf_Verdef = typename ELFT::Verdef; - using Elf_Verdaux = typename ELFT::Verdaux; - using Elf_Verneed = typename ELFT::Verneed; - using Elf_Vernaux = typename ELFT::Vernaux; - using Elf_Versym = typename ELFT::Versym; - using Elf_Hash = typename ELFT::Hash; - using Elf_GnuHash = typename ELFT::GnuHash; - using Elf_Nhdr = typename ELFT::Nhdr; - using Elf_Note = typename ELFT::Note; - using Elf_Note_Iterator = typename ELFT::NoteIterator; - using Elf_Dyn_Range = typename ELFT::DynRange; - using Elf_Shdr_Range = typename ELFT::ShdrRange; - using Elf_Sym_Range = typename ELFT::SymRange; - using Elf_Rel_Range = typename ELFT::RelRange; - using Elf_Rela_Range = typename ELFT::RelaRange; - using Elf_Relr_Range = typename ELFT::RelrRange; - using Elf_Phdr_Range = typename ELFT::PhdrRange; // This is a callback that can be passed to a number of functions. // It can be used to ignore non-critical errors (warnings), which is @@ -118,6 +176,7 @@ public: using WarningHandler = llvm::function_ref<Error(const Twine &Msg)>; const uint8_t *base() const { return Buf.bytes_begin(); } + const uint8_t *end() const { return base() + getBufSize(); } size_t getBufSize() const { return Buf.size(); } @@ -127,26 +186,39 @@ private: ELFFile(StringRef Object); public: - const Elf_Ehdr *getHeader() const { - return reinterpret_cast<const Elf_Ehdr *>(base()); + const Elf_Ehdr &getHeader() const { + return *reinterpret_cast<const Elf_Ehdr *>(base()); } template <typename T> Expected<const T *> getEntry(uint32_t Section, uint32_t Entry) const; template <typename T> - Expected<const T *> getEntry(const Elf_Shdr *Section, uint32_t Entry) const; + Expected<const T *> getEntry(const Elf_Shdr &Section, uint32_t Entry) const; + + Expected<std::vector<VerDef>> + getVersionDefinitions(const Elf_Shdr &Sec) const; + Expected<std::vector<VerNeed>> getVersionDependencies( + const Elf_Shdr &Sec, + WarningHandler WarnHandler = &defaultWarningHandler) const; + Expected<StringRef> + getSymbolVersionByIndex(uint32_t SymbolVersionIndex, bool &IsDefault, + SmallVector<Optional<VersionEntry>, 0> &VersionMap, + Optional<bool> IsSymHidden) const; Expected<StringRef> - getStringTable(const Elf_Shdr *Section, + getStringTable(const Elf_Shdr &Section, WarningHandler WarnHandler = &defaultWarningHandler) const; Expected<StringRef> getStringTableForSymtab(const Elf_Shdr &Section) const; Expected<StringRef> getStringTableForSymtab(const Elf_Shdr &Section, Elf_Shdr_Range Sections) const; + Expected<StringRef> getLinkAsStrtab(const typename ELFT::Shdr &Sec) const; Expected<ArrayRef<Elf_Word>> getSHNDXTable(const Elf_Shdr &Section) const; Expected<ArrayRef<Elf_Word>> getSHNDXTable(const Elf_Shdr &Section, Elf_Shdr_Range Sections) const; + Expected<uint64_t> getDynSymtabSize() const; + StringRef getRelocationTypeName(uint32_t Type) const; void getRelocationTypeName(uint32_t Type, SmallVectorImpl<char> &Result) const; @@ -156,18 +228,21 @@ public: std::string getDynamicTagAsString(uint64_t Type) const; /// Get the symbol for a given 
relocation. - Expected<const Elf_Sym *> getRelocationSymbol(const Elf_Rel *Rel, + Expected<const Elf_Sym *> getRelocationSymbol(const Elf_Rel &Rel, const Elf_Shdr *SymTab) const; + Expected<SmallVector<Optional<VersionEntry>, 0>> + loadVersionMap(const Elf_Shdr *VerNeedSec, const Elf_Shdr *VerDefSec) const; + static Expected<ELFFile> create(StringRef Object); bool isLE() const { - return getHeader()->getDataEncoding() == ELF::ELFDATA2LSB; + return getHeader().getDataEncoding() == ELF::ELFDATA2LSB; } bool isMipsELF64() const { - return getHeader()->e_machine == ELF::EM_MIPS && - getHeader()->getFileClass() == ELF::ELFCLASS64; + return getHeader().e_machine == ELF::EM_MIPS && + getHeader().getFileClass() == ELF::ELFCLASS64; } bool isMips64EL() const { return isMipsELF64() && isLE(); } @@ -176,48 +251,50 @@ public: Expected<Elf_Dyn_Range> dynamicEntries() const; - Expected<const uint8_t *> toMappedAddr(uint64_t VAddr) const; + Expected<const uint8_t *> + toMappedAddr(uint64_t VAddr, + WarningHandler WarnHandler = &defaultWarningHandler) const; Expected<Elf_Sym_Range> symbols(const Elf_Shdr *Sec) const { if (!Sec) return makeArrayRef<Elf_Sym>(nullptr, nullptr); - return getSectionContentsAsArray<Elf_Sym>(Sec); + return getSectionContentsAsArray<Elf_Sym>(*Sec); } - Expected<Elf_Rela_Range> relas(const Elf_Shdr *Sec) const { + Expected<Elf_Rela_Range> relas(const Elf_Shdr &Sec) const { return getSectionContentsAsArray<Elf_Rela>(Sec); } - Expected<Elf_Rel_Range> rels(const Elf_Shdr *Sec) const { + Expected<Elf_Rel_Range> rels(const Elf_Shdr &Sec) const { return getSectionContentsAsArray<Elf_Rel>(Sec); } - Expected<Elf_Relr_Range> relrs(const Elf_Shdr *Sec) const { + Expected<Elf_Relr_Range> relrs(const Elf_Shdr &Sec) const { return getSectionContentsAsArray<Elf_Relr>(Sec); } - Expected<std::vector<Elf_Rela>> decode_relrs(Elf_Relr_Range relrs) const; + std::vector<Elf_Rel> decode_relrs(Elf_Relr_Range relrs) const; - Expected<std::vector<Elf_Rela>> android_relas(const Elf_Shdr *Sec) const; + Expected<std::vector<Elf_Rela>> android_relas(const Elf_Shdr &Sec) const; /// Iterate over program header table. Expected<Elf_Phdr_Range> program_headers() const { - if (getHeader()->e_phnum && getHeader()->e_phentsize != sizeof(Elf_Phdr)) + if (getHeader().e_phnum && getHeader().e_phentsize != sizeof(Elf_Phdr)) return createError("invalid e_phentsize: " + - Twine(getHeader()->e_phentsize)); + Twine(getHeader().e_phentsize)); uint64_t HeadersSize = - (uint64_t)getHeader()->e_phnum * getHeader()->e_phentsize; - uint64_t PhOff = getHeader()->e_phoff; + (uint64_t)getHeader().e_phnum * getHeader().e_phentsize; + uint64_t PhOff = getHeader().e_phoff; if (PhOff + HeadersSize < PhOff || PhOff + HeadersSize > getBufSize()) return createError("program headers are longer than binary of size " + Twine(getBufSize()) + ": e_phoff = 0x" + - Twine::utohexstr(getHeader()->e_phoff) + - ", e_phnum = " + Twine(getHeader()->e_phnum) + - ", e_phentsize = " + Twine(getHeader()->e_phentsize)); + Twine::utohexstr(getHeader().e_phoff) + + ", e_phnum = " + Twine(getHeader().e_phnum) + + ", e_phentsize = " + Twine(getHeader().e_phentsize)); auto *Begin = reinterpret_cast<const Elf_Phdr *>(base() + PhOff); - return makeArrayRef(Begin, Begin + getHeader()->e_phnum); + return makeArrayRef(Begin, Begin + getHeader().e_phnum); } /// Get an iterator over notes in a program header. 
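As a usage sketch of the note iteration interface declared around here: it assumes the usual notes_begin()/notes_end() pair and the Elf_Note getName() accessor, whose declarations fall outside the hunks shown, so treat the exact spellings as assumptions rather than a verified quote of this revision.

#include "llvm/Object/ELF.h"
#include "llvm/Support/raw_ostream.h"

// Print the owner name of every note found in PT_NOTE segments.
static void printNoteOwners(const llvm::object::ELF64LEFile &Obj) {
  auto PhdrsOrErr = Obj.program_headers();
  if (!PhdrsOrErr) {
    llvm::consumeError(PhdrsOrErr.takeError());
    return;
  }
  for (const auto &Phdr : *PhdrsOrErr) {
    if (Phdr.p_type != llvm::ELF::PT_NOTE)
      continue;
    llvm::Error Err = llvm::Error::success();
    for (auto It = Obj.notes_begin(Phdr, Err), End = Obj.notes_end(); It != End;
         ++It)
      llvm::outs() << (*It).getName() << "\n";
    if (Err)
      llvm::consumeError(std::move(Err)); // e.g. the invalid offset/size error above
  }
}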
@@ -231,9 +308,9 @@ public: assert(Phdr.p_type == ELF::PT_NOTE && "Phdr is not of type PT_NOTE"); ErrorAsOutParameter ErrAsOutParam(&Err); if (Phdr.p_offset + Phdr.p_filesz > getBufSize()) { - Err = createError("PT_NOTE header has invalid offset (0x" + - Twine::utohexstr(Phdr.p_offset) + ") or size (0x" + - Twine::utohexstr(Phdr.p_filesz) + ")"); + Err = + createError("invalid offset (0x" + Twine::utohexstr(Phdr.p_offset) + + ") or size (0x" + Twine::utohexstr(Phdr.p_filesz) + ")"); return Elf_Note_Iterator(Err); } return Elf_Note_Iterator(base() + Phdr.p_offset, Phdr.p_filesz, Err); @@ -250,10 +327,9 @@ public: assert(Shdr.sh_type == ELF::SHT_NOTE && "Shdr is not of type SHT_NOTE"); ErrorAsOutParameter ErrAsOutParam(&Err); if (Shdr.sh_offset + Shdr.sh_size > getBufSize()) { - Err = createError("SHT_NOTE section " + getSecIndexForError(this, &Shdr) + - " has invalid offset (0x" + - Twine::utohexstr(Shdr.sh_offset) + ") or size (0x" + - Twine::utohexstr(Shdr.sh_size) + ")"); + Err = + createError("invalid offset (0x" + Twine::utohexstr(Shdr.sh_offset) + + ") or size (0x" + Twine::utohexstr(Shdr.sh_size) + ")"); return Elf_Note_Iterator(Err); } return Elf_Note_Iterator(base() + Shdr.sh_offset, Shdr.sh_size, Err); @@ -291,28 +367,28 @@ public: Expected<StringRef> getSectionStringTable( Elf_Shdr_Range Sections, WarningHandler WarnHandler = &defaultWarningHandler) const; - Expected<uint32_t> getSectionIndex(const Elf_Sym *Sym, Elf_Sym_Range Syms, - ArrayRef<Elf_Word> ShndxTable) const; - Expected<const Elf_Shdr *> getSection(const Elf_Sym *Sym, + Expected<uint32_t> getSectionIndex(const Elf_Sym &Sym, Elf_Sym_Range Syms, + DataRegion<Elf_Word> ShndxTable) const; + Expected<const Elf_Shdr *> getSection(const Elf_Sym &Sym, const Elf_Shdr *SymTab, - ArrayRef<Elf_Word> ShndxTable) const; - Expected<const Elf_Shdr *> getSection(const Elf_Sym *Sym, + DataRegion<Elf_Word> ShndxTable) const; + Expected<const Elf_Shdr *> getSection(const Elf_Sym &Sym, Elf_Sym_Range Symtab, - ArrayRef<Elf_Word> ShndxTable) const; + DataRegion<Elf_Word> ShndxTable) const; Expected<const Elf_Shdr *> getSection(uint32_t Index) const; Expected<const Elf_Sym *> getSymbol(const Elf_Shdr *Sec, uint32_t Index) const; Expected<StringRef> - getSectionName(const Elf_Shdr *Section, + getSectionName(const Elf_Shdr &Section, WarningHandler WarnHandler = &defaultWarningHandler) const; - Expected<StringRef> getSectionName(const Elf_Shdr *Section, + Expected<StringRef> getSectionName(const Elf_Shdr &Section, StringRef DotShstrtab) const; template <typename T> - Expected<ArrayRef<T>> getSectionContentsAsArray(const Elf_Shdr *Sec) const; - Expected<ArrayRef<uint8_t>> getSectionContents(const Elf_Shdr *Sec) const; - Expected<ArrayRef<uint8_t>> getSegmentContents(const Elf_Phdr *Phdr) const; + Expected<ArrayRef<T>> getSectionContentsAsArray(const Elf_Shdr &Sec) const; + Expected<ArrayRef<uint8_t>> getSectionContents(const Elf_Shdr &Sec) const; + Expected<ArrayRef<uint8_t>> getSegmentContents(const Elf_Phdr &Phdr) const; }; using ELF32LEFile = ELFFile<ELF32LE>; @@ -330,29 +406,30 @@ getSection(typename ELFT::ShdrRange Sections, uint32_t Index) { template <class ELFT> inline Expected<uint32_t> -getExtendedSymbolTableIndex(const typename ELFT::Sym *Sym, - const typename ELFT::Sym *FirstSym, - ArrayRef<typename ELFT::Word> ShndxTable) { - assert(Sym->st_shndx == ELF::SHN_XINDEX); - unsigned Index = Sym - FirstSym; - if (Index >= ShndxTable.size()) +getExtendedSymbolTableIndex(const typename ELFT::Sym &Sym, unsigned SymIndex, + DataRegion<typename 
ELFT::Word> ShndxTable) { + assert(Sym.st_shndx == ELF::SHN_XINDEX); + if (!ShndxTable.First) return createError( - "extended symbol index (" + Twine(Index) + - ") is past the end of the SHT_SYMTAB_SHNDX section of size " + - Twine(ShndxTable.size())); + "found an extended symbol index (" + Twine(SymIndex) + + "), but unable to locate the extended symbol index table"); - // The size of the table was checked in getSHNDXTable. - return ShndxTable[Index]; + Expected<typename ELFT::Word> TableOrErr = ShndxTable[SymIndex]; + if (!TableOrErr) + return createError("unable to read an extended symbol table at index " + + Twine(SymIndex) + ": " + + toString(TableOrErr.takeError())); + return *TableOrErr; } template <class ELFT> Expected<uint32_t> -ELFFile<ELFT>::getSectionIndex(const Elf_Sym *Sym, Elf_Sym_Range Syms, - ArrayRef<Elf_Word> ShndxTable) const { - uint32_t Index = Sym->st_shndx; +ELFFile<ELFT>::getSectionIndex(const Elf_Sym &Sym, Elf_Sym_Range Syms, + DataRegion<Elf_Word> ShndxTable) const { + uint32_t Index = Sym.st_shndx; if (Index == ELF::SHN_XINDEX) { - auto ErrorOrIndex = getExtendedSymbolTableIndex<ELFT>( - Sym, Syms.begin(), ShndxTable); + Expected<uint32_t> ErrorOrIndex = + getExtendedSymbolTableIndex<ELFT>(Sym, &Sym - Syms.begin(), ShndxTable); if (!ErrorOrIndex) return ErrorOrIndex.takeError(); return *ErrorOrIndex; @@ -364,8 +441,8 @@ ELFFile<ELFT>::getSectionIndex(const Elf_Sym *Sym, Elf_Sym_Range Syms, template <class ELFT> Expected<const typename ELFT::Shdr *> -ELFFile<ELFT>::getSection(const Elf_Sym *Sym, const Elf_Shdr *SymTab, - ArrayRef<Elf_Word> ShndxTable) const { +ELFFile<ELFT>::getSection(const Elf_Sym &Sym, const Elf_Shdr *SymTab, + DataRegion<Elf_Word> ShndxTable) const { auto SymsOrErr = symbols(SymTab); if (!SymsOrErr) return SymsOrErr.takeError(); @@ -374,8 +451,8 @@ ELFFile<ELFT>::getSection(const Elf_Sym *Sym, const Elf_Shdr *SymTab, template <class ELFT> Expected<const typename ELFT::Shdr *> -ELFFile<ELFT>::getSection(const Elf_Sym *Sym, Elf_Sym_Range Symbols, - ArrayRef<Elf_Word> ShndxTable) const { +ELFFile<ELFT>::getSection(const Elf_Sym &Sym, Elf_Sym_Range Symbols, + DataRegion<Elf_Word> ShndxTable) const { auto IndexOrErr = getSectionIndex(Sym, Symbols, ShndxTable); if (!IndexOrErr) return IndexOrErr.takeError(); @@ -395,7 +472,7 @@ ELFFile<ELFT>::getSymbol(const Elf_Shdr *Sec, uint32_t Index) const { Elf_Sym_Range Symbols = *SymsOrErr; if (Index >= Symbols.size()) return createError("unable to get symbol from section " + - getSecIndexForError(this, Sec) + + getSecIndexForError(*this, *Sec) + ": invalid symbol index (" + Twine(Index) + ")"); return &Symbols[Index]; } @@ -403,26 +480,27 @@ ELFFile<ELFT>::getSymbol(const Elf_Shdr *Sec, uint32_t Index) const { template <class ELFT> template <typename T> Expected<ArrayRef<T>> -ELFFile<ELFT>::getSectionContentsAsArray(const Elf_Shdr *Sec) const { - if (Sec->sh_entsize != sizeof(T) && sizeof(T) != 1) - return createError("section " + getSecIndexForError(this, Sec) + - " has an invalid sh_entsize: " + Twine(Sec->sh_entsize)); +ELFFile<ELFT>::getSectionContentsAsArray(const Elf_Shdr &Sec) const { + if (Sec.sh_entsize != sizeof(T) && sizeof(T) != 1) + return createError("section " + getSecIndexForError(*this, Sec) + + " has invalid sh_entsize: expected " + Twine(sizeof(T)) + + ", but got " + Twine(Sec.sh_entsize)); - uintX_t Offset = Sec->sh_offset; - uintX_t Size = Sec->sh_size; + uintX_t Offset = Sec.sh_offset; + uintX_t Size = Sec.sh_size; if (Size % sizeof(T)) - return createError("section " + 
getSecIndexForError(this, Sec) + + return createError("section " + getSecIndexForError(*this, Sec) + " has an invalid sh_size (" + Twine(Size) + ") which is not a multiple of its sh_entsize (" + - Twine(Sec->sh_entsize) + ")"); + Twine(Sec.sh_entsize) + ")"); if (std::numeric_limits<uintX_t>::max() - Offset < Size) - return createError("section " + getSecIndexForError(this, Sec) + + return createError("section " + getSecIndexForError(*this, Sec) + " has a sh_offset (0x" + Twine::utohexstr(Offset) + ") + sh_size (0x" + Twine::utohexstr(Size) + ") that cannot be represented"); if (Offset + Size > Buf.size()) - return createError("section " + getSecIndexForError(this, Sec) + + return createError("section " + getSecIndexForError(*this, Sec) + " has a sh_offset (0x" + Twine::utohexstr(Offset) + ") + sh_size (0x" + Twine::utohexstr(Size) + ") that is greater than the file size (0x" + @@ -438,17 +516,17 @@ ELFFile<ELFT>::getSectionContentsAsArray(const Elf_Shdr *Sec) const { template <class ELFT> Expected<ArrayRef<uint8_t>> -ELFFile<ELFT>::getSegmentContents(const Elf_Phdr *Phdr) const { - uintX_t Offset = Phdr->p_offset; - uintX_t Size = Phdr->p_filesz; +ELFFile<ELFT>::getSegmentContents(const Elf_Phdr &Phdr) const { + uintX_t Offset = Phdr.p_offset; + uintX_t Size = Phdr.p_filesz; if (std::numeric_limits<uintX_t>::max() - Offset < Size) - return createError("program header " + getPhdrIndexForError(this, Phdr) + + return createError("program header " + getPhdrIndexForError(*this, Phdr) + " has a p_offset (0x" + Twine::utohexstr(Offset) + ") + p_filesz (0x" + Twine::utohexstr(Size) + ") that cannot be represented"); if (Offset + Size > Buf.size()) - return createError("program header " + getPhdrIndexForError(this, Phdr) + + return createError("program header " + getPhdrIndexForError(*this, Phdr) + " has a p_offset (0x" + Twine::utohexstr(Offset) + ") + p_filesz (0x" + Twine::utohexstr(Size) + ") that is greater than the file size (0x" + @@ -458,13 +536,13 @@ ELFFile<ELFT>::getSegmentContents(const Elf_Phdr *Phdr) const { template <class ELFT> Expected<ArrayRef<uint8_t>> -ELFFile<ELFT>::getSectionContents(const Elf_Shdr *Sec) const { +ELFFile<ELFT>::getSectionContents(const Elf_Shdr &Sec) const { return getSectionContentsAsArray<uint8_t>(Sec); } template <class ELFT> StringRef ELFFile<ELFT>::getRelocationTypeName(uint32_t Type) const { - return getELFRelocationTypeName(getHeader()->e_machine, Type); + return getELFRelocationTypeName(getHeader().e_machine, Type); } template <class ELFT> @@ -500,24 +578,61 @@ void ELFFile<ELFT>::getRelocationTypeName(uint32_t Type, template <class ELFT> uint32_t ELFFile<ELFT>::getRelativeRelocationType() const { - return getELFRelativeRelocationType(getHeader()->e_machine); + return getELFRelativeRelocationType(getHeader().e_machine); +} + +template <class ELFT> +Expected<SmallVector<Optional<VersionEntry>, 0>> +ELFFile<ELFT>::loadVersionMap(const Elf_Shdr *VerNeedSec, + const Elf_Shdr *VerDefSec) const { + SmallVector<Optional<VersionEntry>, 0> VersionMap; + + // The first two version indexes are reserved. + // Index 0 is VER_NDX_LOCAL, index 1 is VER_NDX_GLOBAL. 
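For orientation, this is how a raw SHT_GNU_versym entry is eventually interpreted against the map assembled below; a simplified sketch of the logic that getSymbolVersionByIndex() implements later in this header, using plain containers (SimpleVersion and describeVersym are invented names for illustration):

#include "llvm/BinaryFormat/ELF.h"
#include <cstdint>
#include <string>
#include <vector>

struct SimpleVersion {
  std::string Name;
  bool IsVerDef; // true if the name came from a verdef, false for verneed
};

// Map one .gnu.version value to the "@" / "@@" suffix readers expect.
static std::string describeVersym(uint16_t Versym,
                                  const std::vector<SimpleVersion> &Map) {
  unsigned Index = Versym & llvm::ELF::VERSYM_VERSION;
  if (Index == llvm::ELF::VER_NDX_LOCAL || Index == llvm::ELF::VER_NDX_GLOBAL)
    return "<unversioned>";
  if (Index >= Map.size())
    return "<invalid version index>";
  // A default version ("@@") is only meaningful for defined symbols, i.e.
  // verdef-backed entries that are not marked hidden.
  bool IsDefault = Map[Index].IsVerDef && !(Versym & llvm::ELF::VERSYM_HIDDEN);
  return (IsDefault ? "@@" : "@") + Map[Index].Name;
}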
+ VersionMap.push_back(VersionEntry()); + VersionMap.push_back(VersionEntry()); + + auto InsertEntry = [&](unsigned N, StringRef Version, bool IsVerdef) { + if (N >= VersionMap.size()) + VersionMap.resize(N + 1); + VersionMap[N] = {std::string(Version), IsVerdef}; + }; + + if (VerDefSec) { + Expected<std::vector<VerDef>> Defs = getVersionDefinitions(*VerDefSec); + if (!Defs) + return Defs.takeError(); + for (const VerDef &Def : *Defs) + InsertEntry(Def.Ndx & ELF::VERSYM_VERSION, Def.Name, true); + } + + if (VerNeedSec) { + Expected<std::vector<VerNeed>> Deps = getVersionDependencies(*VerNeedSec); + if (!Deps) + return Deps.takeError(); + for (const VerNeed &Dep : *Deps) + for (const VernAux &Aux : Dep.AuxV) + InsertEntry(Aux.Other & ELF::VERSYM_VERSION, Aux.Name, false); + } + + return VersionMap; } template <class ELFT> Expected<const typename ELFT::Sym *> -ELFFile<ELFT>::getRelocationSymbol(const Elf_Rel *Rel, +ELFFile<ELFT>::getRelocationSymbol(const Elf_Rel &Rel, const Elf_Shdr *SymTab) const { - uint32_t Index = Rel->getSymbol(isMips64EL()); + uint32_t Index = Rel.getSymbol(isMips64EL()); if (Index == 0) return nullptr; - return getEntry<Elf_Sym>(SymTab, Index); + return getEntry<Elf_Sym>(*SymTab, Index); } template <class ELFT> Expected<StringRef> ELFFile<ELFT>::getSectionStringTable(Elf_Shdr_Range Sections, WarningHandler WarnHandler) const { - uint32_t Index = getHeader()->e_shstrndx; + uint32_t Index = getHeader().e_shstrndx; if (Index == ELF::SHN_XINDEX) { // If the section name string table section index is greater than // or equal to SHN_LORESERVE, then the actual index of the section name @@ -535,7 +650,100 @@ ELFFile<ELFT>::getSectionStringTable(Elf_Shdr_Range Sections, if (Index >= Sections.size()) return createError("section header string table index " + Twine(Index) + " does not exist"); - return getStringTable(&Sections[Index], WarnHandler); + return getStringTable(Sections[Index], WarnHandler); +} + +/// This function finds the number of dynamic symbols using a GNU hash table. +/// +/// @param Table The GNU hash table for .dynsym. +template <class ELFT> +static Expected<uint64_t> +getDynSymtabSizeFromGnuHash(const typename ELFT::GnuHash &Table, + const void *BufEnd) { + using Elf_Word = typename ELFT::Word; + if (Table.nbuckets == 0) + return Table.symndx + 1; + uint64_t LastSymIdx = 0; + // Find the index of the first symbol in the last chain. + for (Elf_Word Val : Table.buckets()) + LastSymIdx = std::max(LastSymIdx, (uint64_t)Val); + const Elf_Word *It = + reinterpret_cast<const Elf_Word *>(Table.values(LastSymIdx).end()); + // Locate the end of the chain to find the last symbol index. + while (It < BufEnd && (*It & 1) == 0) { + ++LastSymIdx; + ++It; + } + if (It >= BufEnd) { + return createStringError( + object_error::parse_failed, + "no terminator found for GNU hash section before buffer end"); + } + return LastSymIdx + 1; +} + +/// This function determines the number of dynamic symbols. It reads section +/// headers first. If section headers are not available, the number of +/// symbols will be inferred by parsing dynamic hash tables. +template <class ELFT> +Expected<uint64_t> ELFFile<ELFT>::getDynSymtabSize() const { + // Read .dynsym section header first if available. 
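The chain-walking logic in getDynSymtabSizeFromGnuHash() above is easier to see on plain arrays. A standalone sketch follows, with bounds handling simplified and names invented for illustration; it mirrors the algorithm rather than the ELF types.

#include <algorithm>
#include <cstdint>
#include <vector>

// Infer the dynamic symbol count from a GNU hash table: the largest bucket
// value is the first symbol of the last chain, and that chain is walked until
// an entry with the low bit set terminates it.
static uint64_t inferDynSymCount(uint32_t SymNdx,
                                 const std::vector<uint32_t> &Buckets,
                                 const std::vector<uint32_t> &Chains) {
  if (Buckets.empty())
    return SymNdx + 1; // matches the nbuckets == 0 case above
  uint64_t LastSymIdx =
      *std::max_element(Buckets.begin(), Buckets.end());
  // Chains[i] describes symbol SymNdx + i; advance until the terminator bit.
  uint64_t Pos = LastSymIdx - SymNdx;
  while (Pos < Chains.size() && (Chains[Pos] & 1) == 0) {
    ++LastSymIdx;
    ++Pos;
  }
  return LastSymIdx + 1;
}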
+ Expected<Elf_Shdr_Range> SectionsOrError = sections(); + if (!SectionsOrError) + return SectionsOrError.takeError(); + for (const Elf_Shdr &Sec : *SectionsOrError) { + if (Sec.sh_type == ELF::SHT_DYNSYM) { + if (Sec.sh_size % Sec.sh_entsize != 0) { + return createStringError(object_error::parse_failed, + "SHT_DYNSYM section has sh_size (" + + Twine(Sec.sh_size) + ") % sh_entsize (" + + Twine(Sec.sh_entsize) + ") that is not 0"); + } + return Sec.sh_size / Sec.sh_entsize; + } + } + + if (!SectionsOrError->empty()) { + // Section headers are available but .dynsym header is not found. + // Return 0 as .dynsym does not exist. + return 0; + } + + // Section headers do not exist. Falling back to infer + // upper bound of .dynsym from .gnu.hash and .hash. + Expected<Elf_Dyn_Range> DynTable = dynamicEntries(); + if (!DynTable) + return DynTable.takeError(); + llvm::Optional<uint64_t> ElfHash; + llvm::Optional<uint64_t> ElfGnuHash; + for (const Elf_Dyn &Entry : *DynTable) { + switch (Entry.d_tag) { + case ELF::DT_HASH: + ElfHash = Entry.d_un.d_ptr; + break; + case ELF::DT_GNU_HASH: + ElfGnuHash = Entry.d_un.d_ptr; + break; + } + } + if (ElfGnuHash) { + Expected<const uint8_t *> TablePtr = toMappedAddr(*ElfGnuHash); + if (!TablePtr) + return TablePtr.takeError(); + const Elf_GnuHash *Table = + reinterpret_cast<const Elf_GnuHash *>(TablePtr.get()); + return getDynSymtabSizeFromGnuHash<ELFT>(*Table, this->Buf.bytes_end()); + } + + // Search SYSV hash table to try to find the upper bound of dynsym. + if (ElfHash) { + Expected<const uint8_t *> TablePtr = toMappedAddr(*ElfHash); + if (!TablePtr) + return TablePtr.takeError(); + const Elf_Hash *Table = reinterpret_cast<const Elf_Hash *>(TablePtr.get()); + return Table->nchain; + } + return 0; } template <class ELFT> ELFFile<ELFT>::ELFFile(StringRef Object) : Buf(Object) {} @@ -551,13 +759,13 @@ Expected<ELFFile<ELFT>> ELFFile<ELFT>::create(StringRef Object) { template <class ELFT> Expected<typename ELFT::ShdrRange> ELFFile<ELFT>::sections() const { - const uintX_t SectionTableOffset = getHeader()->e_shoff; + const uintX_t SectionTableOffset = getHeader().e_shoff; if (SectionTableOffset == 0) return ArrayRef<Elf_Shdr>(); - if (getHeader()->e_shentsize != sizeof(Elf_Shdr)) + if (getHeader().e_shentsize != sizeof(Elf_Shdr)) return createError("invalid e_shentsize in ELF header: " + - Twine(getHeader()->e_shentsize)); + Twine(getHeader().e_shentsize)); const uint64_t FileSize = Buf.size(); if (SectionTableOffset + sizeof(Elf_Shdr) > FileSize || @@ -574,7 +782,7 @@ Expected<typename ELFT::ShdrRange> ELFFile<ELFT>::sections() const { const Elf_Shdr *First = reinterpret_cast<const Elf_Shdr *>(base() + SectionTableOffset); - uintX_t NumSections = getHeader()->e_shnum; + uintX_t NumSections = getHeader().e_shnum; if (NumSections == 0) NumSections = First->sh_size; @@ -605,24 +813,228 @@ Expected<const T *> ELFFile<ELFT>::getEntry(uint32_t Section, auto SecOrErr = getSection(Section); if (!SecOrErr) return SecOrErr.takeError(); - return getEntry<T>(*SecOrErr, Entry); + return getEntry<T>(**SecOrErr, Entry); } template <class ELFT> template <typename T> -Expected<const T *> ELFFile<ELFT>::getEntry(const Elf_Shdr *Section, +Expected<const T *> ELFFile<ELFT>::getEntry(const Elf_Shdr &Section, uint32_t Entry) const { - if (sizeof(T) != Section->sh_entsize) - return createError("section " + getSecIndexForError(this, Section) + - " has invalid sh_entsize: expected " + Twine(sizeof(T)) + - ", but got " + Twine(Section->sh_entsize)); - uint64_t Pos = Section->sh_offset + 
(uint64_t)Entry * sizeof(T); - if (Pos + sizeof(T) > Buf.size()) - return createError("unable to access section " + - getSecIndexForError(this, Section) + " data at 0x" + - Twine::utohexstr(Pos) + - ": offset goes past the end of file"); - return reinterpret_cast<const T *>(base() + Pos); + Expected<ArrayRef<T>> EntriesOrErr = getSectionContentsAsArray<T>(Section); + if (!EntriesOrErr) + return EntriesOrErr.takeError(); + + ArrayRef<T> Arr = *EntriesOrErr; + if (Entry >= Arr.size()) + return createError( + "can't read an entry at 0x" + + Twine::utohexstr(Entry * static_cast<uint64_t>(sizeof(T))) + + ": it goes past the end of the section (0x" + + Twine::utohexstr(Section.sh_size) + ")"); + return &Arr[Entry]; +} + +template <typename ELFT> +Expected<StringRef> ELFFile<ELFT>::getSymbolVersionByIndex( + uint32_t SymbolVersionIndex, bool &IsDefault, + SmallVector<Optional<VersionEntry>, 0> &VersionMap, + Optional<bool> IsSymHidden) const { + size_t VersionIndex = SymbolVersionIndex & llvm::ELF::VERSYM_VERSION; + + // Special markers for unversioned symbols. + if (VersionIndex == llvm::ELF::VER_NDX_LOCAL || + VersionIndex == llvm::ELF::VER_NDX_GLOBAL) { + IsDefault = false; + return ""; + } + + // Lookup this symbol in the version table. + if (VersionIndex >= VersionMap.size() || !VersionMap[VersionIndex]) + return createError("SHT_GNU_versym section refers to a version index " + + Twine(VersionIndex) + " which is missing"); + + const VersionEntry &Entry = *VersionMap[VersionIndex]; + // A default version (@@) is only available for defined symbols. + if (!Entry.IsVerDef || IsSymHidden.getValueOr(false)) + IsDefault = false; + else + IsDefault = !(SymbolVersionIndex & llvm::ELF::VERSYM_HIDDEN); + return Entry.Name.c_str(); +} + +template <class ELFT> +Expected<std::vector<VerDef>> +ELFFile<ELFT>::getVersionDefinitions(const Elf_Shdr &Sec) const { + Expected<StringRef> StrTabOrErr = getLinkAsStrtab(Sec); + if (!StrTabOrErr) + return StrTabOrErr.takeError(); + + Expected<ArrayRef<uint8_t>> ContentsOrErr = getSectionContents(Sec); + if (!ContentsOrErr) + return createError("cannot read content of " + describe(*this, Sec) + ": " + + toString(ContentsOrErr.takeError())); + + const uint8_t *Start = ContentsOrErr->data(); + const uint8_t *End = Start + ContentsOrErr->size(); + + auto ExtractNextAux = [&](const uint8_t *&VerdauxBuf, + unsigned VerDefNdx) -> Expected<VerdAux> { + if (VerdauxBuf + sizeof(Elf_Verdaux) > End) + return createError("invalid " + describe(*this, Sec) + + ": version definition " + Twine(VerDefNdx) + + " refers to an auxiliary entry that goes past the end " + "of the section"); + + auto *Verdaux = reinterpret_cast<const Elf_Verdaux *>(VerdauxBuf); + VerdauxBuf += Verdaux->vda_next; + + VerdAux Aux; + Aux.Offset = VerdauxBuf - Start; + if (Verdaux->vda_name <= StrTabOrErr->size()) + Aux.Name = std::string(StrTabOrErr->drop_front(Verdaux->vda_name)); + else + Aux.Name = ("<invalid vda_name: " + Twine(Verdaux->vda_name) + ">").str(); + return Aux; + }; + + std::vector<VerDef> Ret; + const uint8_t *VerdefBuf = Start; + for (unsigned I = 1; I <= /*VerDefsNum=*/Sec.sh_info; ++I) { + if (VerdefBuf + sizeof(Elf_Verdef) > End) + return createError("invalid " + describe(*this, Sec) + + ": version definition " + Twine(I) + + " goes past the end of the section"); + + if (reinterpret_cast<uintptr_t>(VerdefBuf) % sizeof(uint32_t) != 0) + return createError( + "invalid " + describe(*this, Sec) + + ": found a misaligned version definition entry at offset 0x" + + Twine::utohexstr(VerdefBuf - 
Start)); + + unsigned Version = *reinterpret_cast<const Elf_Half *>(VerdefBuf); + if (Version != 1) + return createError("unable to dump " + describe(*this, Sec) + + ": version " + Twine(Version) + + " is not yet supported"); + + const Elf_Verdef *D = reinterpret_cast<const Elf_Verdef *>(VerdefBuf); + VerDef &VD = *Ret.emplace(Ret.end()); + VD.Offset = VerdefBuf - Start; + VD.Version = D->vd_version; + VD.Flags = D->vd_flags; + VD.Ndx = D->vd_ndx; + VD.Cnt = D->vd_cnt; + VD.Hash = D->vd_hash; + + const uint8_t *VerdauxBuf = VerdefBuf + D->vd_aux; + for (unsigned J = 0; J < D->vd_cnt; ++J) { + if (reinterpret_cast<uintptr_t>(VerdauxBuf) % sizeof(uint32_t) != 0) + return createError("invalid " + describe(*this, Sec) + + ": found a misaligned auxiliary entry at offset 0x" + + Twine::utohexstr(VerdauxBuf - Start)); + + Expected<VerdAux> AuxOrErr = ExtractNextAux(VerdauxBuf, I); + if (!AuxOrErr) + return AuxOrErr.takeError(); + + if (J == 0) + VD.Name = AuxOrErr->Name; + else + VD.AuxV.push_back(*AuxOrErr); + } + + VerdefBuf += D->vd_next; + } + + return Ret; +} + +template <class ELFT> +Expected<std::vector<VerNeed>> +ELFFile<ELFT>::getVersionDependencies(const Elf_Shdr &Sec, + WarningHandler WarnHandler) const { + StringRef StrTab; + Expected<StringRef> StrTabOrErr = getLinkAsStrtab(Sec); + if (!StrTabOrErr) { + if (Error E = WarnHandler(toString(StrTabOrErr.takeError()))) + return std::move(E); + } else { + StrTab = *StrTabOrErr; + } + + Expected<ArrayRef<uint8_t>> ContentsOrErr = getSectionContents(Sec); + if (!ContentsOrErr) + return createError("cannot read content of " + describe(*this, Sec) + ": " + + toString(ContentsOrErr.takeError())); + + const uint8_t *Start = ContentsOrErr->data(); + const uint8_t *End = Start + ContentsOrErr->size(); + const uint8_t *VerneedBuf = Start; + + std::vector<VerNeed> Ret; + for (unsigned I = 1; I <= /*VerneedNum=*/Sec.sh_info; ++I) { + if (VerneedBuf + sizeof(Elf_Verdef) > End) + return createError("invalid " + describe(*this, Sec) + + ": version dependency " + Twine(I) + + " goes past the end of the section"); + + if (reinterpret_cast<uintptr_t>(VerneedBuf) % sizeof(uint32_t) != 0) + return createError( + "invalid " + describe(*this, Sec) + + ": found a misaligned version dependency entry at offset 0x" + + Twine::utohexstr(VerneedBuf - Start)); + + unsigned Version = *reinterpret_cast<const Elf_Half *>(VerneedBuf); + if (Version != 1) + return createError("unable to dump " + describe(*this, Sec) + + ": version " + Twine(Version) + + " is not yet supported"); + + const Elf_Verneed *Verneed = + reinterpret_cast<const Elf_Verneed *>(VerneedBuf); + + VerNeed &VN = *Ret.emplace(Ret.end()); + VN.Version = Verneed->vn_version; + VN.Cnt = Verneed->vn_cnt; + VN.Offset = VerneedBuf - Start; + + if (Verneed->vn_file < StrTab.size()) + VN.File = std::string(StrTab.drop_front(Verneed->vn_file)); + else + VN.File = ("<corrupt vn_file: " + Twine(Verneed->vn_file) + ">").str(); + + const uint8_t *VernauxBuf = VerneedBuf + Verneed->vn_aux; + for (unsigned J = 0; J < Verneed->vn_cnt; ++J) { + if (reinterpret_cast<uintptr_t>(VernauxBuf) % sizeof(uint32_t) != 0) + return createError("invalid " + describe(*this, Sec) + + ": found a misaligned auxiliary entry at offset 0x" + + Twine::utohexstr(VernauxBuf - Start)); + + if (VernauxBuf + sizeof(Elf_Vernaux) > End) + return createError( + "invalid " + describe(*this, Sec) + ": version dependency " + + Twine(I) + + " refers to an auxiliary entry that goes past the end " + "of the section"); + + const Elf_Vernaux *Vernaux = + 
reinterpret_cast<const Elf_Vernaux *>(VernauxBuf); + + VernAux &Aux = *VN.AuxV.emplace(VN.AuxV.end()); + Aux.Hash = Vernaux->vna_hash; + Aux.Flags = Vernaux->vna_flags; + Aux.Other = Vernaux->vna_other; + Aux.Offset = VernauxBuf - Start; + if (StrTab.size() <= Vernaux->vna_name) + Aux.Name = "<corrupt>"; + else + Aux.Name = std::string(StrTab.drop_front(Vernaux->vna_name)); + + VernauxBuf += Vernaux->vna_next; + } + VerneedBuf += Verneed->vn_next; + } + return Ret; } template <class ELFT> @@ -636,14 +1048,14 @@ ELFFile<ELFT>::getSection(uint32_t Index) const { template <class ELFT> Expected<StringRef> -ELFFile<ELFT>::getStringTable(const Elf_Shdr *Section, +ELFFile<ELFT>::getStringTable(const Elf_Shdr &Section, WarningHandler WarnHandler) const { - if (Section->sh_type != ELF::SHT_STRTAB) + if (Section.sh_type != ELF::SHT_STRTAB) if (Error E = WarnHandler("invalid sh_type for string table section " + - getSecIndexForError(this, Section) + + getSecIndexForError(*this, Section) + ": expected SHT_STRTAB, but got " + object::getELFSectionTypeName( - getHeader()->e_machine, Section->sh_type))) + getHeader().e_machine, Section.sh_type))) return std::move(E); auto V = getSectionContentsAsArray<char>(Section); @@ -652,10 +1064,10 @@ ELFFile<ELFT>::getStringTable(const Elf_Shdr *Section, ArrayRef<char> Data = *V; if (Data.empty()) return createError("SHT_STRTAB string table section " + - getSecIndexForError(this, Section) + " is empty"); + getSecIndexForError(*this, Section) + " is empty"); if (Data.back() != '\0') return createError("SHT_STRTAB string table section " + - getSecIndexForError(this, Section) + + getSecIndexForError(*this, Section) + " is non-null terminated"); return StringRef(Data.begin(), Data.size()); } @@ -674,7 +1086,7 @@ Expected<ArrayRef<typename ELFT::Word>> ELFFile<ELFT>::getSHNDXTable(const Elf_Shdr &Section, Elf_Shdr_Range Sections) const { assert(Section.sh_type == ELF::SHT_SYMTAB_SHNDX); - auto VOrErr = getSectionContentsAsArray<Elf_Word>(&Section); + auto VOrErr = getSectionContentsAsArray<Elf_Word>(Section); if (!VOrErr) return VOrErr.takeError(); ArrayRef<Elf_Word> V = *VOrErr; @@ -684,10 +1096,10 @@ ELFFile<ELFT>::getSHNDXTable(const Elf_Shdr &Section, const Elf_Shdr &SymTable = **SymTableOrErr; if (SymTable.sh_type != ELF::SHT_SYMTAB && SymTable.sh_type != ELF::SHT_DYNSYM) - return createError("SHT_SYMTAB_SHNDX section is linked with " + - object::getELFSectionTypeName(getHeader()->e_machine, - SymTable.sh_type) + - " section (expected SHT_SYMTAB/SHT_DYNSYM)"); + return createError( + "SHT_SYMTAB_SHNDX section is linked with " + + object::getELFSectionTypeName(getHeader().e_machine, SymTable.sh_type) + + " section (expected SHT_SYMTAB/SHT_DYNSYM)"); uint64_t Syms = SymTable.sh_size / sizeof(Elf_Sym); if (V.size() != Syms) @@ -715,15 +1127,33 @@ ELFFile<ELFT>::getStringTableForSymtab(const Elf_Shdr &Sec, if (Sec.sh_type != ELF::SHT_SYMTAB && Sec.sh_type != ELF::SHT_DYNSYM) return createError( "invalid sh_type for symbol table, expected SHT_SYMTAB or SHT_DYNSYM"); - auto SectionOrErr = object::getSection<ELFT>(Sections, Sec.sh_link); + Expected<const Elf_Shdr *> SectionOrErr = + object::getSection<ELFT>(Sections, Sec.sh_link); if (!SectionOrErr) return SectionOrErr.takeError(); - return getStringTable(*SectionOrErr); + return getStringTable(**SectionOrErr); +} + +template <class ELFT> +Expected<StringRef> +ELFFile<ELFT>::getLinkAsStrtab(const typename ELFT::Shdr &Sec) const { + Expected<const typename ELFT::Shdr *> StrTabSecOrErr = + getSection(Sec.sh_link); + if 
(!StrTabSecOrErr) + return createError("invalid section linked to " + describe(*this, Sec) + + ": " + toString(StrTabSecOrErr.takeError())); + + Expected<StringRef> StrTabOrErr = getStringTable(**StrTabSecOrErr); + if (!StrTabOrErr) + return createError("invalid string table linked to " + + describe(*this, Sec) + ": " + + toString(StrTabOrErr.takeError())); + return *StrTabOrErr; } template <class ELFT> Expected<StringRef> -ELFFile<ELFT>::getSectionName(const Elf_Shdr *Section, +ELFFile<ELFT>::getSectionName(const Elf_Shdr &Section, WarningHandler WarnHandler) const { auto SectionsOrErr = sections(); if (!SectionsOrErr) @@ -735,13 +1165,13 @@ ELFFile<ELFT>::getSectionName(const Elf_Shdr *Section, } template <class ELFT> -Expected<StringRef> ELFFile<ELFT>::getSectionName(const Elf_Shdr *Section, +Expected<StringRef> ELFFile<ELFT>::getSectionName(const Elf_Shdr &Section, StringRef DotShstrtab) const { - uint32_t Offset = Section->sh_name; + uint32_t Offset = Section.sh_name; if (Offset == 0) return StringRef(); if (Offset >= DotShstrtab.size()) - return createError("a section " + getSecIndexForError(this, Section) + + return createError("a section " + getSecIndexForError(*this, Section) + " has an invalid sh_name (0x" + Twine::utohexstr(Offset) + ") offset which goes past the end of the " diff --git a/llvm/include/llvm/Object/ELFObjectFile.h b/llvm/include/llvm/Object/ELFObjectFile.h index 62ecd8b5a7e5..fed53eef68c3 100644 --- a/llvm/include/llvm/Object/ELFObjectFile.h +++ b/llvm/include/llvm/Object/ELFObjectFile.h @@ -51,6 +51,12 @@ class ELFObjectFileBase : public ObjectFile { friend class ELFSectionRef; friend class ELFSymbolRef; + SubtargetFeatures getMIPSFeatures() const; + SubtargetFeatures getARMFeatures() const; + SubtargetFeatures getRISCVFeatures() const; + + StringRef getAMDGPUCPUName() const; + protected: ELFObjectFileBase(unsigned int Type, MemoryBufferRef Source); @@ -80,11 +86,7 @@ public: SubtargetFeatures getFeatures() const override; - SubtargetFeatures getMIPSFeatures() const; - - SubtargetFeatures getARMFeatures() const; - - SubtargetFeatures getRISCVFeatures() const; + Optional<StringRef> tryGetCPUName() const override; void setARMSubArch(Triple &TheTriple) const override; @@ -92,7 +94,8 @@ public: virtual uint16_t getEMachine() const = 0; - std::vector<std::pair<DataRefImpl, uint64_t>> getPltAddresses() const; + std::vector<std::pair<Optional<DataRefImpl>, uint64_t>> + getPltAddresses() const; }; class ELFSectionRef : public SectionRef { @@ -230,30 +233,31 @@ template <class ELFT> class ELFObjectFile : public ELFObjectFileBase { public: LLVM_ELF_IMPORT_TYPES_ELFT(ELFT) - using uintX_t = typename ELFT::uint; - - using Elf_Sym = typename ELFT::Sym; - using Elf_Shdr = typename ELFT::Shdr; - using Elf_Ehdr = typename ELFT::Ehdr; - using Elf_Rel = typename ELFT::Rel; - using Elf_Rela = typename ELFT::Rela; - using Elf_Dyn = typename ELFT::Dyn; - SectionRef toSectionRef(const Elf_Shdr *Sec) const { return SectionRef(toDRI(Sec), this); } + ELFSymbolRef toSymbolRef(const Elf_Shdr *SymTable, unsigned SymbolNum) const { + return ELFSymbolRef({toDRI(SymTable, SymbolNum), this}); + } + + bool IsContentValid() const { return ContentValid; } + private: ELFObjectFile(MemoryBufferRef Object, ELFFile<ELFT> EF, const Elf_Shdr *DotDynSymSec, const Elf_Shdr *DotSymtabSec, - ArrayRef<Elf_Word> ShndxTable); + const Elf_Shdr *DotSymtabShndxSec); + + bool ContentValid = false; protected: ELFFile<ELFT> EF; const Elf_Shdr *DotDynSymSec = nullptr; // Dynamic symbol table section. 
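
A quick orientation point for this hunk series, since the same interface change recurs throughout the import: ELFFile<ELFT>::getHeader() now returns a reference rather than a pointer, and the section-header helpers (getSectionName, getStringTable, getSectionContents, getEntry) take const Elf_Shdr & instead of const Elf_Shdr *. The sketch below is illustrative only and is not part of the patch; the helper name dumpSectionNames and its output format are assumptions, but every call it makes is declared in the updated ELF.h.

```cpp
#include "llvm/Object/ELF.h"
#include "llvm/Support/raw_ostream.h"

// Illustrative only: walk the section headers with the reference-based API.
template <class ELFT>
static llvm::Error dumpSectionNames(const llvm::object::ELFFile<ELFT> &EF,
                                    llvm::raw_ostream &OS) {
  auto SectionsOrErr = EF.sections(); // Expected<Elf_Shdr_Range>
  if (!SectionsOrErr)
    return SectionsOrErr.takeError();
  OS << "machine: " << (unsigned)EF.getHeader().e_machine << "\n";
  for (const auto &Sec : *SectionsOrErr) {
    // getSectionName() now takes `const Elf_Shdr &`, not a pointer.
    auto NameOrErr = EF.getSectionName(Sec);
    if (!NameOrErr)
      return NameOrErr.takeError();
    OS << "  " << *NameOrErr << "\n";
  }
  return llvm::Error::success();
}
```
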
const Elf_Shdr *DotSymtabSec = nullptr; // Symbol table section. - ArrayRef<Elf_Word> ShndxTable; + const Elf_Shdr *DotSymtabShndxSec = nullptr; // SHT_SYMTAB_SHNDX section. + + Error initContent() override; void moveSymbolNext(DataRefImpl &Symb) const override; Expected<StringRef> getSymbolName(DataRefImpl Symb) const override; @@ -304,14 +308,6 @@ protected: uint64_t getSectionOffset(DataRefImpl Sec) const override; StringRef getRelocationTypeName(uint32_t Type) const; - /// Get the relocation section that contains \a Rel. - const Elf_Shdr *getRelSection(DataRefImpl Rel) const { - auto RelSecOrErr = EF.getSection(Rel.d.a); - if (!RelSecOrErr) - report_fatal_error(errorToErrorCode(RelSecOrErr.takeError()).message()); - return *RelSecOrErr; - } - DataRefImpl toDRI(const Elf_Shdr *SymTable, unsigned SymbolNum) const { DataRefImpl DRI; if (!SymTable) { @@ -374,7 +370,7 @@ protected: for (const Elf_Shdr &Sec : *SectionsOrErr) { if (Sec.sh_type == ELF::SHT_ARM_ATTRIBUTES || Sec.sh_type == ELF::SHT_RISCV_ATTRIBUTES) { - auto ErrorOrContents = EF.getSectionContents(&Sec); + auto ErrorOrContents = EF.getSectionContents(Sec); if (!ErrorOrContents) return ErrorOrContents.takeError(); @@ -397,16 +393,22 @@ protected: public: ELFObjectFile(ELFObjectFile<ELFT> &&Other); - static Expected<ELFObjectFile<ELFT>> create(MemoryBufferRef Object); + static Expected<ELFObjectFile<ELFT>> create(MemoryBufferRef Object, + bool InitContent = true); const Elf_Rel *getRel(DataRefImpl Rel) const; const Elf_Rela *getRela(DataRefImpl Rela) const; - const Elf_Sym *getSymbol(DataRefImpl Sym) const { - auto Ret = EF.template getEntry<Elf_Sym>(Sym.d.a, Sym.d.b); - if (!Ret) - report_fatal_error(errorToErrorCode(Ret.takeError()).message()); - return *Ret; + Expected<const Elf_Sym *> getSymbol(DataRefImpl Sym) const { + return EF.template getEntry<Elf_Sym>(Sym.d.a, Sym.d.b); + } + + /// Get the relocation section that contains \a Rel. 
+ const Elf_Shdr *getRelSection(DataRefImpl Rel) const { + auto RelSecOrErr = EF.getSection(Rel.d.a); + if (!RelSecOrErr) + report_fatal_error(errorToErrorCode(RelSecOrErr.takeError()).message()); + return *RelSecOrErr; } const Elf_Shdr *getSection(DataRefImpl Sec) const { @@ -429,9 +431,9 @@ public: Triple::ArchType getArch() const override; Expected<uint64_t> getStartAddress() const override; - unsigned getPlatformFlags() const override { return EF.getHeader()->e_flags; } + unsigned getPlatformFlags() const override { return EF.getHeader().e_flags; } - const ELFFile<ELFT> *getELFFile() const { return &EF; } + const ELFFile<ELFT> &getELFFile() const { return EF; } bool isDyldType() const { return isDyldELFObject; } static bool classof(const Binary *v) { @@ -454,9 +456,40 @@ void ELFObjectFile<ELFT>::moveSymbolNext(DataRefImpl &Sym) const { ++Sym.d.b; } +template <class ELFT> Error ELFObjectFile<ELFT>::initContent() { + auto SectionsOrErr = EF.sections(); + if (!SectionsOrErr) + return SectionsOrErr.takeError(); + + for (const Elf_Shdr &Sec : *SectionsOrErr) { + switch (Sec.sh_type) { + case ELF::SHT_DYNSYM: { + if (!DotDynSymSec) + DotDynSymSec = &Sec; + break; + } + case ELF::SHT_SYMTAB: { + if (!DotSymtabSec) + DotSymtabSec = &Sec; + break; + } + case ELF::SHT_SYMTAB_SHNDX: { + if (!DotSymtabShndxSec) + DotSymtabShndxSec = &Sec; + break; + } + } + } + + ContentValid = true; + return Error::success(); +} + template <class ELFT> Expected<StringRef> ELFObjectFile<ELFT>::getSymbolName(DataRefImpl Sym) const { - const Elf_Sym *ESym = getSymbol(Sym); + Expected<const Elf_Sym *> SymOrErr = getSymbol(Sym); + if (!SymOrErr) + return SymOrErr.takeError(); auto SymTabOrErr = EF.getSection(Sym.d.a); if (!SymTabOrErr) return SymTabOrErr.takeError(); @@ -465,15 +498,15 @@ Expected<StringRef> ELFObjectFile<ELFT>::getSymbolName(DataRefImpl Sym) const { if (!StrTabOrErr) return StrTabOrErr.takeError(); const Elf_Shdr *StringTableSec = *StrTabOrErr; - auto SymStrTabOrErr = EF.getStringTable(StringTableSec); + auto SymStrTabOrErr = EF.getStringTable(*StringTableSec); if (!SymStrTabOrErr) return SymStrTabOrErr.takeError(); - Expected<StringRef> Name = ESym->getName(*SymStrTabOrErr); + Expected<StringRef> Name = (*SymOrErr)->getName(*SymStrTabOrErr); if (Name && !Name->empty()) return Name; // If the symbol name is empty use the section name. - if (ESym->getType() == ELF::STT_SECTION) { + if ((*SymOrErr)->getType() == ELF::STT_SECTION) { if (Expected<section_iterator> SecOrErr = getSymbolSection(Sym)) { consumeError(Name.takeError()); return (*SecOrErr)->getName(); @@ -499,15 +532,18 @@ uint64_t ELFObjectFile<ELFT>::getSectionOffset(DataRefImpl Sec) const { template <class ELFT> uint64_t ELFObjectFile<ELFT>::getSymbolValueImpl(DataRefImpl Symb) const { - const Elf_Sym *ESym = getSymbol(Symb); - uint64_t Ret = ESym->st_value; - if (ESym->st_shndx == ELF::SHN_ABS) + Expected<const Elf_Sym *> SymOrErr = getSymbol(Symb); + if (!SymOrErr) + report_fatal_error(SymOrErr.takeError()); + + uint64_t Ret = (*SymOrErr)->st_value; + if ((*SymOrErr)->st_shndx == ELF::SHN_ABS) return Ret; - const Elf_Ehdr *Header = EF.getHeader(); + const Elf_Ehdr &Header = EF.getHeader(); // Clear the ARM/Thumb or microMIPS indicator flag. 
- if ((Header->e_machine == ELF::EM_ARM || Header->e_machine == ELF::EM_MIPS) && - ESym->getType() == ELF::STT_FUNC) + if ((Header.e_machine == ELF::EM_ARM || Header.e_machine == ELF::EM_MIPS) && + (*SymOrErr)->getType() == ELF::STT_FUNC) Ret &= ~1; return Ret; @@ -522,22 +558,34 @@ ELFObjectFile<ELFT>::getSymbolAddress(DataRefImpl Symb) const { return SymbolValueOrErr.takeError(); uint64_t Result = *SymbolValueOrErr; - const Elf_Sym *ESym = getSymbol(Symb); - switch (ESym->st_shndx) { + Expected<const Elf_Sym *> SymOrErr = getSymbol(Symb); + if (!SymOrErr) + return SymOrErr.takeError(); + + switch ((*SymOrErr)->st_shndx) { case ELF::SHN_COMMON: case ELF::SHN_UNDEF: case ELF::SHN_ABS: return Result; } - const Elf_Ehdr *Header = EF.getHeader(); auto SymTabOrErr = EF.getSection(Symb.d.a); if (!SymTabOrErr) return SymTabOrErr.takeError(); - const Elf_Shdr *SymTab = *SymTabOrErr; - if (Header->e_type == ELF::ET_REL) { - auto SectionOrErr = EF.getSection(ESym, SymTab, ShndxTable); + if (EF.getHeader().e_type == ELF::ET_REL) { + ArrayRef<Elf_Word> ShndxTable; + if (DotSymtabShndxSec) { + // TODO: Test this error. + if (Expected<ArrayRef<Elf_Word>> ShndxTableOrErr = + EF.getSHNDXTable(*DotSymtabShndxSec)) + ShndxTable = *ShndxTableOrErr; + else + return ShndxTableOrErr.takeError(); + } + + Expected<const Elf_Shdr *> SectionOrErr = + EF.getSection(**SymOrErr, *SymTabOrErr, ShndxTable); if (!SectionOrErr) return SectionOrErr.takeError(); const Elf_Shdr *Section = *SectionOrErr; @@ -550,52 +598,68 @@ ELFObjectFile<ELFT>::getSymbolAddress(DataRefImpl Symb) const { template <class ELFT> uint32_t ELFObjectFile<ELFT>::getSymbolAlignment(DataRefImpl Symb) const { - const Elf_Sym *Sym = getSymbol(Symb); - if (Sym->st_shndx == ELF::SHN_COMMON) - return Sym->st_value; + Expected<const Elf_Sym *> SymOrErr = getSymbol(Symb); + if (!SymOrErr) + report_fatal_error(SymOrErr.takeError()); + if ((*SymOrErr)->st_shndx == ELF::SHN_COMMON) + return (*SymOrErr)->st_value; return 0; } template <class ELFT> uint16_t ELFObjectFile<ELFT>::getEMachine() const { - return EF.getHeader()->e_machine; + return EF.getHeader().e_machine; } template <class ELFT> uint16_t ELFObjectFile<ELFT>::getEType() const { - return EF.getHeader()->e_type; + return EF.getHeader().e_type; } template <class ELFT> uint64_t ELFObjectFile<ELFT>::getSymbolSize(DataRefImpl Sym) const { - return getSymbol(Sym)->st_size; + Expected<const Elf_Sym *> SymOrErr = getSymbol(Sym); + if (!SymOrErr) + report_fatal_error(SymOrErr.takeError()); + return (*SymOrErr)->st_size; } template <class ELFT> uint64_t ELFObjectFile<ELFT>::getCommonSymbolSizeImpl(DataRefImpl Symb) const { - return getSymbol(Symb)->st_size; + return getSymbolSize(Symb); } template <class ELFT> uint8_t ELFObjectFile<ELFT>::getSymbolBinding(DataRefImpl Symb) const { - return getSymbol(Symb)->getBinding(); + Expected<const Elf_Sym *> SymOrErr = getSymbol(Symb); + if (!SymOrErr) + report_fatal_error(SymOrErr.takeError()); + return (*SymOrErr)->getBinding(); } template <class ELFT> uint8_t ELFObjectFile<ELFT>::getSymbolOther(DataRefImpl Symb) const { - return getSymbol(Symb)->st_other; + Expected<const Elf_Sym *> SymOrErr = getSymbol(Symb); + if (!SymOrErr) + report_fatal_error(SymOrErr.takeError()); + return (*SymOrErr)->st_other; } template <class ELFT> uint8_t ELFObjectFile<ELFT>::getSymbolELFType(DataRefImpl Symb) const { - return getSymbol(Symb)->getType(); + Expected<const Elf_Sym *> SymOrErr = getSymbol(Symb); + if (!SymOrErr) + report_fatal_error(SymOrErr.takeError()); + return 
(*SymOrErr)->getType(); } template <class ELFT> Expected<SymbolRef::Type> ELFObjectFile<ELFT>::getSymbolType(DataRefImpl Symb) const { - const Elf_Sym *ESym = getSymbol(Symb); + Expected<const Elf_Sym *> SymOrErr = getSymbol(Symb); + if (!SymOrErr) + return SymOrErr.takeError(); - switch (ESym->getType()) { + switch ((*SymOrErr)->getType()) { case ELF::STT_NOTYPE: return SymbolRef::ST_Unknown; case ELF::STT_SECTION: @@ -615,8 +679,11 @@ ELFObjectFile<ELFT>::getSymbolType(DataRefImpl Symb) const { template <class ELFT> Expected<uint32_t> ELFObjectFile<ELFT>::getSymbolFlags(DataRefImpl Sym) const { - const Elf_Sym *ESym = getSymbol(Sym); + Expected<const Elf_Sym *> SymOrErr = getSymbol(Sym); + if (!SymOrErr) + return SymOrErr.takeError(); + const Elf_Sym *ESym = *SymOrErr; uint32_t Result = SymbolRef::SF_None; if (ESym->getBinding() != ELF::STB_LOCAL) @@ -649,7 +716,7 @@ Expected<uint32_t> ELFObjectFile<ELFT>::getSymbolFlags(DataRefImpl Sym) const { // TODO: Test this error. return SymbolsOrErr.takeError(); - if (EF.getHeader()->e_machine == ELF::EM_ARM) { + if (EF.getHeader().e_machine == ELF::EM_ARM) { if (Expected<StringRef> NameOrErr = getSymbolName(Sym)) { StringRef Name = *NameOrErr; if (Name.startswith("$d") || Name.startswith("$t") || @@ -682,7 +749,17 @@ template <class ELFT> Expected<section_iterator> ELFObjectFile<ELFT>::getSymbolSection(const Elf_Sym *ESym, const Elf_Shdr *SymTab) const { - auto ESecOrErr = EF.getSection(ESym, SymTab, ShndxTable); + ArrayRef<Elf_Word> ShndxTable; + if (DotSymtabShndxSec) { + // TODO: Test this error. + Expected<ArrayRef<Elf_Word>> ShndxTableOrErr = + EF.getSHNDXTable(*DotSymtabShndxSec); + if (!ShndxTableOrErr) + return ShndxTableOrErr.takeError(); + ShndxTable = *ShndxTableOrErr; + } + + auto ESecOrErr = EF.getSection(*ESym, SymTab, ShndxTable); if (!ESecOrErr) return ESecOrErr.takeError(); @@ -698,12 +775,14 @@ ELFObjectFile<ELFT>::getSymbolSection(const Elf_Sym *ESym, template <class ELFT> Expected<section_iterator> ELFObjectFile<ELFT>::getSymbolSection(DataRefImpl Symb) const { - const Elf_Sym *Sym = getSymbol(Symb); + Expected<const Elf_Sym *> SymOrErr = getSymbol(Symb); + if (!SymOrErr) + return SymOrErr.takeError(); + auto SymTabOrErr = EF.getSection(Symb.d.a); if (!SymTabOrErr) return SymTabOrErr.takeError(); - const Elf_Shdr *SymTab = *SymTabOrErr; - return getSymbolSection(Sym, SymTab); + return getSymbolSection(*SymOrErr, *SymTabOrErr); } template <class ELFT> @@ -714,7 +793,7 @@ void ELFObjectFile<ELFT>::moveSectionNext(DataRefImpl &Sec) const { template <class ELFT> Expected<StringRef> ELFObjectFile<ELFT>::getSectionName(DataRefImpl Sec) const { - return EF.getSectionName(&*getSection(Sec)); + return EF.getSectionName(*getSection(Sec)); } template <class ELFT> @@ -844,7 +923,7 @@ ELFObjectFile<ELFT>::section_rel_begin(DataRefImpl Sec) const { if (!SectionsOrErr) return relocation_iterator(RelocationRef()); uintptr_t SHT = reinterpret_cast<uintptr_t>((*SectionsOrErr).begin()); - RelData.d.a = (Sec.p - SHT) / EF.getHeader()->e_shentsize; + RelData.d.a = (Sec.p - SHT) / EF.getHeader().e_shentsize; RelData.d.b = 0; return relocation_iterator(RelocationRef(RelData, this)); } @@ -871,7 +950,7 @@ ELFObjectFile<ELFT>::section_rel_end(DataRefImpl Sec) const { template <class ELFT> Expected<section_iterator> ELFObjectFile<ELFT>::getRelocatedSection(DataRefImpl Sec) const { - if (EF.getHeader()->e_type != ELF::ET_REL) + if (EF.getHeader().e_type != ELF::ET_REL) return section_end(); const Elf_Shdr *EShdr = getSection(Sec); @@ -930,7 +1009,7 @@ 
uint64_t ELFObjectFile<ELFT>::getRelocationType(DataRefImpl Rel) const { template <class ELFT> StringRef ELFObjectFile<ELFT>::getRelocationTypeName(uint32_t Type) const { - return getELFRelocationTypeName(EF.getHeader()->e_machine, Type); + return getELFRelocationTypeName(EF.getHeader().e_machine, Type); } template <class ELFT> @@ -970,59 +1049,34 @@ ELFObjectFile<ELFT>::getRela(DataRefImpl Rela) const { template <class ELFT> Expected<ELFObjectFile<ELFT>> -ELFObjectFile<ELFT>::create(MemoryBufferRef Object) { +ELFObjectFile<ELFT>::create(MemoryBufferRef Object, bool InitContent) { auto EFOrErr = ELFFile<ELFT>::create(Object.getBuffer()); if (Error E = EFOrErr.takeError()) return std::move(E); - auto EF = std::move(*EFOrErr); - auto SectionsOrErr = EF.sections(); - if (!SectionsOrErr) - return SectionsOrErr.takeError(); - - const Elf_Shdr *DotDynSymSec = nullptr; - const Elf_Shdr *DotSymtabSec = nullptr; - ArrayRef<Elf_Word> ShndxTable; - for (const Elf_Shdr &Sec : *SectionsOrErr) { - switch (Sec.sh_type) { - case ELF::SHT_DYNSYM: { - if (!DotDynSymSec) - DotDynSymSec = &Sec; - break; - } - case ELF::SHT_SYMTAB: { - if (!DotSymtabSec) - DotSymtabSec = &Sec; - break; - } - case ELF::SHT_SYMTAB_SHNDX: { - auto TableOrErr = EF.getSHNDXTable(Sec); - if (!TableOrErr) - return TableOrErr.takeError(); - ShndxTable = *TableOrErr; - break; - } - } - } - return ELFObjectFile<ELFT>(Object, EF, DotDynSymSec, DotSymtabSec, - ShndxTable); + ELFObjectFile<ELFT> Obj = {Object, std::move(*EFOrErr), nullptr, nullptr, + nullptr}; + if (InitContent) + if (Error E = Obj.initContent()) + return std::move(E); + return std::move(Obj); } template <class ELFT> ELFObjectFile<ELFT>::ELFObjectFile(MemoryBufferRef Object, ELFFile<ELFT> EF, const Elf_Shdr *DotDynSymSec, const Elf_Shdr *DotSymtabSec, - ArrayRef<Elf_Word> ShndxTable) + const Elf_Shdr *DotSymtabShndx) : ELFObjectFileBase( getELFType(ELFT::TargetEndianness == support::little, ELFT::Is64Bits), Object), EF(EF), DotDynSymSec(DotDynSymSec), DotSymtabSec(DotSymtabSec), - ShndxTable(ShndxTable) {} + DotSymtabShndxSec(DotSymtabShndx) {} template <class ELFT> ELFObjectFile<ELFT>::ELFObjectFile(ELFObjectFile<ELFT> &&Other) : ELFObjectFile(Other.Data, Other.EF, Other.DotDynSymSec, - Other.DotSymtabSec, Other.ShndxTable) {} + Other.DotSymtabSec, Other.DotSymtabShndxSec) {} template <class ELFT> basic_symbol_iterator ELFObjectFile<ELFT>::symbol_begin() const { @@ -1084,9 +1138,9 @@ uint8_t ELFObjectFile<ELFT>::getBytesInAddress() const { template <class ELFT> StringRef ELFObjectFile<ELFT>::getFileFormatName() const { bool IsLittleEndian = ELFT::TargetEndianness == support::little; - switch (EF.getHeader()->e_ident[ELF::EI_CLASS]) { + switch (EF.getHeader().e_ident[ELF::EI_CLASS]) { case ELF::ELFCLASS32: - switch (EF.getHeader()->e_machine) { + switch (EF.getHeader().e_machine) { case ELF::EM_386: return "elf32-i386"; case ELF::EM_IAMCU: @@ -1106,9 +1160,11 @@ StringRef ELFObjectFile<ELFT>::getFileFormatName() const { case ELF::EM_MSP430: return "elf32-msp430"; case ELF::EM_PPC: - return "elf32-powerpc"; + return (IsLittleEndian ? 
"elf32-powerpcle" : "elf32-powerpc"); case ELF::EM_RISCV: return "elf32-littleriscv"; + case ELF::EM_CSKY: + return "elf32-csky"; case ELF::EM_SPARC: case ELF::EM_SPARC32PLUS: return "elf32-sparc"; @@ -1118,7 +1174,7 @@ StringRef ELFObjectFile<ELFT>::getFileFormatName() const { return "elf32-unknown"; } case ELF::ELFCLASS64: - switch (EF.getHeader()->e_machine) { + switch (EF.getHeader().e_machine) { case ELF::EM_386: return "elf64-i386"; case ELF::EM_X86_64: @@ -1152,7 +1208,7 @@ StringRef ELFObjectFile<ELFT>::getFileFormatName() const { template <class ELFT> Triple::ArchType ELFObjectFile<ELFT>::getArch() const { bool IsLittleEndian = ELFT::TargetEndianness == support::little; - switch (EF.getHeader()->e_machine) { + switch (EF.getHeader().e_machine) { case ELF::EM_386: case ELF::EM_IAMCU: return Triple::x86; @@ -1169,7 +1225,7 @@ template <class ELFT> Triple::ArchType ELFObjectFile<ELFT>::getArch() const { case ELF::EM_LANAI: return Triple::lanai; case ELF::EM_MIPS: - switch (EF.getHeader()->e_ident[ELF::EI_CLASS]) { + switch (EF.getHeader().e_ident[ELF::EI_CLASS]) { case ELF::ELFCLASS32: return IsLittleEndian ? Triple::mipsel : Triple::mips; case ELF::ELFCLASS64: @@ -1180,11 +1236,11 @@ template <class ELFT> Triple::ArchType ELFObjectFile<ELFT>::getArch() const { case ELF::EM_MSP430: return Triple::msp430; case ELF::EM_PPC: - return Triple::ppc; + return IsLittleEndian ? Triple::ppcle : Triple::ppc; case ELF::EM_PPC64: return IsLittleEndian ? Triple::ppc64le : Triple::ppc64; case ELF::EM_RISCV: - switch (EF.getHeader()->e_ident[ELF::EI_CLASS]) { + switch (EF.getHeader().e_ident[ELF::EI_CLASS]) { case ELF::ELFCLASS32: return Triple::riscv32; case ELF::ELFCLASS64: @@ -1205,7 +1261,7 @@ template <class ELFT> Triple::ArchType ELFObjectFile<ELFT>::getArch() const { if (!IsLittleEndian) return Triple::UnknownArch; - unsigned MACH = EF.getHeader()->e_flags & ELF::EF_AMDGPU_MACH; + unsigned MACH = EF.getHeader().e_flags & ELF::EF_AMDGPU_MACH; if (MACH >= ELF::EF_AMDGPU_MACH_R600_FIRST && MACH <= ELF::EF_AMDGPU_MACH_R600_LAST) return Triple::r600; @@ -1221,6 +1277,8 @@ template <class ELFT> Triple::ArchType ELFObjectFile<ELFT>::getArch() const { case ELF::EM_VE: return Triple::ve; + case ELF::EM_CSKY: + return Triple::csky; default: return Triple::UnknownArch; } @@ -1228,7 +1286,7 @@ template <class ELFT> Triple::ArchType ELFObjectFile<ELFT>::getArch() const { template <class ELFT> Expected<uint64_t> ELFObjectFile<ELFT>::getStartAddress() const { - return EF.getHeader()->e_entry; + return EF.getHeader().e_entry; } template <class ELFT> @@ -1238,7 +1296,7 @@ ELFObjectFile<ELFT>::getDynamicSymbolIterators() const { } template <class ELFT> bool ELFObjectFile<ELFT>::isRelocatableObject() const { - return EF.getHeader()->e_type == ELF::ET_REL; + return EF.getHeader().e_type == ELF::ET_REL; } } // end namespace object diff --git a/llvm/include/llvm/Object/ELFTypes.h b/llvm/include/llvm/Object/ELFTypes.h index 5e85e6cc4653..f64e7c06e03b 100644 --- a/llvm/include/llvm/Object/ELFTypes.h +++ b/llvm/include/llvm/Object/ELFTypes.h @@ -107,7 +107,34 @@ using ELF64BE = ELFType<support::big, true>; using Elf_Word = typename ELFT::Word; \ using Elf_Sword = typename ELFT::Sword; \ using Elf_Xword = typename ELFT::Xword; \ - using Elf_Sxword = typename ELFT::Sxword; + using Elf_Sxword = typename ELFT::Sxword; \ + using uintX_t = typename ELFT::uint; \ + using Elf_Ehdr = typename ELFT::Ehdr; \ + using Elf_Shdr = typename ELFT::Shdr; \ + using Elf_Sym = typename ELFT::Sym; \ + using Elf_Dyn = typename ELFT::Dyn; \ 
+ using Elf_Phdr = typename ELFT::Phdr; \ + using Elf_Rel = typename ELFT::Rel; \ + using Elf_Rela = typename ELFT::Rela; \ + using Elf_Relr = typename ELFT::Relr; \ + using Elf_Verdef = typename ELFT::Verdef; \ + using Elf_Verdaux = typename ELFT::Verdaux; \ + using Elf_Verneed = typename ELFT::Verneed; \ + using Elf_Vernaux = typename ELFT::Vernaux; \ + using Elf_Versym = typename ELFT::Versym; \ + using Elf_Hash = typename ELFT::Hash; \ + using Elf_GnuHash = typename ELFT::GnuHash; \ + using Elf_Nhdr = typename ELFT::Nhdr; \ + using Elf_Note = typename ELFT::Note; \ + using Elf_Note_Iterator = typename ELFT::NoteIterator; \ + using Elf_CGProfile = typename ELFT::CGProfile; \ + using Elf_Dyn_Range = typename ELFT::DynRange; \ + using Elf_Shdr_Range = typename ELFT::ShdrRange; \ + using Elf_Sym_Range = typename ELFT::SymRange; \ + using Elf_Rel_Range = typename ELFT::RelRange; \ + using Elf_Rela_Range = typename ELFT::RelaRange; \ + using Elf_Relr_Range = typename ELFT::RelrRange; \ + using Elf_Phdr_Range = typename ELFT::PhdrRange; \ #define LLVM_ELF_COMMA , #define LLVM_ELF_IMPORT_TYPES(E, W) \ @@ -269,7 +296,6 @@ struct Elf_Versym_Impl { template <class ELFT> struct Elf_Verdef_Impl { LLVM_ELF_IMPORT_TYPES_ELFT(ELFT) - using Elf_Verdaux = Elf_Verdaux_Impl<ELFT>; Elf_Half vd_version; // Version of this structure (e.g. VER_DEF_CURRENT) Elf_Half vd_flags; // Bitwise flags (VER_DEF_*) Elf_Half vd_ndx; // Version index, used in .gnu.version entries diff --git a/llvm/include/llvm/Object/MachO.h b/llvm/include/llvm/Object/MachO.h index f48e0f1dcd58..7eb017397846 100644 --- a/llvm/include/llvm/Object/MachO.h +++ b/llvm/include/llvm/Object/MachO.h @@ -615,6 +615,7 @@ public: case MachO::PLATFORM_IOSSIMULATOR: return "iossimulator"; case MachO::PLATFORM_TVOSSIMULATOR: return "tvossimulator"; case MachO::PLATFORM_WATCHOSSIMULATOR: return "watchossimulator"; + case MachO::PLATFORM_DRIVERKIT: return "driverkit"; default: std::string ret; raw_string_ostream ss(ret); diff --git a/llvm/include/llvm/Object/MachOUniversal.h b/llvm/include/llvm/Object/MachOUniversal.h index 5e006fd87318..9bcacb510108 100644 --- a/llvm/include/llvm/Object/MachOUniversal.h +++ b/llvm/include/llvm/Object/MachOUniversal.h @@ -22,8 +22,11 @@ namespace llvm { class StringRef; +class Module; +class LLVMContext; namespace object { +class IRObjectFile; class MachOUniversalBinary : public Binary { virtual void anchor(); @@ -101,6 +104,8 @@ public: } Expected<std::unique_ptr<MachOObjectFile>> getAsObjectFile() const; + Expected<std::unique_ptr<IRObjectFile>> + getAsIRObject(LLVMContext &Ctx) const; Expected<std::unique_ptr<Archive>> getAsArchive() const; }; @@ -154,6 +159,9 @@ public: Expected<std::unique_ptr<MachOObjectFile>> getMachOObjectForArch(StringRef ArchName) const; + Expected<std::unique_ptr<IRObjectFile>> + getIRObjectForArch(StringRef ArchName, LLVMContext &Ctx) const; + Expected<std::unique_ptr<Archive>> getArchiveForArch(StringRef ArchName) const; }; diff --git a/llvm/include/llvm/Object/MachOUniversalWriter.h b/llvm/include/llvm/Object/MachOUniversalWriter.h new file mode 100644 index 000000000000..cdfedcf0379e --- /dev/null +++ b/llvm/include/llvm/Object/MachOUniversalWriter.h @@ -0,0 +1,102 @@ +//===- MachOUniversalWriter.h - MachO universal binary writer----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Declares the Slice class and writeUniversalBinary function for writing a +// MachO universal binary file. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_OBJECT_MACHOUNIVERSALWRITER_H +#define LLVM_OBJECT_MACHOUNIVERSALWRITER_H + +#include "llvm/Object/Archive.h" +#include "llvm/Object/Binary.h" +#include "llvm/Object/MachO.h" + +namespace llvm { +class LLVMContext; + +namespace object { +class IRObjectFile; + +class Slice { + const Binary *B; + uint32_t CPUType; + uint32_t CPUSubType; + std::string ArchName; + + // P2Alignment field stores slice alignment values from universal + // binaries. This is also needed to order the slices so the total + // file size can be calculated before creating the output buffer. + uint32_t P2Alignment; + + Slice(const IRObjectFile &IRO, uint32_t CPUType, uint32_t CPUSubType, + std::string ArchName, uint32_t Align); + +public: + explicit Slice(const MachOObjectFile &O); + + Slice(const MachOObjectFile &O, uint32_t Align); + + /// This constructor takes pre-specified \param CPUType , \param CPUSubType , + /// \param ArchName , \param Align instead of inferring them from the archive + /// members. + Slice(const Archive &A, uint32_t CPUType, uint32_t CPUSubType, + std::string ArchName, uint32_t Align); + + static Expected<Slice> create(const Archive &A, + LLVMContext *LLVMCtx = nullptr); + + static Expected<Slice> create(const IRObjectFile &IRO, uint32_t Align); + + void setP2Alignment(uint32_t Align) { P2Alignment = Align; } + + const Binary *getBinary() const { return B; } + + uint32_t getCPUType() const { return CPUType; } + + uint32_t getCPUSubType() const { return CPUSubType; } + + uint32_t getP2Alignment() const { return P2Alignment; } + + uint64_t getCPUID() const { + return static_cast<uint64_t>(CPUType) << 32 | CPUSubType; + } + + std::string getArchString() const { + if (!ArchName.empty()) + return ArchName; + return ("unknown(" + Twine(CPUType) + "," + + Twine(CPUSubType & ~MachO::CPU_SUBTYPE_MASK) + ")") + .str(); + } + + friend bool operator<(const Slice &Lhs, const Slice &Rhs) { + if (Lhs.CPUType == Rhs.CPUType) + return Lhs.CPUSubType < Rhs.CPUSubType; + // force arm64-family to follow after all other slices for + // compatibility with cctools lipo + if (Lhs.CPUType == MachO::CPU_TYPE_ARM64) + return false; + if (Rhs.CPUType == MachO::CPU_TYPE_ARM64) + return true; + // Sort by alignment to minimize file size + return Lhs.P2Alignment < Rhs.P2Alignment; + } +}; + +Error writeUniversalBinary(ArrayRef<Slice> Slices, StringRef OutputFileName); + +Expected<std::unique_ptr<MemoryBuffer>> +writeUniversalBinaryToBuffer(ArrayRef<Slice> Slices); + +} // end namespace object + +} // end namespace llvm + +#endif // LLVM_OBJECT_MACHOUNIVERSALWRITER_H diff --git a/llvm/include/llvm/Object/ObjectFile.h b/llvm/include/llvm/Object/ObjectFile.h index 8e8937201716..27e40cbdbece 100644 --- a/llvm/include/llvm/Object/ObjectFile.h +++ b/llvm/include/llvm/Object/ObjectFile.h @@ -327,6 +327,7 @@ public: virtual StringRef getFileFormatName() const = 0; virtual Triple::ArchType getArch() const = 0; virtual SubtargetFeatures getFeatures() const = 0; + virtual Optional<StringRef> tryGetCPUName() const { return None; }; virtual void setARMSubArch(Triple &TheTriple) const { } virtual Expected<uint64_t> getStartAddress() const { return 
errorCodeToError(object_error::parse_failed); @@ -349,7 +350,8 @@ public: createObjectFile(StringRef ObjectPath); static Expected<std::unique_ptr<ObjectFile>> - createObjectFile(MemoryBufferRef Object, llvm::file_magic Type); + createObjectFile(MemoryBufferRef Object, llvm::file_magic Type, + bool InitContent = true); static Expected<std::unique_ptr<ObjectFile>> createObjectFile(MemoryBufferRef Object) { return createObjectFile(Object, llvm::file_magic::unknown); @@ -366,7 +368,7 @@ public: createXCOFFObjectFile(MemoryBufferRef Object, unsigned FileType); static Expected<std::unique_ptr<ObjectFile>> - createELFObjectFile(MemoryBufferRef Object); + createELFObjectFile(MemoryBufferRef Object, bool InitContent = true); static Expected<std::unique_ptr<MachOObjectFile>> createMachOObjectFile(MemoryBufferRef Object, diff --git a/llvm/include/llvm/Object/RelocationResolver.h b/llvm/include/llvm/Object/RelocationResolver.h index 1246dcc5ec73..46f74e90a91b 100644 --- a/llvm/include/llvm/Object/RelocationResolver.h +++ b/llvm/include/llvm/Object/RelocationResolver.h @@ -31,11 +31,17 @@ namespace llvm { namespace object { -using RelocationResolver = uint64_t (*)(RelocationRef R, uint64_t S, uint64_t A); +using SupportsRelocation = bool (*)(uint64_t); +using RelocationResolver = uint64_t (*)(uint64_t Type, uint64_t Offset, + uint64_t S, uint64_t LocData, + int64_t Addend); -std::pair<bool (*)(uint64_t), RelocationResolver> +std::pair<SupportsRelocation, RelocationResolver> getRelocationResolver(const ObjectFile &Obj); +uint64_t resolveRelocation(RelocationResolver Resolver, const RelocationRef &R, + uint64_t S, uint64_t LocData); + } // end namespace object } // end namespace llvm diff --git a/llvm/include/llvm/Object/StackMapParser.h b/llvm/include/llvm/Object/StackMapParser.h index b408f4041034..4ee67112ea5e 100644 --- a/llvm/include/llvm/Object/StackMapParser.h +++ b/llvm/include/llvm/Object/StackMapParser.h @@ -11,6 +11,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/iterator_range.h" +#include "llvm/Object/ELF.h" #include "llvm/Support/Endian.h" #include <cassert> #include <cstddef> @@ -35,11 +36,13 @@ public: return tmp; } - bool operator==(const AccessorIterator &Other) { + bool operator==(const AccessorIterator &Other) const { return A.P == Other.A.P; } - bool operator!=(const AccessorIterator &Other) { return !(*this == Other); } + bool operator!=(const AccessorIterator &Other) const { + return !(*this == Other); + } AccessorT& operator*() { return A; } AccessorT* operator->() { return &A; } @@ -318,6 +321,23 @@ public: } } + /// Validates the header of the specified stack map section. + static Error validateHeader(ArrayRef<uint8_t> StackMapSection) { + // See the comment for StackMaps::emitStackmapHeader(). 
+ if (StackMapSection.size() < 16) + return object::createError( + "the stack map section size (" + Twine(StackMapSection.size()) + + ") is less than the minimum possible size of its header (16)"); + + unsigned Version = StackMapSection[0]; + if (Version != 3) + return object::createError( + "the version (" + Twine(Version) + + ") of the stack map section is unsupported, the " + "supported version is 3"); + return Error::success(); + } + using function_iterator = AccessorIterator<FunctionAccessor>; using constant_iterator = AccessorIterator<ConstantAccessor>; using record_iterator = AccessorIterator<RecordAccessor>; diff --git a/llvm/include/llvm/Object/SymbolicFile.h b/llvm/include/llvm/Object/SymbolicFile.h index a0d8b7225598..012f9f7fad07 100644 --- a/llvm/include/llvm/Object/SymbolicFile.h +++ b/llvm/include/llvm/Object/SymbolicFile.h @@ -161,18 +161,18 @@ public: // construction aux. static Expected<std::unique_ptr<SymbolicFile>> createSymbolicFile(MemoryBufferRef Object, llvm::file_magic Type, - LLVMContext *Context); + LLVMContext *Context, bool InitContent = true); static Expected<std::unique_ptr<SymbolicFile>> createSymbolicFile(MemoryBufferRef Object) { return createSymbolicFile(Object, llvm::file_magic::unknown, nullptr); } - static Expected<OwningBinary<SymbolicFile>> - createSymbolicFile(StringRef ObjectPath); static bool classof(const Binary *v) { return v->isSymbolic(); } + + static bool isSymbolicFile(file_magic Type, const LLVMContext *Context); }; inline BasicSymbolRef::BasicSymbolRef(DataRefImpl SymbolP, diff --git a/llvm/include/llvm/Object/Wasm.h b/llvm/include/llvm/Object/Wasm.h index dc90c891ab95..f7cd2e622ae3 100644 --- a/llvm/include/llvm/Object/Wasm.h +++ b/llvm/include/llvm/Object/Wasm.h @@ -36,13 +36,15 @@ class WasmSymbol { public: WasmSymbol(const wasm::WasmSymbolInfo &Info, const wasm::WasmGlobalType *GlobalType, + const wasm::WasmTableType *TableType, const wasm::WasmEventType *EventType, const wasm::WasmSignature *Signature) - : Info(Info), GlobalType(GlobalType), EventType(EventType), - Signature(Signature) {} + : Info(Info), GlobalType(GlobalType), TableType(TableType), + EventType(EventType), Signature(Signature) {} const wasm::WasmSymbolInfo &Info; const wasm::WasmGlobalType *GlobalType; + const wasm::WasmTableType *TableType; const wasm::WasmEventType *EventType; const wasm::WasmSignature *Signature; @@ -50,6 +52,8 @@ public: return Info.Kind == wasm::WASM_SYMBOL_TYPE_FUNCTION; } + bool isTypeTable() const { return Info.Kind == wasm::WASM_SYMBOL_TYPE_TABLE; } + bool isTypeData() const { return Info.Kind == wasm::WASM_SYMBOL_TYPE_DATA; } bool isTypeGlobal() const { @@ -105,6 +109,7 @@ struct WasmSection { uint32_t Type = 0; // Section type (See below) uint32_t Offset = 0; // Offset with in the file StringRef Name; // Section name (User-defined sections only) + uint32_t Comdat = UINT32_MAX; // From the "comdat info" section ArrayRef<uint8_t> Content; // Section content std::vector<wasm::WasmRelocation> Relocations; // Relocations for this section }; @@ -146,9 +151,10 @@ public: ArrayRef<wasm::WasmElemSegment> elements() const { return ElemSegments; } ArrayRef<WasmSegment> dataSegments() const { return DataSegments; } ArrayRef<wasm::WasmFunction> functions() const { return Functions; } - ArrayRef<wasm::WasmFunctionName> debugNames() const { return DebugNames; } + ArrayRef<wasm::WasmDebugName> debugNames() const { return DebugNames; } uint32_t startFunction() const { return StartFunction; } uint32_t getNumImportedGlobals() const { return 
NumImportedGlobals; } + uint32_t getNumImportedTables() const { return NumImportedTables; } uint32_t getNumImportedFunctions() const { return NumImportedFunctions; } uint32_t getNumImportedEvents() const { return NumImportedEvents; } uint32_t getNumSections() const { return Sections.size(); } @@ -214,10 +220,13 @@ private: bool isValidFunctionIndex(uint32_t Index) const; bool isDefinedFunctionIndex(uint32_t Index) const; bool isValidGlobalIndex(uint32_t Index) const; + bool isValidTableIndex(uint32_t Index) const; bool isDefinedGlobalIndex(uint32_t Index) const; + bool isDefinedTableIndex(uint32_t Index) const; bool isValidEventIndex(uint32_t Index) const; bool isDefinedEventIndex(uint32_t Index) const; bool isValidFunctionSymbol(uint32_t Index) const; + bool isValidTableSymbol(uint32_t Index) const; bool isValidGlobalSymbol(uint32_t Index) const; bool isValidEventSymbol(uint32_t Index) const; bool isValidDataSymbol(uint32_t Index) const; @@ -277,19 +286,22 @@ private: llvm::Optional<size_t> DataCount; std::vector<wasm::WasmFunction> Functions; std::vector<WasmSymbol> Symbols; - std::vector<wasm::WasmFunctionName> DebugNames; + std::vector<wasm::WasmDebugName> DebugNames; uint32_t StartFunction = -1; bool HasLinkingSection = false; bool HasDylinkSection = false; bool SeenCodeSection = false; + bool HasMemory64 = false; wasm::WasmLinkingData LinkingData; uint32_t NumImportedGlobals = 0; + uint32_t NumImportedTables = 0; uint32_t NumImportedFunctions = 0; uint32_t NumImportedEvents = 0; uint32_t CodeSection = 0; uint32_t DataSection = 0; uint32_t EventSection = 0; uint32_t GlobalSection = 0; + uint32_t TableSection = 0; }; class WasmSectionOrderChecker { diff --git a/llvm/include/llvm/Object/XCOFFObjectFile.h b/llvm/include/llvm/Object/XCOFFObjectFile.h index 9c2470736023..1ac00ed5e2c7 100644 --- a/llvm/include/llvm/Object/XCOFFObjectFile.h +++ b/llvm/include/llvm/Object/XCOFFObjectFile.h @@ -13,6 +13,8 @@ #ifndef LLVM_OBJECT_XCOFFOBJECTFILE_H #define LLVM_OBJECT_XCOFFOBJECTFILE_H +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/BinaryFormat/XCOFF.h" #include "llvm/Object/ObjectFile.h" #include "llvm/Support/Endian.h" @@ -370,6 +372,8 @@ public: Expected<ArrayRef<XCOFFRelocation32>> relocations(const XCOFFSectionHeader32 &) const; + + static bool classof(const Binary *B) { return B->isXCOFF(); } }; // XCOFFObjectFile class XCOFFSymbolRef { @@ -391,6 +395,103 @@ public: bool isFunction() const; }; +class TBVectorExt { + friend class XCOFFTracebackTable; + + uint16_t Data; + uint32_t VecParmsInfo; + + TBVectorExt(StringRef TBvectorStrRef); + +public: + uint8_t getNumberOfVRSaved() const; + bool isVRSavedOnStack() const; + bool hasVarArgs() const; + uint8_t getNumberOfVectorParms() const; + bool hasVMXInstruction() const; + SmallString<32> getVectorParmsInfoString() const; +}; + +/// This class provides methods to extract traceback table data from a buffer. +/// The various accessors may reference the buffer provided via the constructor. 
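
Because the traceback-table reader introduced just below is easiest to understand from the call side, here is a minimal usage sketch. It is not part of the patch: the caller, its buffer handling, and the output format are assumptions; only the create() factory and accessors declared in this header are used.

```cpp
#include "llvm/Object/XCOFFObjectFile.h"
#include "llvm/Support/raw_ostream.h"

// Illustrative only: parse a traceback table starting at TBPtr (just past the
// leading word of zeros) and print the function name when one is recorded.
static void printTracebackName(const uint8_t *TBPtr, uint64_t Size) {
  auto TableOrErr = llvm::object::XCOFFTracebackTable::create(TBPtr, Size);
  if (!TableOrErr) {
    llvm::consumeError(TableOrErr.takeError());
    return;
  }
  // create() may shrink Size to the end of the last field it understood.
  if (TableOrErr->isFuncNamePresent() && TableOrErr->getFunctionName())
    llvm::outs() << "function: " << *TableOrErr->getFunctionName() << "\n";
}
```
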
+ +class XCOFFTracebackTable { + const uint8_t *const TBPtr; + Optional<SmallString<32>> ParmsType; + Optional<uint32_t> TraceBackTableOffset; + Optional<uint32_t> HandlerMask; + Optional<uint32_t> NumOfCtlAnchors; + Optional<SmallVector<uint32_t, 8>> ControlledStorageInfoDisp; + Optional<StringRef> FunctionName; + Optional<uint8_t> AllocaRegister; + Optional<TBVectorExt> VecExt; + Optional<uint8_t> ExtensionTable; + + XCOFFTracebackTable(const uint8_t *Ptr, uint64_t &Size, Error &Err); +public: + /// Parse an XCOFF Traceback Table from \a Ptr with \a Size bytes. + /// Returns an XCOFFTracebackTable upon successful parsing, otherwise an + /// Error is returned. + /// + /// \param[in] Ptr + /// A pointer that points just past the initial 4 bytes of zeros at the + /// beginning of an XCOFF Traceback Table. + /// + /// \param[in, out] Size + /// A pointer that points to the length of the XCOFF Traceback Table. + /// If the XCOFF Traceback Table is not parsed successfully or there are + /// extra bytes that are not recognized, \a Size will be updated to be the + /// size up to the end of the last successfully parsed field of the table. + static Expected<XCOFFTracebackTable> create(const uint8_t *Ptr, + uint64_t &Size); + uint8_t getVersion() const; + uint8_t getLanguageID() const; + + bool isGlobalLinkage() const; + bool isOutOfLineEpilogOrPrologue() const; + bool hasTraceBackTableOffset() const; + bool isInternalProcedure() const; + bool hasControlledStorage() const; + bool isTOCless() const; + bool isFloatingPointPresent() const; + bool isFloatingPointOperationLogOrAbortEnabled() const; + + bool isInterruptHandler() const; + bool isFuncNamePresent() const; + bool isAllocaUsed() const; + uint8_t getOnConditionDirective() const; + bool isCRSaved() const; + bool isLRSaved() const; + + bool isBackChainStored() const; + bool isFixup() const; + uint8_t getNumOfFPRsSaved() const; + + bool hasVectorInfo() const; + bool hasExtensionTable() const; + uint8_t getNumOfGPRsSaved() const; + + uint8_t getNumberOfFixedParms() const; + + uint8_t getNumberOfFPParms() const; + bool hasParmsOnStack() const; + + const Optional<SmallString<32>> &getParmsType() const { return ParmsType; } + const Optional<uint32_t> &getTraceBackTableOffset() const { + return TraceBackTableOffset; + } + const Optional<uint32_t> &getHandlerMask() const { return HandlerMask; } + const Optional<uint32_t> &getNumOfCtlAnchors() { return NumOfCtlAnchors; } + const Optional<SmallVector<uint32_t, 8>> &getControlledStorageInfoDisp() { + return ControlledStorageInfoDisp; + } + const Optional<StringRef> &getFunctionName() const { return FunctionName; } + const Optional<uint8_t> &getAllocaRegister() const { return AllocaRegister; } + const Optional<TBVectorExt> &getVectorExt() const { return VecExt; } + const Optional<uint8_t> &getExtensionTable() const { return ExtensionTable; } +}; + +bool doesXCOFFTracebackTableBegin(ArrayRef<uint8_t> Bytes); } // namespace object } // namespace llvm diff --git a/llvm/include/llvm/ObjectYAML/ArchiveYAML.h b/llvm/include/llvm/ObjectYAML/ArchiveYAML.h new file mode 100644 index 000000000000..8d05feedcc62 --- /dev/null +++ b/llvm/include/llvm/ObjectYAML/ArchiveYAML.h @@ -0,0 +1,77 @@ +//===- ArchiveYAML.h - Archive YAMLIO implementation ------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file declares classes for handling the YAML representation of archives. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_OBJECTYAML_ARCHIVEYAML_H +#define LLVM_OBJECTYAML_ARCHIVEYAML_H + +#include "llvm/Support/YAMLTraits.h" +#include "llvm/ObjectYAML/YAML.h" +#include "llvm/ADT/MapVector.h" + +namespace llvm { +namespace ArchYAML { + +struct Archive { + struct Child { + struct Field { + Field() = default; + Field(StringRef Default, unsigned Length) + : DefaultValue(Default), MaxLength(Length) {} + StringRef Value; + StringRef DefaultValue; + unsigned MaxLength; + }; + + Child() { + Fields["Name"] = {"", 16}; + Fields["LastModified"] = {"0", 12}; + Fields["UID"] = {"0", 6}; + Fields["GID"] = {"0", 6}; + Fields["AccessMode"] = {"0", 8}; + Fields["Size"] = {"0", 10}; + Fields["Terminator"] = {"`\n", 2}; + } + + MapVector<StringRef, Field> Fields; + + Optional<yaml::BinaryRef> Content; + Optional<llvm::yaml::Hex8> PaddingByte; + }; + + StringRef Magic; + Optional<std::vector<Child>> Members; + Optional<yaml::BinaryRef> Content; +}; + +} // end namespace ArchYAML +} // end namespace llvm + +LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::ArchYAML::Archive::Child) + +namespace llvm { +namespace yaml { + +template <> struct MappingTraits<ArchYAML::Archive> { + static void mapping(IO &IO, ArchYAML::Archive &A); + static std::string validate(IO &, ArchYAML::Archive &A); +}; + +template <> struct MappingTraits<ArchYAML::Archive::Child> { + static void mapping(IO &IO, ArchYAML::Archive::Child &C); + static std::string validate(IO &, ArchYAML::Archive::Child &C); +}; + +} // end namespace yaml +} // end namespace llvm + +#endif // LLVM_OBJECTYAML_ARCHIVEYAML_H diff --git a/llvm/include/llvm/ObjectYAML/DWARFEmitter.h b/llvm/include/llvm/ObjectYAML/DWARFEmitter.h index 0ec3f90e1686..eb56d1e29326 100644 --- a/llvm/include/llvm/ObjectYAML/DWARFEmitter.h +++ b/llvm/include/llvm/ObjectYAML/DWARFEmitter.h @@ -33,15 +33,23 @@ Error emitDebugStr(raw_ostream &OS, const Data &DI); Error emitDebugAranges(raw_ostream &OS, const Data &DI); Error emitDebugRanges(raw_ostream &OS, const Data &DI); -Error emitPubSection(raw_ostream &OS, const PubSection &Sect, - bool IsLittleEndian, bool IsGNUPubSec = false); +Error emitDebugPubnames(raw_ostream &OS, const Data &DI); +Error emitDebugPubtypes(raw_ostream &OS, const Data &DI); +Error emitDebugGNUPubnames(raw_ostream &OS, const Data &DI); +Error emitDebugGNUPubtypes(raw_ostream &OS, const Data &DI); Error emitDebugInfo(raw_ostream &OS, const Data &DI); Error emitDebugLine(raw_ostream &OS, const Data &DI); Error emitDebugAddr(raw_ostream &OS, const Data &DI); +Error emitDebugStrOffsets(raw_ostream &OS, const Data &DI); +Error emitDebugRnglists(raw_ostream &OS, const Data &DI); +Error emitDebugLoclists(raw_ostream &OS, const Data &DI); +std::function<Error(raw_ostream &, const Data &)> +getDWARFEmitterByName(StringRef SecName); Expected<StringMap<std::unique_ptr<MemoryBuffer>>> -emitDebugSections(StringRef YAMLString, bool ApplyFixups = false, - bool IsLittleEndian = sys::IsLittleEndianHost); +emitDebugSections(StringRef YAMLString, + bool IsLittleEndian = sys::IsLittleEndianHost, + bool Is64BitAddrSize = true); } // end namespace DWARFYAML } // end namespace llvm diff --git a/llvm/include/llvm/ObjectYAML/DWARFYAML.h b/llvm/include/llvm/ObjectYAML/DWARFYAML.h 
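
The DWARFYAML emitter entry point changed shape in the hunks above: emitDebugSections() now takes endianness and address-size flags instead of the old ApplyFixups parameter, and per-section emitters are reachable through getDWARFEmitterByName(). A small sketch follows; it is illustrative only (the caller name and the YAML source are assumptions), not part of the patch.

```cpp
#include "llvm/ObjectYAML/DWARFEmitter.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"

// Illustrative only: build DWARF section blobs from a YAML description and
// report what was emitted.
static void emitFromYAML(llvm::StringRef Yaml) {
  auto SectionsOrErr = llvm::DWARFYAML::emitDebugSections(
      Yaml, /*IsLittleEndian=*/true, /*Is64BitAddrSize=*/true);
  if (!SectionsOrErr) {
    llvm::consumeError(SectionsOrErr.takeError());
    return;
  }
  for (const auto &Section : *SectionsOrErr)
    llvm::outs() << Section.getKey() << ": "
                 << Section.getValue()->getBufferSize() << " bytes\n";
}
```
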
index 9f62a4a2be57..856cea9a1535 100644 --- a/llvm/include/llvm/ObjectYAML/DWARFYAML.h +++ b/llvm/include/llvm/ObjectYAML/DWARFYAML.h @@ -18,33 +18,15 @@ #include "llvm/ADT/SetVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/BinaryFormat/Dwarf.h" +#include "llvm/ObjectYAML/YAML.h" #include "llvm/Support/YAMLTraits.h" #include <cstdint> +#include <unordered_map> #include <vector> namespace llvm { namespace DWARFYAML { -struct InitialLength { - uint32_t TotalLength; - uint64_t TotalLength64; - - bool isDWARF64() const { return TotalLength == UINT32_MAX; } - - uint64_t getLength() const { - return isDWARF64() ? TotalLength64 : TotalLength; - } - - void setLength(uint64_t Len) { - if (Len >= (uint64_t)UINT32_MAX) { - TotalLength64 = Len; - TotalLength = UINT32_MAX; - } else { - TotalLength = Len; - } - } -}; - struct AttributeAbbrev { llvm::dwarf::Attribute Attribute; llvm::dwarf::Form Form; @@ -58,18 +40,23 @@ struct Abbrev { std::vector<AttributeAbbrev> Attributes; }; +struct AbbrevTable { + Optional<uint64_t> ID; + std::vector<Abbrev> Table; +}; + struct ARangeDescriptor { llvm::yaml::Hex64 Address; - uint64_t Length; + yaml::Hex64 Length; }; struct ARange { dwarf::DwarfFormat Format; - uint64_t Length; + Optional<yaml::Hex64> Length; uint16_t Version; - uint32_t CuOffset; - uint8_t AddrSize; - uint8_t SegSize; + yaml::Hex64 CuOffset; + Optional<yaml::Hex8> AddrSize; + yaml::Hex8 SegSize; std::vector<ARangeDescriptor> Descriptors; }; @@ -94,7 +81,8 @@ struct PubEntry { }; struct PubSection { - InitialLength Length; + dwarf::DwarfFormat Format; + yaml::Hex64 Length; uint16_t Version; uint32_t UnitOffset; uint32_t UnitSize; @@ -120,11 +108,12 @@ struct DWARFContext { struct Unit { dwarf::DwarfFormat Format; - uint64_t Length; + Optional<yaml::Hex64> Length; uint16_t Version; + Optional<uint8_t> AddrSize; llvm::dwarf::UnitType Type; // Added in DWARF 5 - yaml::Hex64 AbbrOffset; - uint8_t AddrSize; + Optional<uint64_t> AbbrevTableID; + Optional<yaml::Hex64> AbbrOffset; std::vector<Entry> Entries; }; @@ -137,7 +126,7 @@ struct File { struct LineTableOpcode { dwarf::LineNumberOps Opcode; - uint64_t ExtLen; + Optional<uint64_t> ExtLen; dwarf::LineNumberExtendedOps SubOpcode; uint64_t Data; int64_t SData; @@ -148,16 +137,16 @@ struct LineTableOpcode { struct LineTable { dwarf::DwarfFormat Format; - uint64_t Length; + Optional<uint64_t> Length; uint16_t Version; - uint64_t PrologueLength; + Optional<uint64_t> PrologueLength; uint8_t MinInstLength; uint8_t MaxOpsPerInst; uint8_t DefaultIsStmt; uint8_t LineBase; uint8_t LineRange; - uint8_t OpcodeBase; - std::vector<uint8_t> StandardOpcodeLengths; + Optional<uint8_t> OpcodeBase; + Optional<std::vector<uint8_t>> StandardOpcodeLengths; std::vector<StringRef> IncludeDirs; std::vector<File> Files; std::vector<LineTableOpcode> Opcodes; @@ -177,14 +166,56 @@ struct AddrTableEntry { std::vector<SegAddrPair> SegAddrPairs; }; +struct StringOffsetsTable { + dwarf::DwarfFormat Format; + Optional<yaml::Hex64> Length; + yaml::Hex16 Version; + yaml::Hex16 Padding; + std::vector<yaml::Hex64> Offsets; +}; + +struct DWARFOperation { + dwarf::LocationAtom Operator; + std::vector<yaml::Hex64> Values; +}; + +struct RnglistEntry { + dwarf::RnglistEntries Operator; + std::vector<yaml::Hex64> Values; +}; + +struct LoclistEntry { + dwarf::LoclistEntries Operator; + std::vector<yaml::Hex64> Values; + Optional<yaml::Hex64> DescriptionsLength; + std::vector<DWARFOperation> Descriptions; +}; + +template <typename EntryType> struct ListEntries { + 
Optional<std::vector<EntryType>> Entries; + Optional<yaml::BinaryRef> Content; +}; + +template <typename EntryType> struct ListTable { + dwarf::DwarfFormat Format; + Optional<yaml::Hex64> Length; + yaml::Hex16 Version; + Optional<yaml::Hex8> AddrSize; + yaml::Hex8 SegSelectorSize; + Optional<uint32_t> OffsetEntryCount; + Optional<std::vector<yaml::Hex64>> Offsets; + std::vector<ListEntries<EntryType>> Lists; +}; + struct Data { bool IsLittleEndian; bool Is64BitAddrSize; - std::vector<Abbrev> AbbrevDecls; - std::vector<StringRef> DebugStrings; - std::vector<ARange> ARanges; - std::vector<Ranges> DebugRanges; - std::vector<AddrTableEntry> DebugAddr; + std::vector<AbbrevTable> DebugAbbrev; + Optional<std::vector<StringRef>> DebugStrings; + Optional<std::vector<StringOffsetsTable>> DebugStrOffsets; + Optional<std::vector<ARange>> DebugAranges; + Optional<std::vector<Ranges>> DebugRanges; + Optional<std::vector<AddrTableEntry>> DebugAddr; Optional<PubSection> PubNames; Optional<PubSection> PubTypes; @@ -194,10 +225,23 @@ struct Data { std::vector<Unit> CompileUnits; std::vector<LineTable> DebugLines; + Optional<std::vector<ListTable<RnglistEntry>>> DebugRnglists; + Optional<std::vector<ListTable<LoclistEntry>>> DebugLoclists; bool isEmpty() const; - SetVector<StringRef> getUsedSectionNames() const; + SetVector<StringRef> getNonEmptySectionNames() const; + + struct AbbrevTableInfo { + uint64_t Index; + uint64_t Offset; + }; + Expected<AbbrevTableInfo> getAbbrevTableInfoByID(uint64_t ID) const; + StringRef getAbbrevTableContentByIndex(uint64_t Index) const; + +private: + mutable std::unordered_map<uint64_t, AbbrevTableInfo> AbbrevTableInfoMap; + mutable std::unordered_map<uint64_t, std::string> AbbrevTableContents; }; } // end namespace DWARFYAML @@ -205,6 +249,7 @@ struct Data { LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::DWARFYAML::AttributeAbbrev) LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::DWARFYAML::Abbrev) +LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::DWARFYAML::AbbrevTable) LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::DWARFYAML::ARangeDescriptor) LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::DWARFYAML::ARange) LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::DWARFYAML::RangeEntry) @@ -218,6 +263,18 @@ LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::DWARFYAML::LineTable) LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::DWARFYAML::LineTableOpcode) LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::DWARFYAML::SegAddrPair) LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::DWARFYAML::AddrTableEntry) +LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::DWARFYAML::StringOffsetsTable) +LLVM_YAML_IS_SEQUENCE_VECTOR( + llvm::DWARFYAML::ListTable<DWARFYAML::RnglistEntry>) +LLVM_YAML_IS_SEQUENCE_VECTOR( + llvm::DWARFYAML::ListEntries<DWARFYAML::RnglistEntry>) +LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::DWARFYAML::RnglistEntry) +LLVM_YAML_IS_SEQUENCE_VECTOR( + llvm::DWARFYAML::ListTable<DWARFYAML::LoclistEntry>) +LLVM_YAML_IS_SEQUENCE_VECTOR( + llvm::DWARFYAML::ListEntries<DWARFYAML::LoclistEntry>) +LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::DWARFYAML::LoclistEntry) +LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::DWARFYAML::DWARFOperation) namespace llvm { namespace yaml { @@ -226,6 +283,10 @@ template <> struct MappingTraits<DWARFYAML::Data> { static void mapping(IO &IO, DWARFYAML::Data &DWARF); }; +template <> struct MappingTraits<DWARFYAML::AbbrevTable> { + static void mapping(IO &IO, DWARFYAML::AbbrevTable &AbbrevTable); +}; + template <> struct MappingTraits<DWARFYAML::Abbrev> { static void mapping(IO &IO, DWARFYAML::Abbrev &Abbrev); }; @@ -286,12 +347,36 @@ template <> struct MappingTraits<DWARFYAML::SegAddrPair> { static void mapping(IO &IO, 
DWARFYAML::SegAddrPair &SegAddrPair); }; +template <> struct MappingTraits<DWARFYAML::DWARFOperation> { + static void mapping(IO &IO, DWARFYAML::DWARFOperation &DWARFOperation); +}; + +template <typename EntryType> +struct MappingTraits<DWARFYAML::ListTable<EntryType>> { + static void mapping(IO &IO, DWARFYAML::ListTable<EntryType> &ListTable); +}; + +template <typename EntryType> +struct MappingTraits<DWARFYAML::ListEntries<EntryType>> { + static void mapping(IO &IO, DWARFYAML::ListEntries<EntryType> &ListEntries); + static std::string validate(IO &IO, + DWARFYAML::ListEntries<EntryType> &ListEntries); +}; + +template <> struct MappingTraits<DWARFYAML::RnglistEntry> { + static void mapping(IO &IO, DWARFYAML::RnglistEntry &RnglistEntry); +}; + +template <> struct MappingTraits<DWARFYAML::LoclistEntry> { + static void mapping(IO &IO, DWARFYAML::LoclistEntry &LoclistEntry); +}; + template <> struct MappingTraits<DWARFYAML::AddrTableEntry> { static void mapping(IO &IO, DWARFYAML::AddrTableEntry &AddrTable); }; -template <> struct MappingTraits<DWARFYAML::InitialLength> { - static void mapping(IO &IO, DWARFYAML::InitialLength &DWARF); +template <> struct MappingTraits<DWARFYAML::StringOffsetsTable> { + static void mapping(IO &IO, DWARFYAML::StringOffsetsTable &StrOffsetsTable); }; template <> struct ScalarEnumerationTraits<dwarf::DwarfFormat> { @@ -369,6 +454,34 @@ template <> struct ScalarEnumerationTraits<dwarf::Constants> { } }; +#define HANDLE_DW_RLE(unused, name) \ + io.enumCase(value, "DW_RLE_" #name, dwarf::DW_RLE_##name); + +template <> struct ScalarEnumerationTraits<dwarf::RnglistEntries> { + static void enumeration(IO &io, dwarf::RnglistEntries &value) { +#include "llvm/BinaryFormat/Dwarf.def" + } +}; + +#define HANDLE_DW_LLE(unused, name) \ + io.enumCase(value, "DW_LLE_" #name, dwarf::DW_LLE_##name); + +template <> struct ScalarEnumerationTraits<dwarf::LoclistEntries> { + static void enumeration(IO &io, dwarf::LoclistEntries &value) { +#include "llvm/BinaryFormat/Dwarf.def" + } +}; + +#define HANDLE_DW_OP(id, name, version, vendor) \ + io.enumCase(value, "DW_OP_" #name, dwarf::DW_OP_##name); + +template <> struct ScalarEnumerationTraits<dwarf::LocationAtom> { + static void enumeration(IO &io, dwarf::LocationAtom &value) { +#include "llvm/BinaryFormat/Dwarf.def" + io.enumFallback<yaml::Hex8>(value); + } +}; + } // end namespace yaml } // end namespace llvm diff --git a/llvm/include/llvm/ObjectYAML/ELFYAML.h b/llvm/include/llvm/ObjectYAML/ELFYAML.h index b1ffb20681ea..4f3c76bbd82c 100644 --- a/llvm/include/llvm/ObjectYAML/ELFYAML.h +++ b/llvm/include/llvm/ObjectYAML/ELFYAML.h @@ -16,6 +16,8 @@ #define LLVM_OBJECTYAML_ELFYAML_H #include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/ELF.h" +#include "llvm/Object/ELFTypes.h" #include "llvm/ObjectYAML/DWARFYAML.h" #include "llvm/ObjectYAML/YAML.h" #include "llvm/Support/YAMLTraits.h" @@ -69,6 +71,41 @@ LLVM_YAML_STRONG_TYPEDEF(uint32_t, MIPS_ISA) LLVM_YAML_STRONG_TYPEDEF(StringRef, YAMLFlowString) LLVM_YAML_STRONG_TYPEDEF(int64_t, YAMLIntUInt) +template <class ELFT> +unsigned getDefaultShEntSize(unsigned EMachine, ELF_SHT SecType, + StringRef SecName) { + if (EMachine == ELF::EM_MIPS && SecType == ELF::SHT_MIPS_ABIFLAGS) + return sizeof(object::Elf_Mips_ABIFlags<ELFT>); + + switch (SecType) { + case ELF::SHT_SYMTAB: + case ELF::SHT_DYNSYM: + return sizeof(typename ELFT::Sym); + case ELF::SHT_GROUP: + return sizeof(typename ELFT::Word); + case ELF::SHT_REL: + return sizeof(typename ELFT::Rel); + case ELF::SHT_RELA: + return 
sizeof(typename ELFT::Rela); + case ELF::SHT_RELR: + return sizeof(typename ELFT::Relr); + case ELF::SHT_DYNAMIC: + return sizeof(typename ELFT::Dyn); + case ELF::SHT_HASH: + return sizeof(typename ELFT::Word); + case ELF::SHT_SYMTAB_SHNDX: + return sizeof(typename ELFT::Word); + case ELF::SHT_GNU_versym: + return sizeof(typename ELFT::Half); + case ELF::SHT_LLVM_CALL_GRAPH_PROFILE: + return sizeof(object::Elf_CGProfile_Impl<ELFT>); + default: + if (SecName == ".debug_str") + return 1; + return 0; + } +} + // For now, hardcode 64 bits everywhere that 32 or 64 would be needed // since 64-bit can hold 32-bit values too. struct FileHeader { @@ -77,7 +114,7 @@ struct FileHeader { ELF_ELFOSABI OSABI; llvm::yaml::Hex8 ABIVersion; ELF_ET Type; - ELF_EM Machine; + Optional<ELF_EM> Machine; ELF_EF Flags; llvm::yaml::Hex64 Entry; @@ -94,24 +131,14 @@ struct SectionHeader { StringRef Name; }; -struct SectionHeaderTable { - Optional<std::vector<SectionHeader>> Sections; - Optional<std::vector<SectionHeader>> Excluded; - Optional<bool> NoHeaders; -}; - -struct SectionName { - StringRef Section; -}; - struct Symbol { StringRef Name; ELF_STT Type; - StringRef Section; + Optional<StringRef> Section; Optional<ELF_SHN> Index; ELF_STB Binding; - llvm::yaml::Hex64 Value; - llvm::yaml::Hex64 Size; + Optional<llvm::yaml::Hex64> Value; + Optional<llvm::yaml::Hex64> Size; Optional<uint8_t> Other; Optional<uint32_t> StName; @@ -126,6 +153,16 @@ struct DynamicEntry { llvm::yaml::Hex64 Val; }; +struct BBAddrMapEntry { + struct BBEntry { + llvm::yaml::Hex32 AddressOffset; + llvm::yaml::Hex32 Size; + llvm::yaml::Hex32 Metadata; + }; + llvm::yaml::Hex64 Address; + Optional<std::vector<BBEntry>> BBEntries; +}; + struct StackSizeEntry { llvm::yaml::Hex64 Address; llvm::yaml::Hex64 Size; @@ -153,19 +190,29 @@ struct Chunk { StackSizes, SymtabShndxSection, Symver, + ARMIndexTable, MipsABIFlags, Addrsig, - Fill, LinkerOptions, DependentLibraries, - CallGraphProfile + CallGraphProfile, + BBAddrMap, + + // Special chunks. + SpecialChunksStart, + Fill = SpecialChunksStart, + SectionHeaderTable, }; ChunkKind Kind; StringRef Name; Optional<llvm::yaml::Hex64> Offset; - Chunk(ChunkKind K) : Kind(K) {} + // Usually chunks are not created implicitly, but rather loaded from YAML. + // This flag is used to signal whether this is the case or not. + bool IsImplicit; + + Chunk(ChunkKind K, bool Implicit) : Kind(K), IsImplicit(Implicit) {} virtual ~Chunk(); }; @@ -173,25 +220,35 @@ struct Section : public Chunk { ELF_SHT Type; Optional<ELF_SHF> Flags; Optional<llvm::yaml::Hex64> Address; - StringRef Link; + Optional<StringRef> Link; llvm::yaml::Hex64 AddressAlign; Optional<llvm::yaml::Hex64> EntSize; - // Usually sections are not created implicitly, but loaded from YAML. - // When they are, this flag is used to signal about that. - bool IsImplicit; + Optional<yaml::BinaryRef> Content; + Optional<llvm::yaml::Hex64> Size; // Holds the original section index. unsigned OriginalSecNdx; - Section(ChunkKind Kind, bool IsImplicit = false) - : Chunk(Kind), IsImplicit(IsImplicit) {} + Section(ChunkKind Kind, bool IsImplicit = false) : Chunk(Kind, IsImplicit) {} - static bool classof(const Chunk *S) { return S->Kind != ChunkKind::Fill; } + static bool classof(const Chunk *S) { + return S->Kind < ChunkKind::SpecialChunksStart; + } + + // Some derived sections might have their own special entries. This method + // returns a vector of <entry name, is used> pairs. It is used for section + // validation. 
+ virtual std::vector<std::pair<StringRef, bool>> getEntries() const { + return {}; + }; // The following members are used to override section fields which is // useful for creating invalid objects. + // This can be used to override the sh_addralign field. + Optional<llvm::yaml::Hex64> ShAddrAlign; + // This can be used to override the offset stored in the sh_name field. // It does not affect the name stored in the string table. Optional<llvm::yaml::Hex64> ShName; @@ -206,6 +263,12 @@ struct Section : public Chunk { // This can be used to override the sh_flags field. Optional<llvm::yaml::Hex64> ShFlags; + + // This can be used to override the sh_type field. It is useful when we + // want to use specific YAML keys for a section of a particular type to + // describe the content, but still want to have a different final type + // for the section. + Optional<ELF_SHT> ShType; }; // Fill is a block of data which is placed outside of sections. It is @@ -215,18 +278,57 @@ struct Fill : Chunk { Optional<yaml::BinaryRef> Pattern; llvm::yaml::Hex64 Size; - Fill() : Chunk(ChunkKind::Fill) {} + Fill() : Chunk(ChunkKind::Fill, /*Implicit=*/false) {} static bool classof(const Chunk *S) { return S->Kind == ChunkKind::Fill; } }; +struct SectionHeaderTable : Chunk { + SectionHeaderTable(bool IsImplicit) + : Chunk(ChunkKind::SectionHeaderTable, IsImplicit) {} + + static bool classof(const Chunk *S) { + return S->Kind == ChunkKind::SectionHeaderTable; + } + + Optional<std::vector<SectionHeader>> Sections; + Optional<std::vector<SectionHeader>> Excluded; + Optional<bool> NoHeaders; + + size_t getNumHeaders(size_t SectionsNum) const { + if (IsImplicit) + return SectionsNum; + if (NoHeaders) + return (*NoHeaders) ? 0 : SectionsNum; + return (Sections ? Sections->size() : 0) + /*Null section*/ 1; + } + + static constexpr StringRef TypeStr = "SectionHeaderTable"; +}; + +struct BBAddrMapSection : Section { + Optional<std::vector<BBAddrMapEntry>> Entries; + + BBAddrMapSection() : Section(ChunkKind::BBAddrMap) {} + + std::vector<std::pair<StringRef, bool>> getEntries() const override { + return {{"Entries", Entries.hasValue()}}; + }; + + static bool classof(const Chunk *S) { + return S->Kind == ChunkKind::BBAddrMap; + } +}; + struct StackSizesSection : Section { - Optional<yaml::BinaryRef> Content; - Optional<llvm::yaml::Hex64> Size; Optional<std::vector<StackSizeEntry>> Entries; StackSizesSection() : Section(ChunkKind::StackSizes) {} + std::vector<std::pair<StringRef, bool>> getEntries() const override { + return {{"Entries", Entries.hasValue()}}; + }; + static bool classof(const Chunk *S) { return S->Kind == ChunkKind::StackSizes; } @@ -237,17 +339,18 @@ struct StackSizesSection : Section { }; struct DynamicSection : Section { - std::vector<DynamicEntry> Entries; - Optional<yaml::BinaryRef> Content; + Optional<std::vector<DynamicEntry>> Entries; DynamicSection() : Section(ChunkKind::Dynamic) {} + std::vector<std::pair<StringRef, bool>> getEntries() const override { + return {{"Entries", Entries.hasValue()}}; + }; + static bool classof(const Chunk *S) { return S->Kind == ChunkKind::Dynamic; } }; struct RawContentSection : Section { - Optional<yaml::BinaryRef> Content; - Optional<llvm::yaml::Hex64> Size; Optional<llvm::yaml::Hex64> Info; RawContentSection() : Section(ChunkKind::RawContent) {} @@ -261,29 +364,31 @@ struct RawContentSection : Section { }; struct NoBitsSection : Section { - llvm::yaml::Hex64 Size; - NoBitsSection() : Section(ChunkKind::NoBits) {} static bool classof(const Chunk *S) { return S->Kind == 
ChunkKind::NoBits; } }; struct NoteSection : Section { - Optional<yaml::BinaryRef> Content; - Optional<llvm::yaml::Hex64> Size; Optional<std::vector<ELFYAML::NoteEntry>> Notes; NoteSection() : Section(ChunkKind::Note) {} + std::vector<std::pair<StringRef, bool>> getEntries() const override { + return {{"Notes", Notes.hasValue()}}; + }; + static bool classof(const Chunk *S) { return S->Kind == ChunkKind::Note; } }; struct HashSection : Section { - Optional<yaml::BinaryRef> Content; - Optional<llvm::yaml::Hex64> Size; Optional<std::vector<uint32_t>> Bucket; Optional<std::vector<uint32_t>> Chain; + std::vector<std::pair<StringRef, bool>> getEntries() const override { + return {{"Bucket", Bucket.hasValue()}, {"Chain", Chain.hasValue()}}; + }; + // The following members are used to override section fields. // This is useful for creating invalid objects. Optional<llvm::yaml::Hex64> NBucket; @@ -315,8 +420,6 @@ struct GnuHashHeader { }; struct GnuHashSection : Section { - Optional<yaml::BinaryRef> Content; - Optional<GnuHashHeader> Header; Optional<std::vector<llvm::yaml::Hex64>> BloomFilter; Optional<std::vector<llvm::yaml::Hex32>> HashBuckets; @@ -324,6 +427,13 @@ struct GnuHashSection : Section { GnuHashSection() : Section(ChunkKind::GnuHash) {} + std::vector<std::pair<StringRef, bool>> getEntries() const override { + return {{"Header", Header.hasValue()}, + {"BloomFilter", BloomFilter.hasValue()}, + {"HashBuckets", HashBuckets.hasValue()}, + {"HashValues", HashValues.hasValue()}}; + }; + static bool classof(const Chunk *S) { return S->Kind == ChunkKind::GnuHash; } }; @@ -341,24 +451,29 @@ struct VerneedEntry { }; struct VerneedSection : Section { - Optional<yaml::BinaryRef> Content; Optional<std::vector<VerneedEntry>> VerneedV; - llvm::yaml::Hex64 Info; + Optional<llvm::yaml::Hex64> Info; VerneedSection() : Section(ChunkKind::Verneed) {} + std::vector<std::pair<StringRef, bool>> getEntries() const override { + return {{"Dependencies", VerneedV.hasValue()}}; + }; + static bool classof(const Chunk *S) { return S->Kind == ChunkKind::Verneed; } }; struct AddrsigSection : Section { - Optional<yaml::BinaryRef> Content; - Optional<llvm::yaml::Hex64> Size; Optional<std::vector<YAMLFlowString>> Symbols; AddrsigSection() : Section(ChunkKind::Addrsig) {} + std::vector<std::pair<StringRef, bool>> getEntries() const override { + return {{"Symbols", Symbols.hasValue()}}; + }; + static bool classof(const Chunk *S) { return S->Kind == ChunkKind::Addrsig; } }; @@ -369,10 +484,13 @@ struct LinkerOption { struct LinkerOptionsSection : Section { Optional<std::vector<LinkerOption>> Options; - Optional<yaml::BinaryRef> Content; LinkerOptionsSection() : Section(ChunkKind::LinkerOptions) {} + std::vector<std::pair<StringRef, bool>> getEntries() const override { + return {{"Options", Options.hasValue()}}; + }; + static bool classof(const Chunk *S) { return S->Kind == ChunkKind::LinkerOptions; } @@ -380,10 +498,13 @@ struct LinkerOptionsSection : Section { struct DependentLibrariesSection : Section { Optional<std::vector<YAMLFlowString>> Libs; - Optional<yaml::BinaryRef> Content; DependentLibrariesSection() : Section(ChunkKind::DependentLibraries) {} + std::vector<std::pair<StringRef, bool>> getEntries() const override { + return {{"Libraries", Libs.hasValue()}}; + }; + static bool classof(const Chunk *S) { return S->Kind == ChunkKind::DependentLibraries; } @@ -401,49 +522,62 @@ struct CallGraphEntry { struct CallGraphProfileSection : Section { Optional<std::vector<CallGraphEntry>> Entries; - Optional<yaml::BinaryRef> 
Content; CallGraphProfileSection() : Section(ChunkKind::CallGraphProfile) {} + std::vector<std::pair<StringRef, bool>> getEntries() const override { + return {{"Entries", Entries.hasValue()}}; + }; + static bool classof(const Chunk *S) { return S->Kind == ChunkKind::CallGraphProfile; } }; struct SymverSection : Section { - std::vector<uint16_t> Entries; + Optional<std::vector<uint16_t>> Entries; SymverSection() : Section(ChunkKind::Symver) {} + std::vector<std::pair<StringRef, bool>> getEntries() const override { + return {{"Entries", Entries.hasValue()}}; + }; + static bool classof(const Chunk *S) { return S->Kind == ChunkKind::Symver; } }; struct VerdefEntry { - uint16_t Version; - uint16_t Flags; - uint16_t VersionNdx; - uint32_t Hash; + Optional<uint16_t> Version; + Optional<uint16_t> Flags; + Optional<uint16_t> VersionNdx; + Optional<uint32_t> Hash; std::vector<StringRef> VerNames; }; struct VerdefSection : Section { Optional<std::vector<VerdefEntry>> Entries; - Optional<yaml::BinaryRef> Content; - - llvm::yaml::Hex64 Info; + Optional<llvm::yaml::Hex64> Info; VerdefSection() : Section(ChunkKind::Verdef) {} + std::vector<std::pair<StringRef, bool>> getEntries() const override { + return {{"Entries", Entries.hasValue()}}; + }; + static bool classof(const Chunk *S) { return S->Kind == ChunkKind::Verdef; } }; -struct Group : Section { +struct GroupSection : Section { // Members of a group contain a flag and a list of section indices // that are part of the group. - std::vector<SectionOrType> Members; + Optional<std::vector<SectionOrType>> Members; Optional<StringRef> Signature; /* Info */ - Group() : Section(ChunkKind::Group) {} + GroupSection() : Section(ChunkKind::Group) {} + + std::vector<std::pair<StringRef, bool>> getEntries() const override { + return {{"Members", Members.hasValue()}}; + }; static bool classof(const Chunk *S) { return S->Kind == ChunkKind::Group; } }; @@ -456,11 +590,15 @@ struct Relocation { }; struct RelocationSection : Section { - std::vector<Relocation> Relocations; + Optional<std::vector<Relocation>> Relocations; StringRef RelocatableSec; /* Info */ RelocationSection() : Section(ChunkKind::Relocation) {} + std::vector<std::pair<StringRef, bool>> getEntries() const override { + return {{"Relocations", Relocations.hasValue()}}; + }; + static bool classof(const Chunk *S) { return S->Kind == ChunkKind::Relocation; } @@ -468,25 +606,51 @@ struct RelocationSection : Section { struct RelrSection : Section { Optional<std::vector<llvm::yaml::Hex64>> Entries; - Optional<yaml::BinaryRef> Content; RelrSection() : Section(ChunkKind::Relr) {} + std::vector<std::pair<StringRef, bool>> getEntries() const override { + return {{"Entries", Entries.hasValue()}}; + }; + static bool classof(const Chunk *S) { return S->Kind == ChunkKind::Relr; } }; struct SymtabShndxSection : Section { - std::vector<uint32_t> Entries; + Optional<std::vector<uint32_t>> Entries; SymtabShndxSection() : Section(ChunkKind::SymtabShndxSection) {} + std::vector<std::pair<StringRef, bool>> getEntries() const override { + return {{"Entries", Entries.hasValue()}}; + }; + static bool classof(const Chunk *S) { return S->Kind == ChunkKind::SymtabShndxSection; } }; +struct ARMIndexTableEntry { + llvm::yaml::Hex32 Offset; + llvm::yaml::Hex32 Value; +}; + +struct ARMIndexTableSection : Section { + Optional<std::vector<ARMIndexTableEntry>> Entries; + + ARMIndexTableSection() : Section(ChunkKind::ARMIndexTable) {} + + std::vector<std::pair<StringRef, bool>> getEntries() const override { + return {{"Entries", 
Entries.hasValue()}}; + }; + + static bool classof(const Chunk *S) { + return S->Kind == ChunkKind::ARMIndexTable; + } +}; + // Represents .MIPS.abiflags section struct MipsABIFlags : Section { llvm::yaml::Hex16 Version; @@ -517,15 +681,15 @@ struct ProgramHeader { Optional<llvm::yaml::Hex64> FileSize; Optional<llvm::yaml::Hex64> MemSize; Optional<llvm::yaml::Hex64> Offset; + Optional<StringRef> FirstSec; + Optional<StringRef> LastSec; - std::vector<SectionName> Sections; - // This vector is parallel to Sections and contains corresponding chunks. + // This vector contains all chunks from [FirstSec, LastSec]. std::vector<Chunk *> Chunks; }; struct Object { FileHeader Header; - Optional<SectionHeaderTable> SectionHeaders; std::vector<ProgramHeader> ProgramHeaders; // An object might contain output section descriptions as well as @@ -547,12 +711,26 @@ struct Object { Ret.push_back(S); return Ret; } + + const SectionHeaderTable &getSectionHeaderTable() const { + for (const std::unique_ptr<Chunk> &C : Chunks) + if (auto *S = dyn_cast<ELFYAML::SectionHeaderTable>(C.get())) + return *S; + llvm_unreachable("the section header table chunk must always be present"); + } + + unsigned getMachine() const; }; +bool shouldAllocateFileSpace(ArrayRef<ProgramHeader> Phdrs, + const NoBitsSection &S); + } // end namespace ELFYAML } // end namespace llvm LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::ELFYAML::StackSizeEntry) +LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::ELFYAML::BBAddrMapEntry) +LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::ELFYAML::BBAddrMapEntry::BBEntry) LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::ELFYAML::DynamicEntry) LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::ELFYAML::LinkerOption) LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::ELFYAML::CallGraphEntry) @@ -566,7 +744,7 @@ LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::ELFYAML::VernauxEntry) LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::ELFYAML::VerneedEntry) LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::ELFYAML::Relocation) LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::ELFYAML::SectionOrType) -LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::ELFYAML::SectionName) +LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::ELFYAML::ARMIndexTableEntry) namespace llvm { namespace yaml { @@ -690,29 +868,33 @@ struct MappingTraits<ELFYAML::FileHeader> { static void mapping(IO &IO, ELFYAML::FileHeader &FileHdr); }; -template <> struct MappingTraits<ELFYAML::SectionHeaderTable> { - static void mapping(IO &IO, ELFYAML::SectionHeaderTable &SecHdrTable); - static StringRef validate(IO &IO, ELFYAML::SectionHeaderTable &SecHdrTable); -}; - template <> struct MappingTraits<ELFYAML::SectionHeader> { static void mapping(IO &IO, ELFYAML::SectionHeader &SHdr); }; template <> struct MappingTraits<ELFYAML::ProgramHeader> { static void mapping(IO &IO, ELFYAML::ProgramHeader &FileHdr); + static std::string validate(IO &IO, ELFYAML::ProgramHeader &FileHdr); }; template <> struct MappingTraits<ELFYAML::Symbol> { static void mapping(IO &IO, ELFYAML::Symbol &Symbol); - static StringRef validate(IO &IO, ELFYAML::Symbol &Symbol); + static std::string validate(IO &IO, ELFYAML::Symbol &Symbol); }; template <> struct MappingTraits<ELFYAML::StackSizeEntry> { static void mapping(IO &IO, ELFYAML::StackSizeEntry &Rel); }; +template <> struct MappingTraits<ELFYAML::BBAddrMapEntry> { + static void mapping(IO &IO, ELFYAML::BBAddrMapEntry &Rel); +}; + +template <> struct MappingTraits<ELFYAML::BBAddrMapEntry::BBEntry> { + static void mapping(IO &IO, ELFYAML::BBAddrMapEntry::BBEntry &Rel); +}; + template <> struct MappingTraits<ELFYAML::GnuHashHeader> { static void mapping(IO &IO, ELFYAML::GnuHashHeader &Rel); }; @@ 
-749,9 +931,13 @@ template <> struct MappingTraits<ELFYAML::Relocation> { static void mapping(IO &IO, ELFYAML::Relocation &Rel); }; +template <> struct MappingTraits<ELFYAML::ARMIndexTableEntry> { + static void mapping(IO &IO, ELFYAML::ARMIndexTableEntry &E); +}; + template <> struct MappingTraits<std::unique_ptr<ELFYAML::Chunk>> { static void mapping(IO &IO, std::unique_ptr<ELFYAML::Chunk> &C); - static StringRef validate(IO &io, std::unique_ptr<ELFYAML::Chunk> &C); + static std::string validate(IO &io, std::unique_ptr<ELFYAML::Chunk> &C); }; template <> @@ -763,10 +949,6 @@ template <> struct MappingTraits<ELFYAML::SectionOrType> { static void mapping(IO &IO, ELFYAML::SectionOrType &sectionOrType); }; -template <> struct MappingTraits<ELFYAML::SectionName> { - static void mapping(IO &IO, ELFYAML::SectionName &sectionName); -}; - } // end namespace yaml } // end namespace llvm diff --git a/llvm/include/llvm/ObjectYAML/MachOYAML.h b/llvm/include/llvm/ObjectYAML/MachOYAML.h index fb6780b6d0ed..94e66c5ae787 100644 --- a/llvm/include/llvm/ObjectYAML/MachOYAML.h +++ b/llvm/include/llvm/ObjectYAML/MachOYAML.h @@ -220,7 +220,7 @@ template <> struct MappingTraits<MachOYAML::Relocation> { static void mapping(IO &IO, MachOYAML::Relocation &Rel); }; template <> struct MappingTraits<MachOYAML::Section> { static void mapping(IO &IO, MachOYAML::Section &Section); - static StringRef validate(IO &io, MachOYAML::Section &Section); + static std::string validate(IO &io, MachOYAML::Section &Section); }; template <> struct MappingTraits<MachOYAML::NListEntry> { diff --git a/llvm/include/llvm/ObjectYAML/MinidumpYAML.h b/llvm/include/llvm/ObjectYAML/MinidumpYAML.h index c1711a28dd84..b0cee541cef2 100644 --- a/llvm/include/llvm/ObjectYAML/MinidumpYAML.h +++ b/llvm/include/llvm/ObjectYAML/MinidumpYAML.h @@ -236,7 +236,7 @@ template <> struct BlockScalarTraits<MinidumpYAML::BlockStringRef> { static void mapping(IO &IO, std::unique_ptr<MinidumpYAML::Stream> &S); - static StringRef validate(IO &IO, std::unique_ptr<MinidumpYAML::Stream> &S); + static std::string validate(IO &IO, std::unique_ptr<MinidumpYAML::Stream> &S); }; template <> struct MappingContextTraits<minidump::MemoryDescriptor, BinaryRef> { diff --git a/llvm/include/llvm/ObjectYAML/ObjectYAML.h b/llvm/include/llvm/ObjectYAML/ObjectYAML.h index 0015fd3dc501..dd26ce3e9703 100644 --- a/llvm/include/llvm/ObjectYAML/ObjectYAML.h +++ b/llvm/include/llvm/ObjectYAML/ObjectYAML.h @@ -9,6 +9,7 @@ #ifndef LLVM_OBJECTYAML_OBJECTYAML_H #define LLVM_OBJECTYAML_OBJECTYAML_H +#include "llvm/ObjectYAML/ArchiveYAML.h" #include "llvm/ObjectYAML/COFFYAML.h" #include "llvm/ObjectYAML/ELFYAML.h" #include "llvm/ObjectYAML/MachOYAML.h" @@ -23,6 +24,7 @@ namespace yaml { class IO; struct YamlObjectFile { + std::unique_ptr<ArchYAML::Archive> Arch; std::unique_ptr<ELFYAML::Object> Elf; std::unique_ptr<COFFYAML::Object> Coff; std::unique_ptr<MachOYAML::Object> MachO; diff --git a/llvm/include/llvm/ObjectYAML/WasmYAML.h b/llvm/include/llvm/ObjectYAML/WasmYAML.h index bffb314e2d3b..80f1b4006205 100644 --- a/llvm/include/llvm/ObjectYAML/WasmYAML.h +++ b/llvm/include/llvm/ObjectYAML/WasmYAML.h @@ -53,6 +53,7 @@ struct Limits { struct Table { TableType ElemType; Limits TableLimits; + uint32_t Index; }; struct Export { @@ -220,6 +221,8 @@ struct NameSection : CustomSection { } std::vector<NameEntry> FunctionNames; + std::vector<NameEntry> GlobalNames; + std::vector<NameEntry> DataSegmentNames; }; struct LinkingSection : CustomSection { diff --git 
a/llvm/include/llvm/ObjectYAML/yaml2obj.h b/llvm/include/llvm/ObjectYAML/yaml2obj.h index 34def363a55b..1f693475c946 100644 --- a/llvm/include/llvm/ObjectYAML/yaml2obj.h +++ b/llvm/include/llvm/ObjectYAML/yaml2obj.h @@ -40,12 +40,17 @@ namespace WasmYAML { struct Object; } +namespace ArchYAML { +struct Archive; +} + namespace yaml { class Input; struct YamlObjectFile; using ErrorHandler = llvm::function_ref<void(const Twine &Msg)>; +bool yaml2archive(ArchYAML::Archive &Doc, raw_ostream &Out, ErrorHandler EH); bool yaml2coff(COFFYAML::Object &Doc, raw_ostream &Out, ErrorHandler EH); bool yaml2elf(ELFYAML::Object &Doc, raw_ostream &Out, ErrorHandler EH, uint64_t MaxSize); diff --git a/llvm/include/llvm/Option/ArgList.h b/llvm/include/llvm/Option/ArgList.h index 74bfadcba726..9ce783978185 100644 --- a/llvm/include/llvm/Option/ArgList.h +++ b/llvm/include/llvm/Option/ArgList.h @@ -412,6 +412,10 @@ public: return ArgStrings[Index]; } + void replaceArgString(unsigned Index, const Twine &S) { + ArgStrings[Index] = MakeArgString(S); + } + unsigned getNumInputArgStrings() const override { return NumInputArgStrings; } diff --git a/llvm/include/llvm/Option/OptParser.td b/llvm/include/llvm/Option/OptParser.td index e32355444d7b..9a179c511bd6 100644 --- a/llvm/include/llvm/Option/OptParser.td +++ b/llvm/include/llvm/Option/OptParser.td @@ -13,7 +13,7 @@ // Define the kinds of options. -class OptionKind<string name, int precedence = 0, bit sentinel = 0> { +class OptionKind<string name, int precedence = 0, bit sentinel = false> { string Name = name; // The kind precedence, kinds with lower precedence are matched first. int Precedence = precedence; @@ -24,9 +24,9 @@ class OptionKind<string name, int precedence = 0, bit sentinel = 0> { // An option group. def KIND_GROUP : OptionKind<"Group">; // The input option kind. -def KIND_INPUT : OptionKind<"Input", 1, 1>; +def KIND_INPUT : OptionKind<"Input", 1, true>; // The unknown option kind. -def KIND_UNKNOWN : OptionKind<"Unknown", 2, 1>; +def KIND_UNKNOWN : OptionKind<"Unknown", 2, true>; // A flag with no values. def KIND_FLAG : OptionKind<"Flag">; // An option which prefixes its (single) value. @@ -97,17 +97,19 @@ class Option<list<string> prefixes, string name, OptionKind kind> { OptionGroup Group = ?; Option Alias = ?; list<string> AliasArgs = []; - string MarshallingKind = ?; + code MacroPrefix = ""; code KeyPath = ?; code DefaultValue = ?; - bit ShouldAlwaysEmit = 0; - // Used by the Flag option kind. - bit IsPositive = 1; - // Used by the String option kind. + code ImpliedValue = ?; + code ImpliedCheck = "false"; + code ShouldParse = "true"; + bit ShouldAlwaysEmit = false; code NormalizerRetTy = ?; code NormalizedValuesScope = ""; code Normalizer = ""; code Denormalizer = ""; + code ValueMerger = "mergeForwardValue"; + code ValueExtractor = "extractForwardValue"; list<code> NormalizedValues = ?; } @@ -144,34 +146,85 @@ class ValuesCode<code valuecode> { code ValuesCode = valuecode; } // Helpers for defining marshalling information. 
-class MarshallingInfo<code keypath, code defaultvalue> { - code KeyPath = keypath; +class KeyPathAndMacro<string key_path_prefix, string key_path_base, + string macro_prefix = ""> { + code KeyPath = !strconcat(key_path_prefix, key_path_base); + code MacroPrefix = macro_prefix; +} + +def EmptyKPM : KeyPathAndMacro<"", "">; + +class ImpliedByAnyOf<list<string> key_paths, code value = "true"> { + code ImpliedCheck = !foldl("false", key_paths, accumulator, key_path, + !strconcat(accumulator, " || ", key_path)); + code ImpliedValue = value; +} + +class MarshallingInfo<KeyPathAndMacro kpm, code defaultvalue> { + code KeyPath = kpm.KeyPath; + code MacroPrefix = kpm.MacroPrefix; code DefaultValue = defaultvalue; } -class MarshallingInfoString<code keypath, code defaultvalue, code normalizerretty> - : MarshallingInfo<keypath, defaultvalue> { - string MarshallingKind = "string"; - code NormalizerRetTy = normalizerretty; + +class MarshallingInfoString<KeyPathAndMacro kpm, code defaultvalue="std::string()"> + : MarshallingInfo<kpm, defaultvalue> { + code Normalizer = "normalizeString"; + code Denormalizer = "denormalizeString"; +} + +class MarshallingInfoStringInt<KeyPathAndMacro kpm, code defaultvalue="0", code type="unsigned"> + : MarshallingInfo<kpm, defaultvalue> { + code Normalizer = "normalizeStringIntegral<"#type#">"; + code Denormalizer = "denormalizeString"; +} + +class MarshallingInfoStringVector<KeyPathAndMacro kpm> + : MarshallingInfo<kpm, "std::vector<std::string>({})"> { + code Normalizer = "normalizeStringVector"; + code Denormalizer = "denormalizeStringVector"; +} + +class MarshallingInfoFlag<KeyPathAndMacro kpm, code defaultvalue = "false"> + : MarshallingInfo<kpm, defaultvalue> { + code Normalizer = "normalizeSimpleFlag"; + code Denormalizer = "denormalizeSimpleFlag"; +} + +class MarshallingInfoNegativeFlag<KeyPathAndMacro kpm, code defaultvalue = "true"> + : MarshallingInfo<kpm, defaultvalue> { + code Normalizer = "normalizeSimpleNegativeFlag"; + code Denormalizer = "denormalizeSimpleFlag"; +} + +class MarshallingInfoBitfieldFlag<KeyPathAndMacro kpm, code value> + : MarshallingInfoFlag<kpm, "0u"> { + code Normalizer = "makeFlagToValueNormalizer("#value#")"; + code ValueMerger = "mergeMaskValue"; + code ValueExtractor = "(extractMaskValue<unsigned, decltype("#value#"), "#value#">)"; } -class MarshallingInfoFlag<code keypath, code defaultvalue> - : MarshallingInfo<keypath, defaultvalue> { - string MarshallingKind = "flag"; +// Marshalling info for booleans. Applied to the flag setting keypath to false. +class MarshallingInfoBooleanFlag<KeyPathAndMacro kpm, code defaultvalue, code value, code name, + code other_value, code other_name> + : MarshallingInfoFlag<kpm, defaultvalue> { + code Normalizer = "makeBooleanOptionNormalizer("#value#", "#other_value#", OPT_"#other_name#")"; + code Denormalizer = "makeBooleanOptionDenormalizer("#value#")"; } // Mixins for additional marshalling attributes. 
-class IsNegative { bit IsPositive = 0; } -class AlwaysEmit { bit ShouldAlwaysEmit = 1; } +class ShouldParseIf<code condition> { code ShouldParse = condition; } +class AlwaysEmit { bit ShouldAlwaysEmit = true; } class Normalizer<code normalizer> { code Normalizer = normalizer; } class Denormalizer<code denormalizer> { code Denormalizer = denormalizer; } class NormalizedValuesScope<code scope> { code NormalizedValuesScope = scope; } class NormalizedValues<list<code> definitions> { list<code> NormalizedValues = definitions; } -class DenormalizeString { code Denormalizer = "denormalizeString"; } class AutoNormalizeEnum { code Normalizer = "normalizeSimpleEnum"; code Denormalizer = "denormalizeSimpleEnum"; } +class ValueMerger<code merger> { code ValueMerger = merger; } +class ValueExtractor<code extractor> { code ValueExtractor = extractor; } // Predefined options. diff --git a/llvm/include/llvm/Option/OptTable.h b/llvm/include/llvm/Option/OptTable.h index 5db30436069d..58c09b23d237 100644 --- a/llvm/include/llvm/Option/OptTable.h +++ b/llvm/include/llvm/Option/OptTable.h @@ -13,6 +13,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSet.h" #include "llvm/Option/OptSpecifier.h" +#include "llvm/Support/StringSaver.h" #include <cassert> #include <string> #include <vector> @@ -20,6 +21,7 @@ namespace llvm { class raw_ostream; +template <typename Fn> class function_ref; namespace opt { @@ -48,7 +50,7 @@ public: unsigned ID; unsigned char Kind; unsigned char Param; - unsigned short Flags; + unsigned int Flags; unsigned short GroupID; unsigned short AliasID; const char *AliasArgs; @@ -59,6 +61,8 @@ private: /// The option information table. std::vector<Info> OptionInfos; bool IgnoreCase; + bool GroupedShortOptions = false; + const char *EnvVar = nullptr; unsigned TheInputOptionID = 0; unsigned TheUnknownOptionID = 0; @@ -79,6 +83,8 @@ private: return OptionInfos[id - 1]; } + Arg *parseOneArgGrouped(InputArgList &Args, unsigned &Index) const; + protected: OptTable(ArrayRef<Info> OptionInfos, bool IgnoreCase = false); @@ -120,6 +126,12 @@ public: return getInfo(id).MetaVar; } + /// Specify the environment variable where initial options should be read. + void setInitialOptionsFromEnvironment(const char *E) { EnvVar = E; } + + /// Support grouped short options. e.g. -ab represents -a -b. + void setGroupedShortOptions(bool Value) { GroupedShortOptions = Value; } + /// Find possible value for given flags. This is used for shell /// autocompletion. /// @@ -140,7 +152,7 @@ public: /// /// \return The vector of flags which start with Cur. std::vector<std::string> findByPrefix(StringRef Cur, - unsigned short DisableFlags) const; + unsigned int DisableFlags) const; /// Find the OptTable option that most closely matches the given string. /// @@ -213,6 +225,18 @@ public: unsigned &MissingArgCount, unsigned FlagsToInclude = 0, unsigned FlagsToExclude = 0) const; + /// A convenience helper which handles optional initial options populated from + /// an environment variable, expands response files recursively and parses + /// options. + /// + /// \param ErrorFn - Called on a formatted error message for missing arguments + /// or unknown options. + /// \return An InputArgList; on error this will contain all the options which + /// could be parsed. + InputArgList parseArgs(int Argc, char *const *Argv, OptSpecifier Unknown, + StringSaver &Saver, + function_ref<void(StringRef)> ErrorFn) const; + /// Render the help text for an option table. /// /// \param OS - The stream to write the help text to. 
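A hedged sketch of how a driver might use the new OptTable conveniences shown above (grouped short options, environment-seeded options, and the parseArgs helper). The option table instance, the unknown-option specifier, and the MYTOOL_OPTIONS environment variable name are assumptions standing in for whatever a real tool's TableGen-generated table defines; only the member signatures from the hunk above are relied on.

#include "llvm/ADT/STLExtras.h"
#include "llvm/Option/ArgList.h"
#include "llvm/Option/OptTable.h"
#include "llvm/Support/StringSaver.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;
using namespace llvm::opt;

// Parse a command line, allowing grouped short options and an optional seed of
// options taken from an environment variable (the variable name is
// hypothetical). UnknownID is the tool's generated "unknown option" specifier.
static InputArgList parseDriverArgs(OptTable &T, OptSpecifier UnknownID,
                                    int Argc, char *const *Argv,
                                    StringSaver &Saver) {
  T.setGroupedShortOptions(true);                        // accept -ab as -a -b
  T.setInitialOptionsFromEnvironment("MYTOOL_OPTIONS");  // hypothetical name
  return T.parseArgs(Argc, Argv, UnknownID, Saver,
                     [](StringRef Msg) { errs() << Msg << "\n"; });
}
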
diff --git a/llvm/include/llvm/Option/Option.h b/llvm/include/llvm/Option/Option.h index 73ee8e0073b8..196cf656355d 100644 --- a/llvm/include/llvm/Option/Option.h +++ b/llvm/include/llvm/Option/Option.h @@ -213,14 +213,16 @@ public: /// Index to the position where argument parsing should resume /// (even if the argument is missing values). /// - /// \param ArgSize The number of bytes taken up by the matched Option prefix - /// and name. This is used to determine where joined values - /// start. - Arg *accept(const ArgList &Args, unsigned &Index, unsigned ArgSize) const; + /// \p CurArg The argument to be matched. It may be shorter than the + /// underlying storage to represent a Joined argument. + /// \p GroupedShortOption If true, we are handling the fallback case of + /// parsing a prefix of the current argument as a short option. + Arg *accept(const ArgList &Args, StringRef CurArg, bool GroupedShortOption, + unsigned &Index) const; private: - Arg *acceptInternal(const ArgList &Args, unsigned &Index, - unsigned ArgSize) const; + Arg *acceptInternal(const ArgList &Args, StringRef CurArg, + unsigned &Index) const; public: void print(raw_ostream &O) const; diff --git a/llvm/include/llvm/Pass.h b/llvm/include/llvm/Pass.h index 2fe7aee2e37e..8aa9ba90a9ca 100644 --- a/llvm/include/llvm/Pass.h +++ b/llvm/include/llvm/Pass.h @@ -69,6 +69,20 @@ enum PassKind { PT_PassManager }; +/// This enumerates the LLVM full LTO or ThinLTO optimization phases. +enum class ThinOrFullLTOPhase { + /// No LTO/ThinLTO behavior needed. + None, + /// ThinLTO prelink (summary) phase. + ThinLTOPreLink, + /// ThinLTO postlink (backend compile) phase. + ThinLTOPostLink, + /// Full LTO prelink phase. + FullLTOPreLink, + /// Full LTO postlink (backend compile) phase. + FullLTOPostLink +}; + //===----------------------------------------------------------------------===// /// Pass interface - Implemented by all 'passes'. Subclass this if you are an /// interprocedural optimization or you do not fit into any of the more @@ -309,6 +323,12 @@ protected: /// then the value of this boolean will be true, otherwise false. /// This is the storage for the -time-passes option. extern bool TimePassesIsEnabled; +/// If TimePassesPerRun is true, there would be one line of report for +/// each pass invocation. +/// If TimePassesPerRun is false, there would be only one line of +/// report for each pass (even there are more than one pass objects). +/// (For new pass manager only) +extern bool TimePassesPerRun; } // end namespace llvm diff --git a/llvm/include/llvm/PassAnalysisSupport.h b/llvm/include/llvm/PassAnalysisSupport.h index 84df171d38d8..4bed3cb55a90 100644 --- a/llvm/include/llvm/PassAnalysisSupport.h +++ b/llvm/include/llvm/PassAnalysisSupport.h @@ -17,11 +17,12 @@ #if !defined(LLVM_PASS_H) || defined(LLVM_PASSANALYSISSUPPORT_H) #error "Do not include <PassAnalysisSupport.h>; include <Pass.h> instead" -#endif +#endif #ifndef LLVM_PASSANALYSISSUPPORT_H #define LLVM_PASSANALYSISSUPPORT_H +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include <cassert> #include <tuple> @@ -58,6 +59,11 @@ private: SmallVector<AnalysisID, 0> Used; bool PreservesAll = false; + void pushUnique(VectorType &Set, AnalysisID ID) { + if (!llvm::is_contained(Set, ID)) + Set.push_back(ID); + } + public: AnalysisUsage() = default; @@ -80,17 +86,17 @@ public: ///@{ /// Add the specified ID to the set of analyses preserved by this pass. 
AnalysisUsage &addPreservedID(const void *ID) { - Preserved.push_back(ID); + pushUnique(Preserved, ID); return *this; } AnalysisUsage &addPreservedID(char &ID) { - Preserved.push_back(&ID); + pushUnique(Preserved, &ID); return *this; } /// Add the specified Pass class to the set of analyses preserved by this pass. template<class PassClass> AnalysisUsage &addPreserved() { - Preserved.push_back(&PassClass::ID); + pushUnique(Preserved, &PassClass::ID); return *this; } ///@} @@ -99,17 +105,17 @@ public: /// Add the specified ID to the set of analyses used by this pass if they are /// available.. AnalysisUsage &addUsedIfAvailableID(const void *ID) { - Used.push_back(ID); + pushUnique(Used, ID); return *this; } AnalysisUsage &addUsedIfAvailableID(char &ID) { - Used.push_back(&ID); + pushUnique(Used, &ID); return *this; } /// Add the specified Pass class to the set of analyses used by this pass. template<class PassClass> AnalysisUsage &addUsedIfAvailable() { - Used.push_back(&PassClass::ID); + pushUnique(Used, &PassClass::ID); return *this; } ///@} @@ -183,7 +189,7 @@ public: } /// Return analysis result or null if it doesn't exist. - Pass *getAnalysisIfAvailable(AnalysisID ID, bool Direction) const; + Pass *getAnalysisIfAvailable(AnalysisID ID) const; private: /// This keeps track of which passes implements the interfaces that are @@ -207,7 +213,7 @@ AnalysisType *Pass::getAnalysisIfAvailable() const { const void *PI = &AnalysisType::ID; - Pass *ResultPass = Resolver->getAnalysisIfAvailable(PI, true); + Pass *ResultPass = Resolver->getAnalysisIfAvailable(PI); if (!ResultPass) return nullptr; // Because the AnalysisType may not be a subclass of pass (for diff --git a/llvm/include/llvm/Passes/PassBuilder.h b/llvm/include/llvm/Passes/PassBuilder.h index 0357e4a2fc05..28f9e83bf76a 100644 --- a/llvm/include/llvm/Passes/PassBuilder.h +++ b/llvm/include/llvm/Passes/PassBuilder.h @@ -36,11 +36,15 @@ struct PGOOptions { enum CSPGOAction { NoCSAction, CSIRInstr, CSIRUse }; PGOOptions(std::string ProfileFile = "", std::string CSProfileGenFile = "", std::string ProfileRemappingFile = "", PGOAction Action = NoAction, - CSPGOAction CSAction = NoCSAction, bool SamplePGOSupport = false) + CSPGOAction CSAction = NoCSAction, + bool DebugInfoForProfiling = false, + bool PseudoProbeForProfiling = false) : ProfileFile(ProfileFile), CSProfileGenFile(CSProfileGenFile), ProfileRemappingFile(ProfileRemappingFile), Action(Action), - CSAction(CSAction), - SamplePGOSupport(SamplePGOSupport || Action == SampleUse) { + CSAction(CSAction), DebugInfoForProfiling(DebugInfoForProfiling || + (Action == SampleUse && + !PseudoProbeForProfiling)), + PseudoProbeForProfiling(PseudoProbeForProfiling) { // Note, we do allow ProfileFile.empty() for Action=IRUse LTO can // callback with IRUse action without ProfileFile. @@ -55,16 +59,26 @@ struct PGOOptions { // a profile. assert(this->CSAction != CSIRUse || this->Action == IRUse); - // If neither Action nor CSAction, SamplePGOSupport needs to be true. + // If neither Action nor CSAction, DebugInfoForProfiling or + // PseudoProbeForProfiling needs to be true. assert(this->Action != NoAction || this->CSAction != NoCSAction || - this->SamplePGOSupport); + this->DebugInfoForProfiling || this->PseudoProbeForProfiling); + + // Pseudo probe emission does not work with -fdebug-info-for-profiling since + // they both use the discriminator field of debug lines but for different + // purposes. 
+ if (this->DebugInfoForProfiling && this->PseudoProbeForProfiling) { + report_fatal_error( + "Pseudo probes cannot be used with -debug-info-for-profiling", false); + } } std::string ProfileFile; std::string CSProfileGenFile; std::string ProfileRemappingFile; PGOAction Action; CSPGOAction CSAction; - bool SamplePGOSupport; + bool DebugInfoForProfiling; + bool PseudoProbeForProfiling; }; /// Tunable parameters for passes in the default pipelines. @@ -109,6 +123,13 @@ public: /// Tuning option to enable/disable call graph profile. Its default value is /// that of the flag: `-enable-npm-call-graph-profile`. bool CallGraphProfile; + + /// Tuning option to enable/disable function merging. Its default value is + /// false. + bool MergeFunctions; + + /// Uniquefy function linkage name. Its default value is false. + bool UniqueLinkageNames; }; /// This class provides access to building LLVM's passes. @@ -118,6 +139,7 @@ public: /// of the built-in passes, and those may reference these members during /// construction. class PassBuilder { + bool DebugLogging; TargetMachine *TM; PipelineTuningOptions PTO; Optional<PGOOptions> PGOOpt; @@ -137,18 +159,6 @@ public: std::vector<PipelineElement> InnerPipeline; }; - /// ThinLTO phase. - /// - /// This enumerates the LLVM ThinLTO optimization phases. - enum class ThinLTOPhase { - /// No ThinLTO behavior needed. - None, - /// ThinLTO prelink (summary) phase. - PreLink, - /// ThinLTO postlink (backend compile) phase. - PostLink - }; - /// LLVM-provided high-level optimization levels. /// /// This enumerates the LLVM-provided high-level optimization levels. Each @@ -259,11 +269,10 @@ public: unsigned getSizeLevel() const { return SizeLevel; } }; - explicit PassBuilder(TargetMachine *TM = nullptr, + explicit PassBuilder(bool DebugLogging = false, TargetMachine *TM = nullptr, PipelineTuningOptions PTO = PipelineTuningOptions(), Optional<PGOOptions> PGOOpt = None, - PassInstrumentationCallbacks *PIC = nullptr) - : TM(TM), PTO(PTO), PGOOpt(PGOOpt), PIC(PIC) {} + PassInstrumentationCallbacks *PIC = nullptr); /// Cross register the analysis managers through their proxies. /// @@ -321,8 +330,7 @@ public: /// \p Phase indicates the current ThinLTO phase. FunctionPassManager buildFunctionSimplificationPipeline(OptimizationLevel Level, - ThinLTOPhase Phase, - bool DebugLogging = false); + ThinOrFullLTOPhase Phase); /// Construct the core LLVM module canonicalization and simplification /// pipeline. @@ -339,16 +347,13 @@ public: /// build them. /// /// \p Phase indicates the current ThinLTO phase. - ModulePassManager - buildModuleSimplificationPipeline(OptimizationLevel Level, - ThinLTOPhase Phase, - bool DebugLogging = false); + ModulePassManager buildModuleSimplificationPipeline(OptimizationLevel Level, + ThinOrFullLTOPhase Phase); /// Construct the module pipeline that performs inlining as well as /// the inlining-driven cleanups. ModuleInlinerWrapperPass buildInlinerPipeline(OptimizationLevel Level, - ThinLTOPhase Phase, - bool DebugLogging = false); + ThinOrFullLTOPhase Phase); /// Construct the core LLVM module optimization pipeline. /// @@ -364,7 +369,6 @@ public: /// require some transformations for semantic reasons, they should explicitly /// build them. ModulePassManager buildModuleOptimizationPipeline(OptimizationLevel Level, - bool DebugLogging = false, bool LTOPreLink = false); /// Build a per-module default optimization pipeline. @@ -379,7 +383,6 @@ public: /// require some transformations for semantic reasons, they should explicitly /// build them. 
ModulePassManager buildPerModuleDefaultPipeline(OptimizationLevel Level, - bool DebugLogging = false, bool LTOPreLink = false); /// Build a pre-link, ThinLTO-targeting default optimization pipeline to @@ -394,9 +397,7 @@ public: /// only intended for use when attempting to optimize code. If frontends /// require some transformations for semantic reasons, they should explicitly /// build them. - ModulePassManager - buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level, - bool DebugLogging = false); + ModulePassManager buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level); /// Build an ThinLTO default optimization pipeline to a pass manager. /// @@ -410,7 +411,7 @@ public: /// require some transformations for semantic reasons, they should explicitly /// build them. ModulePassManager - buildThinLTODefaultPipeline(OptimizationLevel Level, bool DebugLogging, + buildThinLTODefaultPipeline(OptimizationLevel Level, const ModuleSummaryIndex *ImportSummary); /// Build a pre-link, LTO-targeting default optimization pipeline to a pass @@ -425,8 +426,7 @@ public: /// only intended for use when attempting to optimize code. If frontends /// require some transformations for semantic reasons, they should explicitly /// build them. - ModulePassManager buildLTOPreLinkDefaultPipeline(OptimizationLevel Level, - bool DebugLogging = false); + ModulePassManager buildLTOPreLinkDefaultPipeline(OptimizationLevel Level); /// Build an LTO default optimization pipeline to a pass manager. /// @@ -440,11 +440,19 @@ public: /// require some transformations for semantic reasons, they should explicitly /// build them. ModulePassManager buildLTODefaultPipeline(OptimizationLevel Level, - bool DebugLogging, ModuleSummaryIndex *ExportSummary); + /// Build an O0 pipeline with the minimal semantically required passes. + /// + /// This should only be used for non-LTO and LTO pre-link pipelines. + ModulePassManager buildO0DefaultPipeline(OptimizationLevel Level, + bool LTOPreLink = false); + /// Build the default `AAManager` with the default alias analysis pipeline /// registered. + /// + /// This also adds target-specific alias analyses registered via + /// TargetMachine::registerDefaultAliasAnalyses(). AAManager buildDefaultAAPipeline(); /// Parse a textual pass pipeline description into a \c @@ -472,13 +480,22 @@ public: /// module(function(loop(lpass1,lpass2,lpass3))) /// /// This shortcut is especially useful for debugging and testing small pass - /// combinations. Note that these shortcuts don't introduce any other magic. - /// If the sequence of passes aren't all the exact same kind of pass, it will - /// be an error. You cannot mix different levels implicitly, you must - /// explicitly form a pass manager in which to nest passes. - Error parsePassPipeline(ModulePassManager &MPM, StringRef PipelineText, - bool VerifyEachPass = true, - bool DebugLogging = false); + /// combinations. + /// + /// The sequence of passes aren't necessarily the exact same kind of pass. + /// You can mix different levels implicitly if adaptor passes are defined to + /// make them work. For example, + /// + /// mpass1,fpass1,fpass2,mpass2,lpass1 + /// + /// This pipeline uses only one pass manager: the top-level module manager. + /// fpass1,fpass2 and lpass1 are added into the the top-level module manager + /// using only adaptor passes. No nested function/loop pass managers are + /// added. The purpose is to allow easy pass testing when the user + /// specifically want the pass to run under a adaptor directly. 
This is + /// preferred when a pipeline is largely of one type, but one or just a few + /// passes are of different types(See PassBuilder.cpp for examples). + Error parsePassPipeline(ModulePassManager &MPM, StringRef PipelineText); /// {{@ Parse a textual pass pipeline description into a specific PassManager /// @@ -487,15 +504,9 @@ public: /// this is the valid pipeline text: /// /// function(lpass) - Error parsePassPipeline(CGSCCPassManager &CGPM, StringRef PipelineText, - bool VerifyEachPass = true, - bool DebugLogging = false); - Error parsePassPipeline(FunctionPassManager &FPM, StringRef PipelineText, - bool VerifyEachPass = true, - bool DebugLogging = false); - Error parsePassPipeline(LoopPassManager &LPM, StringRef PipelineText, - bool VerifyEachPass = true, - bool DebugLogging = false); + Error parsePassPipeline(CGSCCPassManager &CGPM, StringRef PipelineText); + Error parsePassPipeline(FunctionPassManager &FPM, StringRef PipelineText); + Error parsePassPipeline(LoopPassManager &LPM, StringRef PipelineText); /// @}} /// Parse a textual alias analysis pipeline into the provided AA manager. @@ -594,17 +605,23 @@ public: /// pipeline. This does not apply to 'backend' compiles (LTO and ThinLTO /// link-time pipelines). void registerPipelineStartEPCallback( - const std::function<void(ModulePassManager &)> &C) { + const std::function<void(ModulePassManager &, OptimizationLevel)> &C) { PipelineStartEPCallbacks.push_back(C); } + /// Register a callback for a default optimizer pipeline extension point. + /// + /// This extension point allows adding optimization right after passes that do + /// basic simplification of the input IR. + void registerPipelineEarlySimplificationEPCallback( + const std::function<void(ModulePassManager &, OptimizationLevel)> &C) { + PipelineEarlySimplificationEPCallbacks.push_back(C); + } + /// Register a callback for a default optimizer pipeline extension point /// /// This extension point allows adding optimizations at the very end of the - /// function optimization pipeline. A key difference between this and the - /// legacy PassManager's OptimizerLast callback is that this extension point - /// is not triggered at O0. Extensions to the O0 pipeline should append their - /// passes to the end of the overall pipeline. + /// function optimization pipeline. void registerOptimizerLastEPCallback( const std::function<void(ModulePassManager &, OptimizationLevel)> &C) { OptimizerLastEPCallbacks.push_back(C); @@ -671,17 +688,13 @@ public: /// PassManagers and populate the passed ModulePassManager. void registerParseTopLevelPipelineCallback( const std::function<bool(ModulePassManager &, ArrayRef<PipelineElement>, - bool VerifyEachPass, bool DebugLogging)> &C) { - TopLevelPipelineParsingCallbacks.push_back(C); - } + bool DebugLogging)> &C); /// Add PGOInstrumenation passes for O0 only. - void addPGOInstrPassesForO0(ModulePassManager &MPM, bool DebugLogging, - bool RunProfileGen, bool IsCS, - std::string ProfileFile, + void addPGOInstrPassesForO0(ModulePassManager &MPM, bool RunProfileGen, + bool IsCS, std::string ProfileFile, std::string ProfileRemappingFile); - /// Returns PIC. External libraries can use this to register pass /// instrumentation callbacks. 
PassInstrumentationCallbacks *getPassInstrumentationCallbacks() const { @@ -690,38 +703,32 @@ public: private: // O1 pass pipeline - FunctionPassManager buildO1FunctionSimplificationPipeline( - OptimizationLevel Level, ThinLTOPhase Phase, bool DebugLogging = false); + FunctionPassManager + buildO1FunctionSimplificationPipeline(OptimizationLevel Level, + ThinOrFullLTOPhase Phase); + + void addRequiredLTOPreLinkPasses(ModulePassManager &MPM); static Optional<std::vector<PipelineElement>> parsePipelineText(StringRef Text); - Error parseModulePass(ModulePassManager &MPM, const PipelineElement &E, - bool VerifyEachPass, bool DebugLogging); - Error parseCGSCCPass(CGSCCPassManager &CGPM, const PipelineElement &E, - bool VerifyEachPass, bool DebugLogging); - Error parseFunctionPass(FunctionPassManager &FPM, const PipelineElement &E, - bool VerifyEachPass, bool DebugLogging); - Error parseLoopPass(LoopPassManager &LPM, const PipelineElement &E, - bool VerifyEachPass, bool DebugLogging); + Error parseModulePass(ModulePassManager &MPM, const PipelineElement &E); + Error parseCGSCCPass(CGSCCPassManager &CGPM, const PipelineElement &E); + Error parseFunctionPass(FunctionPassManager &FPM, const PipelineElement &E); + Error parseLoopPass(LoopPassManager &LPM, const PipelineElement &E); bool parseAAPassName(AAManager &AA, StringRef Name); Error parseLoopPassPipeline(LoopPassManager &LPM, - ArrayRef<PipelineElement> Pipeline, - bool VerifyEachPass, bool DebugLogging); + ArrayRef<PipelineElement> Pipeline); Error parseFunctionPassPipeline(FunctionPassManager &FPM, - ArrayRef<PipelineElement> Pipeline, - bool VerifyEachPass, bool DebugLogging); + ArrayRef<PipelineElement> Pipeline); Error parseCGSCCPassPipeline(CGSCCPassManager &CGPM, - ArrayRef<PipelineElement> Pipeline, - bool VerifyEachPass, bool DebugLogging); + ArrayRef<PipelineElement> Pipeline); Error parseModulePassPipeline(ModulePassManager &MPM, - ArrayRef<PipelineElement> Pipeline, - bool VerifyEachPass, bool DebugLogging); + ArrayRef<PipelineElement> Pipeline); - void addPGOInstrPasses(ModulePassManager &MPM, bool DebugLogging, - OptimizationLevel Level, bool RunProfileGen, bool IsCS, - std::string ProfileFile, + void addPGOInstrPasses(ModulePassManager &MPM, OptimizationLevel Level, + bool RunProfileGen, bool IsCS, std::string ProfileFile, std::string ProfileRemappingFile); void invokePeepholeEPCallbacks(FunctionPassManager &, OptimizationLevel); @@ -741,8 +748,11 @@ private: SmallVector<std::function<void(ModulePassManager &, OptimizationLevel)>, 2> OptimizerLastEPCallbacks; // Module callbacks - SmallVector<std::function<void(ModulePassManager &)>, 2> + SmallVector<std::function<void(ModulePassManager &, OptimizationLevel)>, 2> PipelineStartEPCallbacks; + SmallVector<std::function<void(ModulePassManager &, OptimizationLevel)>, 2> + PipelineEarlySimplificationEPCallbacks; + SmallVector<std::function<void(ModuleAnalysisManager &)>, 2> ModuleAnalysisRegistrationCallbacks; SmallVector<std::function<bool(StringRef, ModulePassManager &, @@ -750,7 +760,7 @@ private: 2> ModulePipelineParsingCallbacks; SmallVector<std::function<bool(ModulePassManager &, ArrayRef<PipelineElement>, - bool VerifyEachPass, bool DebugLogging)>, + bool DebugLogging)>, 2> TopLevelPipelineParsingCallbacks; // CGSCC callbacks diff --git a/llvm/include/llvm/Passes/StandardInstrumentations.h b/llvm/include/llvm/Passes/StandardInstrumentations.h index 3d3002eecce9..795a980878e2 100644 --- a/llvm/include/llvm/Passes/StandardInstrumentations.h +++ 
b/llvm/include/llvm/Passes/StandardInstrumentations.h @@ -16,8 +16,12 @@ #define LLVM_PASSES_STANDARDINSTRUMENTATIONS_H #include "llvm/ADT/SmallVector.h" -#include "llvm/IR/PassInstrumentation.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/OptBisect.h" #include "llvm/IR/PassTimingInfo.h" +#include "llvm/IR/ValueHandle.h" +#include "llvm/Support/CommandLine.h" #include <string> #include <utility> @@ -25,6 +29,8 @@ namespace llvm { class Module; +class Function; +class PassInstrumentationCallbacks; /// Instrumentation to print IR before/after passes. /// @@ -32,40 +38,256 @@ class Module; /// (typically Loop or SCC). class PrintIRInstrumentation { public: - PrintIRInstrumentation() = default; ~PrintIRInstrumentation(); void registerCallbacks(PassInstrumentationCallbacks &PIC); private: - bool printBeforePass(StringRef PassID, Any IR); + void printBeforePass(StringRef PassID, Any IR); void printAfterPass(StringRef PassID, Any IR); void printAfterPassInvalidated(StringRef PassID); + bool shouldPrintBeforePass(StringRef PassID); + bool shouldPrintAfterPass(StringRef PassID); + using PrintModuleDesc = std::tuple<const Module *, std::string, StringRef>; void pushModuleDesc(StringRef PassID, Any IR); PrintModuleDesc popModuleDesc(StringRef PassID); + PassInstrumentationCallbacks *PIC; /// Stack of Module description, enough to print the module after a given /// pass. SmallVector<PrintModuleDesc, 2> ModuleDescStack; bool StoreModuleDesc = false; }; +class OptNoneInstrumentation { +public: + OptNoneInstrumentation(bool DebugLogging) : DebugLogging(DebugLogging) {} + void registerCallbacks(PassInstrumentationCallbacks &PIC); + +private: + bool DebugLogging; + bool shouldRun(StringRef PassID, Any IR); +}; + +class OptBisectInstrumentation { +public: + OptBisectInstrumentation() {} + void registerCallbacks(PassInstrumentationCallbacks &PIC); +}; + +// Debug logging for transformation and analysis passes. +class PrintPassInstrumentation { +public: + PrintPassInstrumentation(bool DebugLogging) : DebugLogging(DebugLogging) {} + void registerCallbacks(PassInstrumentationCallbacks &PIC); + +private: + bool DebugLogging; +}; + +class PreservedCFGCheckerInstrumentation { +private: + // CFG is a map BB -> {(Succ, Multiplicity)}, where BB is a non-leaf basic + // block, {(Succ, Multiplicity)} set of all pairs of the block's successors + // and the multiplicity of the edge (BB->Succ). As the mapped sets are + // unordered the order of successors is not tracked by the CFG. In other words + // this allows basic block successors to be swapped by a pass without + // reporting a CFG change. CFG can be guarded by basic block tracking pointers + // in the Graph (BBGuard). That is if any of the block is deleted or RAUWed + // then the CFG is treated poisoned and no block pointer of the Graph is used. 
+ struct CFG { + struct BBGuard final : public CallbackVH { + BBGuard(const BasicBlock *BB) : CallbackVH(BB) {} + void deleted() override { CallbackVH::deleted(); } + void allUsesReplacedWith(Value *) override { CallbackVH::deleted(); } + bool isPoisoned() const { return !getValPtr(); } + }; + + Optional<DenseMap<intptr_t, BBGuard>> BBGuards; + DenseMap<const BasicBlock *, DenseMap<const BasicBlock *, unsigned>> Graph; + + CFG(const Function *F, bool TrackBBLifetime = false); + + bool operator==(const CFG &G) const { + return !isPoisoned() && !G.isPoisoned() && Graph == G.Graph; + } + + bool isPoisoned() const { + if (BBGuards) + for (auto &BB : *BBGuards) { + if (BB.second.isPoisoned()) + return true; + } + return false; + } + + static void printDiff(raw_ostream &out, const CFG &Before, + const CFG &After); + }; + + SmallVector<std::pair<StringRef, Optional<CFG>>, 8> GraphStackBefore; + +public: + static cl::opt<bool> VerifyPreservedCFG; + void registerCallbacks(PassInstrumentationCallbacks &PIC); +}; + +// Base class for classes that report changes to the IR. +// It presents an interface for such classes and provides calls +// on various events as the new pass manager transforms the IR. +// It also provides filtering of information based on hidden options +// specifying which functions are interesting. +// Calls are made for the following events/queries: +// 1. The initial IR processed. +// 2. To get the representation of the IR (of type \p T). +// 3. When a pass does not change the IR. +// 4. When a pass changes the IR (given both before and after representations +// of type \p T). +// 5. When an IR is invalidated. +// 6. When a pass is run on an IR that is not interesting (based on options). +// 7. When a pass is ignored (pass manager or adapter pass). +// 8. To compare two IR representations (of type \p T). +template <typename IRUnitT> class ChangeReporter { +protected: + ChangeReporter(bool RunInVerboseMode) : VerboseMode(RunInVerboseMode) {} + +public: + virtual ~ChangeReporter(); + + // Determine if this pass/IR is interesting and if so, save the IR + // otherwise it is left on the stack without data. + void saveIRBeforePass(Any IR, StringRef PassID); + // Compare the IR from before the pass after the pass. + void handleIRAfterPass(Any IR, StringRef PassID); + // Handle the situation where a pass is invalidated. + void handleInvalidatedPass(StringRef PassID); + +protected: + // Register required callbacks. + void registerRequiredCallbacks(PassInstrumentationCallbacks &PIC); + + // Return true when this is a defined function for which printing + // of changes is desired. + bool isInterestingFunction(const Function &F); + + // Return true when this is a pass for which printing of changes is desired. + bool isInterestingPass(StringRef PassID); + + // Return true when this is a pass on IR for which printing + // of changes is desired. + bool isInteresting(Any IR, StringRef PassID); + + // Called on the first IR processed. + virtual void handleInitialIR(Any IR) = 0; + // Called before and after a pass to get the representation of the IR. + virtual void generateIRRepresentation(Any IR, StringRef PassID, + IRUnitT &Output) = 0; + // Called when the pass is not iteresting. + virtual void omitAfter(StringRef PassID, std::string &Name) = 0; + // Called when an interesting IR has changed. + virtual void handleAfter(StringRef PassID, std::string &Name, + const IRUnitT &Before, const IRUnitT &After, + Any) = 0; + // Called when an interesting pass is invalidated. 
+ virtual void handleInvalidated(StringRef PassID) = 0; + // Called when the IR or pass is not interesting. + virtual void handleFiltered(StringRef PassID, std::string &Name) = 0; + // Called when an ignored pass is encountered. + virtual void handleIgnored(StringRef PassID, std::string &Name) = 0; + // Called to compare the before and after representations of the IR. + virtual bool same(const IRUnitT &Before, const IRUnitT &After) = 0; + + // Stack of IRs before passes. + std::vector<IRUnitT> BeforeStack; + // Is this the first IR seen? + bool InitialIR = true; + + // Run in verbose mode, printing everything? + const bool VerboseMode; +}; + +// An abstract template base class that handles printing banners and +// reporting when things have not changed or are filtered out. +template <typename IRUnitT> +class TextChangeReporter : public ChangeReporter<IRUnitT> { +protected: + TextChangeReporter(bool Verbose); + + // Print a module dump of the first IR that is changed. + void handleInitialIR(Any IR) override; + // Report that the IR was omitted because it did not change. + void omitAfter(StringRef PassID, std::string &Name) override; + // Report that the pass was invalidated. + void handleInvalidated(StringRef PassID) override; + // Report that the IR was filtered out. + void handleFiltered(StringRef PassID, std::string &Name) override; + // Report that the pass was ignored. + void handleIgnored(StringRef PassID, std::string &Name) override; + // Make substitutions in \p S suitable for reporting changes + // after the pass and then print it. + + raw_ostream &Out; +}; + +// A change printer based on the string representation of the IR as created +// by unwrapAndPrint. The string representation is stored in a std::string +// to preserve it as the IR changes in each pass. Note that the banner is +// included in this representation but it is massaged before reporting. +class IRChangedPrinter : public TextChangeReporter<std::string> { +public: + IRChangedPrinter(bool VerboseMode) + : TextChangeReporter<std::string>(VerboseMode) {} + ~IRChangedPrinter() override; + void registerCallbacks(PassInstrumentationCallbacks &PIC); + +protected: + // Called before and after a pass to get the representation of the IR. + void generateIRRepresentation(Any IR, StringRef PassID, + std::string &Output) override; + // Called when an interesting IR has changed. + void handleAfter(StringRef PassID, std::string &Name, + const std::string &Before, const std::string &After, + Any) override; + // Called to compare the before and after representations of the IR. + bool same(const std::string &Before, const std::string &After) override; +}; + +class VerifyInstrumentation { + bool DebugLogging; + +public: + VerifyInstrumentation(bool DebugLogging) : DebugLogging(DebugLogging) {} + void registerCallbacks(PassInstrumentationCallbacks &PIC); +}; + /// This class provides an interface to register all the standard pass /// instrumentations and manages their state (if any). 
class StandardInstrumentations { PrintIRInstrumentation PrintIR; + PrintPassInstrumentation PrintPass; TimePassesHandler TimePasses; + OptNoneInstrumentation OptNone; + OptBisectInstrumentation OptBisect; + PreservedCFGCheckerInstrumentation PreservedCFGChecker; + IRChangedPrinter PrintChangedIR; + VerifyInstrumentation Verify; + + bool VerifyEach; public: - StandardInstrumentations() = default; + StandardInstrumentations(bool DebugLogging, bool VerifyEach = false); void registerCallbacks(PassInstrumentationCallbacks &PIC); TimePassesHandler &getTimePasses() { return TimePasses; } }; + +extern template class ChangeReporter<std::string>; +extern template class TextChangeReporter<std::string>; + } // namespace llvm #endif diff --git a/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h b/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h index bf0dffc9653c..09f21677ec54 100644 --- a/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h +++ b/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h @@ -55,7 +55,8 @@ enum class coveragemap_error { unsupported_version, truncated, malformed, - decompression_failed + decompression_failed, + invalid_or_missing_arch_specifier }; const std::error_category &coveragemap_category(); @@ -89,6 +90,8 @@ private: /// A Counter is an abstract value that describes how to compute the /// execution count for a region of code using the collected profile count data. struct Counter { + /// The CounterExpression kind (Add or Subtract) is encoded in bit 0 next to + /// the CounterKind. This means CounterKind has to leave bit 0 free. enum CounterKind { Zero, CounterValueReference, Expression }; static const unsigned EncodingTagBits = 2; static const unsigned EncodingTagMask = 0x3; @@ -218,10 +221,20 @@ struct CounterMappingRegion { /// A GapRegion is like a CodeRegion, but its count is only set as the /// line execution count when its the only region in the line. - GapRegion + GapRegion, + + /// A BranchRegion represents leaf-level boolean expressions and is + /// associated with two counters, each representing the number of times the + /// expression evaluates to true or false. + BranchRegion }; + /// Primary Counter that is also used for Branch Regions (TrueCount). Counter Count; + + /// Secondary Counter used for Branch Regions (FalseCount). 
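StandardInstrumentations now bundles the pass printing, opt-none/opt-bisect gating, preserved-CFG checking, change reporting and verification hooks declared above. A minimal sketch of wiring it into a pass manager; the flag values and runWithInstrumentation are illustrative:

#include "llvm/IR/Module.h"
#include "llvm/IR/PassInstrumentation.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Passes/StandardInstrumentations.h"

using namespace llvm;

void runWithInstrumentation(Module &M) {
  PassInstrumentationCallbacks PIC;
  // Debug logging and per-pass verification are owned here now, instead of
  // being threaded as flags through the pipeline building/parsing APIs.
  StandardInstrumentations SI(/*DebugLogging=*/false, /*VerifyEach=*/true);
  SI.registerCallbacks(PIC);

  ModuleAnalysisManager MAM;
  // Expose the callbacks to every pass manager using this analysis manager.
  MAM.registerPass([&] { return PassInstrumentationAnalysis(&PIC); });

  ModulePassManager MPM;
  MPM.addPass(VerifierPass());
  MPM.run(M, MAM);
}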
+ Counter FalseCount; + unsigned FileID, ExpandedFileID; unsigned LineStart, ColumnStart, LineEnd, ColumnEnd; RegionKind Kind; @@ -233,6 +246,15 @@ struct CounterMappingRegion { LineStart(LineStart), ColumnStart(ColumnStart), LineEnd(LineEnd), ColumnEnd(ColumnEnd), Kind(Kind) {} + CounterMappingRegion(Counter Count, Counter FalseCount, unsigned FileID, + unsigned ExpandedFileID, unsigned LineStart, + unsigned ColumnStart, unsigned LineEnd, + unsigned ColumnEnd, RegionKind Kind) + : Count(Count), FalseCount(FalseCount), FileID(FileID), + ExpandedFileID(ExpandedFileID), LineStart(LineStart), + ColumnStart(ColumnStart), LineEnd(LineEnd), ColumnEnd(ColumnEnd), + Kind(Kind) {} + static CounterMappingRegion makeRegion(Counter Count, unsigned FileID, unsigned LineStart, unsigned ColumnStart, unsigned LineEnd, unsigned ColumnEnd) { @@ -262,6 +284,14 @@ struct CounterMappingRegion { LineEnd, (1U << 31) | ColumnEnd, GapRegion); } + static CounterMappingRegion + makeBranchRegion(Counter Count, Counter FalseCount, unsigned FileID, + unsigned LineStart, unsigned ColumnStart, unsigned LineEnd, + unsigned ColumnEnd) { + return CounterMappingRegion(Count, FalseCount, FileID, 0, LineStart, + ColumnStart, LineEnd, ColumnEnd, BranchRegion); + } + inline LineColPair startLoc() const { return LineColPair(LineStart, ColumnStart); } @@ -272,9 +302,17 @@ struct CounterMappingRegion { /// Associates a source range with an execution count. struct CountedRegion : public CounterMappingRegion { uint64_t ExecutionCount; + uint64_t FalseExecutionCount; + bool Folded; CountedRegion(const CounterMappingRegion &R, uint64_t ExecutionCount) - : CounterMappingRegion(R), ExecutionCount(ExecutionCount) {} + : CounterMappingRegion(R), ExecutionCount(ExecutionCount), + FalseExecutionCount(0), Folded(false) {} + + CountedRegion(const CounterMappingRegion &R, uint64_t ExecutionCount, + uint64_t FalseExecutionCount) + : CounterMappingRegion(R), ExecutionCount(ExecutionCount), + FalseExecutionCount(FalseExecutionCount), Folded(false) {} }; /// A Counter mapping context is used to connect the counters, expressions @@ -311,6 +349,8 @@ struct FunctionRecord { std::vector<std::string> Filenames; /// Regions in the function along with their counts. std::vector<CountedRegion> CountedRegions; + /// Branch Regions in the function along with their counts. + std::vector<CountedRegion> CountedBranchRegions; /// The number of times this function was executed. uint64_t ExecutionCount = 0; @@ -320,10 +360,19 @@ struct FunctionRecord { FunctionRecord(FunctionRecord &&FR) = default; FunctionRecord &operator=(FunctionRecord &&) = default; - void pushRegion(CounterMappingRegion Region, uint64_t Count) { + void pushRegion(CounterMappingRegion Region, uint64_t Count, + uint64_t FalseCount) { + if (Region.Kind == CounterMappingRegion::BranchRegion) { + CountedBranchRegions.emplace_back(Region, Count, FalseCount); + // If both counters are hard-coded to zero, then this region represents a + // constant-folded branch. 
+ if (Region.Count.isZero() && Region.FalseCount.isZero()) + CountedBranchRegions.back().Folded = true; + return; + } if (CountedRegions.empty()) ExecutionCount = Count; - CountedRegions.emplace_back(Region, Count); + CountedRegions.emplace_back(Region, Count, FalseCount); } }; @@ -402,7 +451,8 @@ struct CoverageSegment { IsRegionEntry(IsRegionEntry), IsGapRegion(false) {} CoverageSegment(unsigned Line, unsigned Col, uint64_t Count, - bool IsRegionEntry, bool IsGapRegion = false) + bool IsRegionEntry, bool IsGapRegion = false, + bool IsBranchRegion = false) : Line(Line), Col(Col), Count(Count), HasCount(true), IsRegionEntry(IsRegionEntry), IsGapRegion(IsGapRegion) {} @@ -482,6 +532,7 @@ class CoverageData { std::string Filename; std::vector<CoverageSegment> Segments; std::vector<ExpansionRecord> Expansions; + std::vector<CountedRegion> BranchRegions; public: CoverageData() = default; @@ -505,6 +556,9 @@ public: /// Expansions that can be further processed. ArrayRef<ExpansionRecord> getExpansions() const { return Expansions; } + + /// Branches that can be further processed. + ArrayRef<CountedRegion> getBranches() const { return BranchRegions; } }; /// The mapping of profile information to coverage data. @@ -940,7 +994,9 @@ enum CovMapVersion { Version3 = 2, // Function records are named, uniqued, and moved to a dedicated section. Version4 = 3, - // The current version is Version4. + // Branch regions referring to two counters are added + Version5 = 4, + // The current version is Version5. CurrentVersion = INSTR_PROF_COVMAP_VERSION }; diff --git a/llvm/include/llvm/ProfileData/Coverage/CoverageMappingReader.h b/llvm/include/llvm/ProfileData/Coverage/CoverageMappingReader.h index 97f4c32eb035..3a611bcb8cd1 100644 --- a/llvm/include/llvm/ProfileData/Coverage/CoverageMappingReader.h +++ b/llvm/include/llvm/ProfileData/Coverage/CoverageMappingReader.h @@ -67,10 +67,10 @@ public: increment(); return *this; } - bool operator==(const CoverageMappingIterator &RHS) { + bool operator==(const CoverageMappingIterator &RHS) const { return Reader == RHS.Reader; } - bool operator!=(const CoverageMappingIterator &RHS) { + bool operator!=(const CoverageMappingIterator &RHS) const { return Reader != RHS.Reader; } Expected<CoverageMappingRecord &> operator*() { diff --git a/llvm/include/llvm/ProfileData/GCOV.h b/llvm/include/llvm/ProfileData/GCOV.h index 7b9ba4410b65..d4f0b9120577 100644 --- a/llvm/include/llvm/ProfileData/GCOV.h +++ b/llvm/include/llvm/ProfileData/GCOV.h @@ -15,6 +15,7 @@ #define LLVM_PROFILEDATA_GCOV_H #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" @@ -38,7 +39,6 @@ namespace llvm { class GCOVFunction; class GCOVBlock; -class FileInfo; namespace GCOV { @@ -47,10 +47,11 @@ enum GCOVVersion { V304, V407, V408, V800, V900 }; /// A struct for passing gcov options between functions. 
struct Options { Options(bool A, bool B, bool C, bool F, bool P, bool U, bool I, bool L, - bool N, bool T, bool X) + bool M, bool N, bool R, bool T, bool X, std::string SourcePrefix) : AllBlocks(A), BranchInfo(B), BranchCount(C), FuncCoverage(F), PreservePaths(P), UncondBranch(U), Intermediate(I), LongFileNames(L), - NoOutput(N), UseStdout(T), HashFilenames(X) {} + Demangle(M), NoOutput(N), RelativeOnly(R), UseStdout(T), + HashFilenames(X), SourcePrefix(std::move(SourcePrefix)) {} bool AllBlocks; bool BranchInfo; @@ -60,9 +61,12 @@ struct Options { bool UncondBranch; bool Intermediate; bool LongFileNames; + bool Demangle; bool NoOutput; + bool RelativeOnly; bool UseStdout; bool HashFilenames; + std::string SourcePrefix; }; } // end namespace GCOV @@ -187,39 +191,38 @@ public: bool readGCNO(GCOVBuffer &Buffer); bool readGCDA(GCOVBuffer &Buffer); GCOV::GCOVVersion getVersion() const { return Version; } - uint32_t getChecksum() const { return Checksum; } void print(raw_ostream &OS) const; void dump() const; - void collectLineCounts(FileInfo &FI); std::vector<std::string> filenames; StringMap<unsigned> filenameToIdx; -private: +public: bool GCNOInitialized = false; GCOV::GCOVVersion Version; uint32_t Checksum = 0; StringRef cwd; - SmallVector<std::unique_ptr<GCOVFunction>, 16> Functions; + SmallVector<std::unique_ptr<GCOVFunction>, 16> functions; std::map<uint32_t, GCOVFunction *> IdentToFunction; uint32_t RunCount = 0; uint32_t ProgramCount = 0; using iterator = pointee_iterator< SmallVectorImpl<std::unique_ptr<GCOVFunction>>::const_iterator>; - iterator begin() const { return iterator(Functions.begin()); } - iterator end() const { return iterator(Functions.end()); } + iterator begin() const { return iterator(functions.begin()); } + iterator end() const { return iterator(functions.end()); } }; struct GCOVArc { - GCOVArc(GCOVBlock &src, GCOVBlock &dst, bool fallthrough) - : src(src), dst(dst), fallthrough(fallthrough) {} + GCOVArc(GCOVBlock &src, GCOVBlock &dst, uint32_t flags) + : src(src), dst(dst), flags(flags) {} + bool onTree() const; GCOVBlock &src; GCOVBlock &dst; - bool fallthrough; - uint64_t Count = 0; - uint64_t CyclesCount = 0; + uint32_t flags; + uint64_t count = 0; + uint64_t cycleCount = 0; }; /// GCOVFunction - Collects function information. 
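The GCOV::Options constructor above gained Demangle, RelativeOnly and SourcePrefix parameters; since every flag is positional, a construction sketch may help (all values purely illustrative):

#include "llvm/ProfileData/GCOV.h"

// Flag order follows the constructor: AllBlocks, BranchInfo, BranchCount,
// FuncCoverage, PreservePaths, UncondBranch, Intermediate, LongFileNames,
// Demangle, NoOutput, RelativeOnly, UseStdout, HashFilenames, SourcePrefix.
llvm::GCOV::Options makeGcovOptions() {
  return llvm::GCOV::Options(
      /*AllBlocks=*/false, /*BranchInfo=*/true, /*BranchCount=*/true,
      /*FuncCoverage=*/false, /*PreservePaths=*/false, /*UncondBranch=*/false,
      /*Intermediate=*/false, /*LongFileNames=*/false, /*Demangle=*/true,
      /*NoOutput=*/false, /*RelativeOnly=*/true, /*UseStdout=*/false,
      /*HashFilenames=*/false, /*SourcePrefix=*/"build/");
}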
@@ -230,21 +233,18 @@ public: GCOVFunction(GCOVFile &file) : file(file) {} - StringRef getName() const { return Name; } + StringRef getName(bool demangle) const; StringRef getFilename() const; - size_t getNumBlocks() const { return Blocks.size(); } uint64_t getEntryCount() const; - uint64_t getExitCount() const; + GCOVBlock &getExitBlock() const; - BlockIterator block_begin() const { return Blocks.begin(); } - BlockIterator block_end() const { return Blocks.end(); } - iterator_range<BlockIterator> blocks() const { - return make_range(block_begin(), block_end()); + iterator_range<BlockIterator> blocksRange() const { + return make_range(blocks.begin(), blocks.end()); } + uint64_t propagateCounts(const GCOVBlock &v, GCOVArc *pred); void print(raw_ostream &OS) const; void dump() const; - void collectLineCounts(FileInfo &FI); GCOVFile &file; uint32_t ident = 0; @@ -256,40 +256,31 @@ public: uint32_t endColumn = 0; uint8_t artificial = 0; StringRef Name; + mutable SmallString<0> demangled; unsigned srcIdx; - SmallVector<std::unique_ptr<GCOVBlock>, 0> Blocks; + SmallVector<std::unique_ptr<GCOVBlock>, 0> blocks; SmallVector<std::unique_ptr<GCOVArc>, 0> arcs, treeArcs; + DenseSet<const GCOVBlock *> visited; }; /// GCOVBlock - Collects block information. class GCOVBlock { - struct EdgeWeight { - EdgeWeight(GCOVBlock *D) : Dst(D) {} - - GCOVBlock *Dst; - uint64_t Count = 0; - }; - public: using EdgeIterator = SmallVectorImpl<GCOVArc *>::const_iterator; - using BlockVector = SmallVector<const GCOVBlock *, 4>; + using BlockVector = SmallVector<const GCOVBlock *, 1>; using BlockVectorLists = SmallVector<BlockVector, 4>; using Edges = SmallVector<GCOVArc *, 4>; - GCOVBlock(GCOVFunction &P, uint32_t N) : Parent(P), Number(N) {} + GCOVBlock(uint32_t N) : number(N) {} - const GCOVFunction &getParent() const { return Parent; } - void addLine(uint32_t N) { Lines.push_back(N); } - uint32_t getLastLine() const { return Lines.back(); } - uint64_t getCount() const { return Counter; } + void addLine(uint32_t N) { lines.push_back(N); } + uint32_t getLastLine() const { return lines.back(); } + uint64_t getCount() const { return count; } void addSrcEdge(GCOVArc *Edge) { pred.push_back(Edge); } void addDstEdge(GCOVArc *Edge) { succ.push_back(Edge); } - size_t getNumSrcEdges() const { return pred.size(); } - size_t getNumDstEdges() const { return succ.size(); } - iterator_range<EdgeIterator> srcs() const { return make_range(pred.begin(), pred.end()); } @@ -300,116 +291,25 @@ public: void print(raw_ostream &OS) const; void dump() const; - void collectLineCounts(FileInfo &FI); - - static uint64_t getCycleCount(const Edges &Path); - static void unblock(const GCOVBlock *U, BlockVector &Blocked, - BlockVectorLists &BlockLists); - static bool lookForCircuit(const GCOVBlock *V, const GCOVBlock *Start, - Edges &Path, BlockVector &Blocked, - BlockVectorLists &BlockLists, - const BlockVector &Blocks, uint64_t &Count); - static void getCyclesCount(const BlockVector &Blocks, uint64_t &Count); + + static uint64_t + augmentOneCycle(GCOVBlock *src, + std::vector<std::pair<GCOVBlock *, size_t>> &stack); + static uint64_t getCyclesCount(const BlockVector &blocks); static uint64_t getLineCount(const BlockVector &Blocks); public: - GCOVFunction &Parent; - uint32_t Number; - uint64_t Counter = 0; + uint32_t number; + uint64_t count = 0; SmallVector<GCOVArc *, 2> pred; SmallVector<GCOVArc *, 2> succ; - SmallVector<uint32_t, 16> Lines; -}; - -struct GCOVCoverage { - GCOVCoverage() = default; - GCOVCoverage(StringRef Name) : Name(Name) {} - 
- StringRef Name; - - uint32_t LogicalLines = 0; - uint32_t LinesExec = 0; - - uint32_t Branches = 0; - uint32_t BranchesExec = 0; - uint32_t BranchesTaken = 0; -}; - -struct SourceInfo { - StringRef filename; - std::string name; - std::vector<GCOVFunction *> functions; - GCOVCoverage coverage; - SourceInfo(StringRef filename) : filename(filename) {} + SmallVector<uint32_t, 4> lines; + bool traversable = false; + GCOVArc *incoming = nullptr; }; -class FileInfo { -protected: - // It is unlikely--but possible--for multiple functions to be on the same - // line. - // Therefore this typedef allows LineData.Functions to store multiple - // functions - // per instance. This is rare, however, so optimize for the common case. - using FunctionVector = SmallVector<const GCOVFunction *, 1>; - using FunctionLines = DenseMap<uint32_t, FunctionVector>; - using BlockVector = SmallVector<const GCOVBlock *, 4>; - using BlockLines = DenseMap<uint32_t, BlockVector>; - - struct LineData { - LineData() = default; - - BlockLines Blocks; - FunctionLines Functions; - uint32_t LastLine = 0; - }; - -public: - friend class GCOVFile; - FileInfo(const GCOV::Options &Options) : Options(Options) {} - - void addBlockLine(StringRef Filename, uint32_t Line, const GCOVBlock *Block) { - if (Line > LineInfo[Filename].LastLine) - LineInfo[Filename].LastLine = Line; - LineInfo[Filename].Blocks[Line - 1].push_back(Block); - } - - void addFunctionLine(StringRef Filename, uint32_t Line, - const GCOVFunction *Function) { - if (Line > LineInfo[Filename].LastLine) - LineInfo[Filename].LastLine = Line; - LineInfo[Filename].Functions[Line - 1].push_back(Function); - } - - void setRunCount(uint32_t Runs) { RunCount = Runs; } - void setProgramCount(uint32_t Programs) { ProgramCount = Programs; } - void print(raw_ostream &OS, StringRef MainFilename, StringRef GCNOFile, - StringRef GCDAFile, GCOVFile &file); - -protected: - std::string getCoveragePath(StringRef Filename, StringRef MainFilename); - std::unique_ptr<raw_ostream> openCoveragePath(StringRef CoveragePath); - void printFunctionSummary(raw_ostream &OS, const FunctionVector &Funcs) const; - void printBlockInfo(raw_ostream &OS, const GCOVBlock &Block, - uint32_t LineIndex, uint32_t &BlockNo) const; - void printBranchInfo(raw_ostream &OS, const GCOVBlock &Block, - GCOVCoverage &Coverage, uint32_t &EdgeNo); - void printUncondBranchInfo(raw_ostream &OS, uint32_t &EdgeNo, - uint64_t Count) const; - - void printCoverage(raw_ostream &OS, const GCOVCoverage &Coverage) const; - void printFuncCoverage(raw_ostream &OS) const; - void printFileCoverage(raw_ostream &OS) const; - - const GCOV::Options &Options; - StringMap<LineData> LineInfo; - uint32_t RunCount = 0; - uint32_t ProgramCount = 0; - - using FuncCoverageMap = MapVector<const GCOVFunction *, GCOVCoverage>; - - FuncCoverageMap FuncCoverages; - std::vector<SourceInfo> sources; -}; +void gcovOneInput(const GCOV::Options &options, StringRef filename, + StringRef gcno, StringRef gcda, GCOVFile &file); } // end namespace llvm diff --git a/llvm/include/llvm/ProfileData/InstrProf.h b/llvm/include/llvm/ProfileData/InstrProf.h index 62a0c6955708..9c16c353843d 100644 --- a/llvm/include/llvm/ProfileData/InstrProf.h +++ b/llvm/include/llvm/ProfileData/InstrProf.h @@ -74,9 +74,10 @@ inline StringRef getInstrProfValueProfFuncName() { return INSTR_PROF_VALUE_PROF_FUNC_STR; } -/// Return the name profile runtime entry point to do value range profiling. 
-inline StringRef getInstrProfValueRangeProfFuncName() { - return INSTR_PROF_VALUE_RANGE_PROF_FUNC_STR; +/// Return the name profile runtime entry point to do memop size value +/// profiling. +inline StringRef getInstrProfValueProfMemOpFuncName() { + return INSTR_PROF_VALUE_PROF_MEMOP_FUNC_STR; } /// Return the name prefix of variables containing instrumented function names. @@ -561,10 +562,9 @@ StringRef InstrProfSymtab::getFuncNameOrExternalSymbol(uint64_t FuncMD5Hash) { StringRef InstrProfSymtab::getFuncName(uint64_t FuncMD5Hash) { finalizeSymtab(); - auto Result = - std::lower_bound(MD5NameMap.begin(), MD5NameMap.end(), FuncMD5Hash, - [](const std::pair<uint64_t, StringRef> &LHS, - uint64_t RHS) { return LHS.first < RHS; }); + auto Result = llvm::lower_bound(MD5NameMap, FuncMD5Hash, + [](const std::pair<uint64_t, StringRef> &LHS, + uint64_t RHS) { return LHS.first < RHS; }); if (Result != MD5NameMap.end() && Result->first == FuncMD5Hash) return Result->second; return StringRef(); @@ -572,10 +572,9 @@ StringRef InstrProfSymtab::getFuncName(uint64_t FuncMD5Hash) { Function* InstrProfSymtab::getFunction(uint64_t FuncMD5Hash) { finalizeSymtab(); - auto Result = - std::lower_bound(MD5FuncMap.begin(), MD5FuncMap.end(), FuncMD5Hash, - [](const std::pair<uint64_t, Function*> &LHS, - uint64_t RHS) { return LHS.first < RHS; }); + auto Result = llvm::lower_bound(MD5FuncMap, FuncMD5Hash, + [](const std::pair<uint64_t, Function *> &LHS, + uint64_t RHS) { return LHS.first < RHS; }); if (Result != MD5FuncMap.end() && Result->first == FuncMD5Hash) return Result->second; return nullptr; @@ -678,8 +677,8 @@ struct InstrProfValueSiteRecord { /// Optionally scale merged counts by \p Weight. void merge(InstrProfValueSiteRecord &Input, uint64_t Weight, function_ref<void(instrprof_error)> Warn); - /// Scale up value profile data counts. - void scale(uint64_t Weight, function_ref<void(instrprof_error)> Warn); + /// Scale up value profile data counts by N (Numerator) / D (Denominator). + void scale(uint64_t N, uint64_t D, function_ref<void(instrprof_error)> Warn); /// Compute the overlap b/w this record and Input record. void overlap(InstrProfValueSiteRecord &Input, uint32_t ValueKind, @@ -753,8 +752,8 @@ struct InstrProfRecord { function_ref<void(instrprof_error)> Warn); /// Scale up profile counts (including value profile data) by - /// \p Weight. - void scale(uint64_t Weight, function_ref<void(instrprof_error)> Warn); + /// a factor of (N / D). + void scale(uint64_t N, uint64_t D, function_ref<void(instrprof_error)> Warn); /// Sort value profile data (per site) by count. void sortValueData() { @@ -839,8 +838,8 @@ private: uint64_t Weight, function_ref<void(instrprof_error)> Warn); - // Scale up value profile data count. - void scaleValueProfData(uint32_t ValueKind, uint64_t Weight, + // Scale up value profile data count by N (Numerator) / D (Denominator). + void scaleValueProfData(uint32_t ValueKind, uint64_t N, uint64_t D, function_ref<void(instrprof_error)> Warn); }; @@ -982,7 +981,9 @@ enum ProfVersion { // In this version, the frontend PGO stable hash algorithm got fixed and // may produce hashes different from Version5. Version6 = 6, - // The current version is 5. + // An additional counter is added around logical operators. + Version7 = 7, + // The current version is 7. 
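The scaling interfaces above switch from an integral Weight to a rational N/D factor. A small sketch of the new call shape, with the warning handler left trivial:

#include "llvm/ProfileData/InstrProf.h"

// Scale every counter (including value-profile counts) by 1/2, e.g. when
// averaging two equally weighted profile shards.
void halveCounts(llvm::InstrProfRecord &Record) {
  Record.scale(/*N=*/1, /*D=*/2,
               [](llvm::instrprof_error) { /* soft errors, e.g. overflow */ });
}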
CurrentVersion = INSTR_PROF_INDEX_VERSION }; const uint64_t Version = ProfVersion::CurrentVersion; @@ -1138,7 +1139,8 @@ void getMemOPSizeRangeFromOption(StringRef Str, int64_t &RangeStart, // Create a COMDAT variable INSTR_PROF_RAW_VERSION_VAR to make the runtime // aware this is an ir_level profile so it can set the version flag. -void createIRLevelProfileFlagVar(Module &M, bool IsCS); +void createIRLevelProfileFlagVar(Module &M, bool IsCS, + bool InstrEntryBBEnabled); // Create the variable for the profile file name. void createProfileFileNameVar(Module &M, StringRef InstrProfileOutput); diff --git a/llvm/include/llvm/ProfileData/InstrProfData.inc b/llvm/include/llvm/ProfileData/InstrProfData.inc index a6913527e67f..f715505ba5e1 100644 --- a/llvm/include/llvm/ProfileData/InstrProfData.inc +++ b/llvm/include/llvm/ProfileData/InstrProfData.inc @@ -154,17 +154,7 @@ INSTR_PROF_RAW_HEADER(uint64_t, ValueKindLast, IPVK_Last) VALUE_PROF_FUNC_PARAM(uint64_t, TargetValue, Type::getInt64Ty(Ctx)) \ INSTR_PROF_COMMA VALUE_PROF_FUNC_PARAM(void *, Data, Type::getInt8PtrTy(Ctx)) INSTR_PROF_COMMA -#ifndef VALUE_RANGE_PROF VALUE_PROF_FUNC_PARAM(uint32_t, CounterIndex, Type::getInt32Ty(Ctx)) -#else /* VALUE_RANGE_PROF */ -VALUE_PROF_FUNC_PARAM(uint32_t, CounterIndex, Type::getInt32Ty(Ctx)) \ - INSTR_PROF_COMMA -VALUE_PROF_FUNC_PARAM(uint64_t, PreciseRangeStart, Type::getInt64Ty(Ctx)) \ - INSTR_PROF_COMMA -VALUE_PROF_FUNC_PARAM(uint64_t, PreciseRangeLast, Type::getInt64Ty(Ctx)) \ - INSTR_PROF_COMMA -VALUE_PROF_FUNC_PARAM(uint64_t, LargeValue, Type::getInt64Ty(Ctx)) -#endif /*VALUE_RANGE_PROF */ #undef VALUE_PROF_FUNC_PARAM #undef INSTR_PROF_COMMA /* VALUE_PROF_FUNC_PARAM end */ @@ -657,9 +647,9 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure, /* Raw profile format version (start from 1). */ #define INSTR_PROF_RAW_VERSION 5 /* Indexed profile format version (start from 1). */ -#define INSTR_PROF_INDEX_VERSION 6 +#define INSTR_PROF_INDEX_VERSION 7 /* Coverage mapping format version (start from 0). */ -#define INSTR_PROF_COVMAP_VERSION 3 +#define INSTR_PROF_COVMAP_VERSION 4 /* Profile version is always of type uint64_t. Reserve the upper 8 bits in the * version for other variants of profile. We set the lowest bit of the upper 8 @@ -671,6 +661,7 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure, #define GET_VERSION(V) ((V) & ~VARIANT_MASKS_ALL) #define VARIANT_MASK_IR_PROF (0x1ULL << 56) #define VARIANT_MASK_CSIR_PROF (0x1ULL << 57) +#define VARIANT_MASK_INSTR_ENTRY (0x1ULL << 58) #define INSTR_PROF_RAW_VERSION_VAR __llvm_profile_raw_version #define INSTR_PROF_PROFILE_RUNTIME_VAR __llvm_profile_runtime @@ -753,9 +744,9 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure, #define INSTR_PROF_VALUE_PROF_FUNC __llvm_profile_instrument_target #define INSTR_PROF_VALUE_PROF_FUNC_STR \ INSTR_PROF_QUOTE(INSTR_PROF_VALUE_PROF_FUNC) -#define INSTR_PROF_VALUE_RANGE_PROF_FUNC __llvm_profile_instrument_range -#define INSTR_PROF_VALUE_RANGE_PROF_FUNC_STR \ - INSTR_PROF_QUOTE(INSTR_PROF_VALUE_RANGE_PROF_FUNC) +#define INSTR_PROF_VALUE_PROF_MEMOP_FUNC __llvm_profile_instrument_memop +#define INSTR_PROF_VALUE_PROF_MEMOP_FUNC_STR \ + INSTR_PROF_QUOTE(INSTR_PROF_VALUE_PROF_MEMOP_FUNC) /* InstrProfile per-function control data alignment. 
*/ #define INSTR_PROF_DATA_ALIGNMENT 8 @@ -783,3 +774,121 @@ typedef struct InstrProfValueData { #endif #undef COVMAP_V2_OR_V3 + +#ifdef INSTR_PROF_VALUE_PROF_MEMOP_API + +#ifdef __cplusplus +#define INSTR_PROF_INLINE inline +#else +#define INSTR_PROF_INLINE +#endif + +/* The value range buckets (22 buckets) for the memop size value profiling looks + * like: + * + * [0, 0] + * [1, 1] + * [2, 2] + * [3, 3] + * [4, 4] + * [5, 5] + * [6, 6] + * [7, 7] + * [8, 8] + * [9, 15] + * [16, 16] + * [17, 31] + * [32, 32] + * [33, 63] + * [64, 64] + * [65, 127] + * [128, 128] + * [129, 255] + * [256, 256] + * [257, 511] + * [512, 512] + * [513, UINT64_MAX] + * + * Each range has a 'representative value' which is the lower end value of the + * range and used to store in the runtime profile data records and the VP + * metadata. For example, it's 2 for [2, 2] and 64 for [65, 127]. + */ + +/* + * Clz and Popcount. This code was copied from + * compiler-rt/lib/fuzzer/{FuzzerBuiltins.h,FuzzerBuiltinsMsvc.h} and + * llvm/include/llvm/Support/MathExtras.h. + */ +#if defined(_MSC_VER) && !defined(__clang__) + +#include <intrin.h> +INSTR_PROF_VISIBILITY INSTR_PROF_INLINE +int InstProfClzll(unsigned long long X) { + unsigned long LeadZeroIdx = 0; +#if !defined(_M_ARM64) && !defined(_M_X64) + // Scan the high 32 bits. + if (_BitScanReverse(&LeadZeroIdx, (unsigned long)(X >> 32))) + return (int)(63 - (LeadZeroIdx + 32)); // Create a bit offset + // from the MSB. + // Scan the low 32 bits. + if (_BitScanReverse(&LeadZeroIdx, (unsigned long)(X))) + return (int)(63 - LeadZeroIdx); +#else + if (_BitScanReverse64(&LeadZeroIdx, X)) return 63 - LeadZeroIdx; +#endif + return 64; +} +INSTR_PROF_VISIBILITY INSTR_PROF_INLINE +int InstProfPopcountll(unsigned long long X) { + // This code originates from https://reviews.llvm.org/rG30626254510f. + unsigned long long v = X; + v = v - ((v >> 1) & 0x5555555555555555ULL); + v = (v & 0x3333333333333333ULL) + ((v >> 2) & 0x3333333333333333ULL); + v = (v + (v >> 4)) & 0x0F0F0F0F0F0F0F0FULL; + return (int)((unsigned long long)(v * 0x0101010101010101ULL) >> 56); +} + +#else + +INSTR_PROF_VISIBILITY INSTR_PROF_INLINE +int InstProfClzll(unsigned long long X) { return __builtin_clzll(X); } +INSTR_PROF_VISIBILITY INSTR_PROF_INLINE +int InstProfPopcountll(unsigned long long X) { return __builtin_popcountll(X); } + +#endif /* defined(_MSC_VER) && !defined(__clang__) */ + +/* Map an (observed) memop size value to the representative value of its range. + * For example, 5 -> 5, 22 -> 17, 99 -> 65, 256 -> 256, 1001 -> 513. */ +INSTR_PROF_VISIBILITY INSTR_PROF_INLINE uint64_t +InstrProfGetRangeRepValue(uint64_t Value) { + if (Value <= 8) + // The first ranges are individually tracked. Use the value as is. + return Value; + else if (Value >= 513) + // The last range is mapped to its lowest value. + return 513; + else if (InstProfPopcountll(Value) == 1) + // If it's a power of two, use it as is. + return Value; + else + // Otherwise, take to the previous power of two + 1. + return (1 << (64 - InstProfClzll(Value) - 1)) + 1; +} + +/* Return true if the range that an (observed) memop size value belongs to has + * only a single value in the range. For example, 0 -> true, 8 -> true, 10 -> + * false, 64 -> true, 100 -> false, 513 -> false. */ +INSTR_PROF_VISIBILITY INSTR_PROF_INLINE unsigned +InstrProfIsSingleValRange(uint64_t Value) { + if (Value <= 8) + // The first ranges are individually tracked. 
+ return 1; + else if (InstProfPopcountll(Value) == 1) + // If it's a power of two, there's only one value. + return 1; + else + // Otherwise, there's more than one value in the range. + return 0; +} + +#endif /* INSTR_PROF_VALUE_PROF_MEMOP_API */ diff --git a/llvm/include/llvm/ProfileData/InstrProfReader.h b/llvm/include/llvm/ProfileData/InstrProfReader.h index f5f552672bf0..2c2cfb90d4fa 100644 --- a/llvm/include/llvm/ProfileData/InstrProfReader.h +++ b/llvm/include/llvm/ProfileData/InstrProfReader.h @@ -50,8 +50,12 @@ public: InstrProfIterator(InstrProfReader *Reader) : Reader(Reader) { Increment(); } InstrProfIterator &operator++() { Increment(); return *this; } - bool operator==(const InstrProfIterator &RHS) { return Reader == RHS.Reader; } - bool operator!=(const InstrProfIterator &RHS) { return Reader != RHS.Reader; } + bool operator==(const InstrProfIterator &RHS) const { + return Reader == RHS.Reader; + } + bool operator!=(const InstrProfIterator &RHS) const { + return Reader != RHS.Reader; + } value_type &operator*() { return Record; } value_type *operator->() { return &Record; } }; @@ -79,6 +83,8 @@ public: virtual bool hasCSIRLevelProfile() const = 0; + virtual bool instrEntryBBEnabled() const = 0; + /// Return the PGO symtab. There are three different readers: /// Raw, Text, and Indexed profile readers. The first two types /// of readers are used only by llvm-profdata tool, while the indexed @@ -148,6 +154,7 @@ private: line_iterator Line; bool IsIRLevelProfile = false; bool HasCSIRLevelProfile = false; + bool InstrEntryBBEnabled = false; Error readValueProfileData(InstrProfRecord &Record); @@ -164,6 +171,8 @@ public: bool hasCSIRLevelProfile() const override { return HasCSIRLevelProfile; } + bool instrEntryBBEnabled() const override { return InstrEntryBBEnabled; } + /// Read the header. Error readHeader() override; @@ -224,6 +233,10 @@ public: return (Version & VARIANT_MASK_CSIR_PROF) != 0; } + bool instrEntryBBEnabled() const override { + return (Version & VARIANT_MASK_INSTR_ENTRY) != 0; + } + InstrProfSymtab &getSymtab() override { assert(Symtab.get()); return *Symtab.get(); @@ -360,6 +373,7 @@ struct InstrProfReaderIndexBase { virtual uint64_t getVersion() const = 0; virtual bool isIRLevelProfile() const = 0; virtual bool hasCSIRLevelProfile() const = 0; + virtual bool instrEntryBBEnabled() const = 0; virtual Error populateSymtab(InstrProfSymtab &) = 0; }; @@ -408,6 +422,10 @@ public: return (FormatVersion & VARIANT_MASK_CSIR_PROF) != 0; } + bool instrEntryBBEnabled() const override { + return (FormatVersion & VARIANT_MASK_INSTR_ENTRY) != 0; + } + Error populateSymtab(InstrProfSymtab &Symtab) override { return Symtab.create(HashTable->keys()); } @@ -462,6 +480,10 @@ public: return Index->hasCSIRLevelProfile(); } + bool instrEntryBBEnabled() const override { + return Index->instrEntryBBEnabled(); + } + /// Return true if the given buffer is in an indexed instrprof format. static bool hasFormat(const MemoryBuffer &DataBuffer); diff --git a/llvm/include/llvm/ProfileData/InstrProfWriter.h b/llvm/include/llvm/ProfileData/InstrProfWriter.h index 5882fa2781e2..35c2669d55a6 100644 --- a/llvm/include/llvm/ProfileData/InstrProfWriter.h +++ b/llvm/include/llvm/ProfileData/InstrProfWriter.h @@ -40,13 +40,16 @@ private: bool Sparse; StringMap<ProfilingData> FunctionData; ProfKind ProfileKind = PF_Unknown; + bool InstrEntryBBEnabled; // Use raw pointer here for the incomplete type object. 
InstrProfRecordWriterTrait *InfoObj; public: - InstrProfWriter(bool Sparse = false); + InstrProfWriter(bool Sparse = false, bool InstrEntryBBEnabled = false); ~InstrProfWriter(); + StringMap<ProfilingData> &getProfileData() { return FunctionData; } + /// Add function counts for the given function. If there are already counts /// for this function and the hash and number of counts match, each counter is /// summed. Optionally scale counts by \p Weight. @@ -97,6 +100,7 @@ public: return Error::success(); } + void setInstrEntryBBEnabled(bool Enabled) { InstrEntryBBEnabled = Enabled; } // Internal interface for testing purpose only. void setValueProfDataEndianness(support::endianness Endianness); void setOutputSparse(bool Sparse); diff --git a/llvm/include/llvm/ProfileData/ProfileCommon.h b/llvm/include/llvm/ProfileData/ProfileCommon.h index 14c305b3d0c0..6bb5825339ae 100644 --- a/llvm/include/llvm/ProfileData/ProfileCommon.h +++ b/llvm/include/llvm/ProfileData/ProfileCommon.h @@ -33,8 +33,8 @@ class FunctionSamples; } // end namespace sampleprof -inline const char *getHotSectionPrefix() { return ".hot"; } -inline const char *getUnlikelySectionPrefix() { return ".unlikely"; } +inline const char *getHotSectionPrefix() { return "hot"; } +inline const char *getUnlikelySectionPrefix() { return "unlikely"; } class ProfileSummaryBuilder { private: diff --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h index 562468333ef4..c45ace9e68c1 100644 --- a/llvm/include/llvm/ProfileData/SampleProf.h +++ b/llvm/include/llvm/ProfileData/SampleProf.h @@ -37,8 +37,6 @@ namespace llvm { -class raw_ostream; - const std::error_category &sampleprof_category(); enum class sampleprof_error { @@ -56,7 +54,8 @@ enum class sampleprof_error { ostream_seek_unsupported, compress_failed, uncompress_failed, - zlib_unavailable + zlib_unavailable, + hash_mismatch }; inline std::error_code make_error_code(sampleprof_error E) { @@ -122,6 +121,7 @@ enum SecType { SecNameTable = 2, SecProfileSymbolList = 3, SecFuncOffsetTable = 4, + SecFuncMetadata = 5, // marker for the first type of profile. SecFuncProfileFirst = 32, SecLBRProfile = SecFuncProfileFirst @@ -139,6 +139,8 @@ static inline std::string getSecName(SecType Type) { return "ProfileSymbolListSection"; case SecFuncOffsetTable: return "FuncOffsetTableSection"; + case SecFuncMetadata: + return "FunctionMetadata"; case SecLBRProfile: return "LBRProfileSection"; } @@ -152,6 +154,9 @@ struct SecHdrTableEntry { uint64_t Flags; uint64_t Offset; uint64_t Size; + // The index indicating the location of the current entry in + // SectionHdrLayout table. + uint32_t LayoutIndex; }; // Flags common for all sections are defined here. In SecHdrTableEntry::Flags, @@ -159,7 +164,9 @@ struct SecHdrTableEntry { // will be saved in the higher 32 bits. enum class SecCommonFlags : uint32_t { SecFlagInValid = 0, - SecFlagCompress = (1 << 0) + SecFlagCompress = (1 << 0), + // Indicate the section contains only profile without context. + SecFlagFlat = (1 << 1) }; // Section specific flags are defined here. @@ -167,7 +174,10 @@ enum class SecCommonFlags : uint32_t { // a new check in verifySecFlag. enum class SecNameTableFlags : uint32_t { SecFlagInValid = 0, - SecFlagMD5Name = (1 << 0) + SecFlagMD5Name = (1 << 0), + // Store MD5 in fixed length instead of ULEB128 so NameTable can be + // accessed like an array. 
+ SecFlagFixedLengthMD5 = (1 << 1) }; enum class SecProfSummaryFlags : uint32_t { SecFlagInValid = 0, @@ -177,6 +187,11 @@ enum class SecProfSummaryFlags : uint32_t { SecFlagPartial = (1 << 0) }; +enum class SecFuncMetadataFlags : uint32_t { + SecFlagInvalid = 0, + SecFlagIsProbeBased = (1 << 0), +}; + // Verify section specific flag is used for the correct section. template <class SecFlagType> static inline void verifySecFlag(SecType Type, SecFlagType Flag) { @@ -193,6 +208,9 @@ static inline void verifySecFlag(SecType Type, SecFlagType Flag) { case SecProfSummary: IsFlagLegal = std::is_same<SecProfSummaryFlags, SecFlagType>(); break; + case SecFuncMetadata: + IsFlagLegal = std::is_same<SecFuncMetadataFlags, SecFlagType>(); + break; default: break; } @@ -244,6 +262,14 @@ struct LineLocation { (LineOffset == O.LineOffset && Discriminator < O.Discriminator); } + bool operator==(const LineLocation &O) const { + return LineOffset == O.LineOffset && Discriminator == O.Discriminator; + } + + bool operator!=(const LineLocation &O) const { + return LineOffset != O.LineOffset || Discriminator != O.Discriminator; + } + uint32_t LineOffset; uint32_t Discriminator; }; @@ -341,7 +367,131 @@ private: raw_ostream &operator<<(raw_ostream &OS, const SampleRecord &Sample); +// State of context associated with FunctionSamples +enum ContextStateMask { + UnknownContext = 0x0, // Profile without context + RawContext = 0x1, // Full context profile from input profile + SyntheticContext = 0x2, // Synthetic context created for context promotion + InlinedContext = 0x4, // Profile for context that is inlined into caller + MergedContext = 0x8 // Profile for context merged into base profile +}; + +// Sample context for FunctionSamples. It consists of the calling context, +// the function name and context state. Internally sample context is represented +// using StringRef, which is also the input for constructing a `SampleContext`. +// It can accept and represent both full context string as well as context-less +// function name. +// Example of full context string (note the wrapping `[]`): +// `[main:3 @ _Z5funcAi:1 @ _Z8funcLeafi]` +// Example of context-less function name (same as AutoFDO): +// `_Z8funcLeafi` +class SampleContext { +public: + SampleContext() : State(UnknownContext) {} + SampleContext(StringRef ContextStr, + ContextStateMask CState = UnknownContext) { + setContext(ContextStr, CState); + } + + // Promote context by removing top frames (represented by `ContextStrToRemove`). + // Note that with string representation of context, the promotion is effectively + // a substr operation with `ContextStrToRemove` removed from left. + void promoteOnPath(StringRef ContextStrToRemove) { + assert(FullContext.startswith(ContextStrToRemove)); + + // Remove leading context and frame separator " @ ". + FullContext = FullContext.substr(ContextStrToRemove.size() + 3); + CallingContext = CallingContext.substr(ContextStrToRemove.size() + 3); + } + + // Split the top context frame (left-most substr) from context. + static std::pair<StringRef, StringRef> + splitContextString(StringRef ContextStr) { + return ContextStr.split(" @ "); + } + + // Decode context string for a frame to get function name and location. + // `ContextStr` is in the form of `FuncName:StartLine.Discriminator`. 
+ static void decodeContextString(StringRef ContextStr, StringRef &FName, + LineLocation &LineLoc) { + // Get function name + auto EntrySplit = ContextStr.split(':'); + FName = EntrySplit.first; + + LineLoc = {0, 0}; + if (!EntrySplit.second.empty()) { + // Get line offset, use signed int for getAsInteger so string will + // be parsed as signed. + int LineOffset = 0; + auto LocSplit = EntrySplit.second.split('.'); + LocSplit.first.getAsInteger(10, LineOffset); + LineLoc.LineOffset = LineOffset; + + // Get discriminator + if (!LocSplit.second.empty()) + LocSplit.second.getAsInteger(10, LineLoc.Discriminator); + } + } + + operator StringRef() const { return FullContext; } + bool hasState(ContextStateMask S) { return State & (uint32_t)S; } + void setState(ContextStateMask S) { State |= (uint32_t)S; } + void clearState(ContextStateMask S) { State &= (uint32_t)~S; } + bool hasContext() const { return State != UnknownContext; } + bool isBaseContext() const { return CallingContext.empty(); } + StringRef getName() const { return Name; } + StringRef getCallingContext() const { return CallingContext; } + StringRef getNameWithContext() const { return FullContext; } + +private: + // Give a context string, decode and populate internal states like + // Function name, Calling context and context state. Example of input + // `ContextStr`: `[main:3 @ _Z5funcAi:1 @ _Z8funcLeafi]` + void setContext(StringRef ContextStr, ContextStateMask CState) { + assert(!ContextStr.empty()); + // Note that `[]` wrapped input indicates a full context string, otherwise + // it's treated as context-less function name only. + bool HasContext = ContextStr.startswith("["); + if (!HasContext && CState == UnknownContext) { + State = UnknownContext; + Name = FullContext = ContextStr; + } else { + // Assume raw context profile if unspecified + if (CState == UnknownContext) + State = RawContext; + else + State = CState; + + // Remove encapsulating '[' and ']' if any + if (HasContext) + FullContext = ContextStr.substr(1, ContextStr.size() - 2); + else + FullContext = ContextStr; + + // Caller is to the left of callee in context string + auto NameContext = FullContext.rsplit(" @ "); + if (NameContext.second.empty()) { + Name = NameContext.first; + CallingContext = NameContext.second; + } else { + Name = NameContext.second; + CallingContext = NameContext.first; + } + } + } + + // Full context string including calling context and leaf function name + StringRef FullContext; + // Function name for the associated sample profile + StringRef Name; + // Calling context (leaf function excluded) for the associated sample profile + StringRef CallingContext; + // State of the associated sample profile + uint32_t State; +}; + class FunctionSamples; +class SampleProfileReaderItaniumRemapper; using BodySampleMap = std::map<LineLocation, SampleRecord>; // NOTE: Using a StringMap here makes parsed profiles consume around 17% more @@ -369,6 +519,8 @@ public: : sampleprof_error::success; } + void setTotalSamples(uint64_t Num) { TotalSamples = Num; } + sampleprof_error addHeadSamples(uint64_t Num, uint64_t Weight = 1) { bool Overflowed; TotalHeadSamples = @@ -397,10 +549,22 @@ public: ErrorOr<uint64_t> findSamplesAt(uint32_t LineOffset, uint32_t Discriminator) const { const auto &ret = BodySamples.find(LineLocation(LineOffset, Discriminator)); - if (ret == BodySamples.end()) + if (ret == BodySamples.end()) { + // For CSSPGO, in order to conserve profile size, we no longer write out + // locations profile for those not hit during training, so we need to + 
// treat them as zero instead of error here. + if (ProfileIsCS) + return 0; return std::error_code(); - else + // A missing counter for a probe likely means the probe was not executed. + // Treat it as a zero count instead of an unknown count to help edge + // weight inference. + if (FunctionSamples::ProfileIsProbeBased) + return 0; + return std::error_code(); + } else { return ret->second.getSamples(); + } } /// Returns the call target map collected at a given location. @@ -414,6 +578,16 @@ public: return ret->second.getCallTargets(); } + /// Returns the call target map collected at a given location specified by \p + /// CallSite. If the location is not found in profile, return error. + ErrorOr<SampleRecord::CallTargetMap> + findCallTargetMapAt(const LineLocation &CallSite) const { + const auto &Ret = BodySamples.find(CallSite); + if (Ret == BodySamples.end()) + return std::error_code(); + return Ret->second.getCallTargets(); + } + /// Return the function samples at the given callsite location. FunctionSamplesMap &functionSamplesAt(const LineLocation &Loc) { return CallsiteSamples[Loc]; @@ -428,35 +602,15 @@ public: return &iter->second; } - /// Returns a pointer to FunctionSamples at the given callsite location \p Loc - /// with callee \p CalleeName. If no callsite can be found, relax the - /// restriction to return the FunctionSamples at callsite location \p Loc - /// with the maximum total sample count. - const FunctionSamples *findFunctionSamplesAt(const LineLocation &Loc, - StringRef CalleeName) const { - std::string CalleeGUID; - CalleeName = getRepInFormat(CalleeName, UseMD5, CalleeGUID); - - auto iter = CallsiteSamples.find(Loc); - if (iter == CallsiteSamples.end()) - return nullptr; - auto FS = iter->second.find(CalleeName); - if (FS != iter->second.end()) - return &FS->second; - // If we cannot find exact match of the callee name, return the FS with - // the max total count. Only do this when CalleeName is not provided, - // i.e., only for indirect calls. - if (!CalleeName.empty()) - return nullptr; - uint64_t MaxTotalSamples = 0; - const FunctionSamples *R = nullptr; - for (const auto &NameFS : iter->second) - if (NameFS.second.getTotalSamples() >= MaxTotalSamples) { - MaxTotalSamples = NameFS.second.getTotalSamples(); - R = &NameFS.second; - } - return R; - } + /// Returns a pointer to FunctionSamples at the given callsite location + /// \p Loc with callee \p CalleeName. If no callsite can be found, relax + /// the restriction to return the FunctionSamples at callsite location + /// \p Loc with the maximum total sample count. If \p Remapper is not + /// nullptr, use \p Remapper to find FunctionSamples with equivalent name + /// as \p CalleeName. + const FunctionSamples * + findFunctionSamplesAt(const LineLocation &Loc, StringRef CalleeName, + SampleProfileReaderItaniumRemapper *Remapper) const; bool empty() const { return TotalSamples == 0; } @@ -473,6 +627,11 @@ public: /// Return the sample count of the first instruction of the function. /// The function can be either a standalone symbol or an inlined function. uint64_t getEntrySamples() const { + if (FunctionSamples::ProfileIsCS && getHeadSamples()) { + // For CS profile, if we already have more accurate head samples + // counted by branch sample from caller, use them as entry samples. + return getHeadSamples(); + } uint64_t Count = 0; // Use either BodySamples or CallsiteSamples which ever has the smaller // lineno. 
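To make the SampleContext string conventions described above concrete, a small sketch; the mangled names are only examples:

#include "llvm/ADT/StringRef.h"
#include "llvm/ProfileData/SampleProf.h"

using namespace llvm;

void sampleContextExample() {
  // Full context string: caller frames on the left, leaf function last.
  sampleprof::SampleContext Ctx("[main:3 @ _Z5funcAi:1 @ _Z8funcLeafi]");
  StringRef Leaf = Ctx.getName();              // "_Z8funcLeafi"
  StringRef Callers = Ctx.getCallingContext(); // "main:3 @ _Z5funcAi:1"

  // A single frame decodes into a function name plus line offset/discriminator.
  StringRef FName;
  sampleprof::LineLocation Loc(0, 0);
  sampleprof::SampleContext::decodeContextString("_Z5funcAi:1.2", FName, Loc);
  // Now FName == "_Z5funcAi", Loc.LineOffset == 1, Loc.Discriminator == 2.
  (void)Leaf;
  (void)Callers;
}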
@@ -515,6 +674,23 @@ public: sampleprof_error merge(const FunctionSamples &Other, uint64_t Weight = 1) { sampleprof_error Result = sampleprof_error::success; Name = Other.getName(); + if (!GUIDToFuncNameMap) + GUIDToFuncNameMap = Other.GUIDToFuncNameMap; + + if (FunctionHash == 0) { + // Set the function hash code for the target profile. + FunctionHash = Other.getFunctionHash(); + } else if (FunctionHash != Other.getFunctionHash()) { + // The two profiles coming with different valid hash codes indicates + // either: + // 1. They are same-named static functions from different compilation + // units (without using -unique-internal-linkage-names), or + // 2. They are really the same function but from different compilations. + // Let's bail out in either case for now, which means one profile is + // dropped. + return sampleprof_error::hash_mismatch; + } + MergeResult(Result, addTotalSamples(Other.getTotalSamples(), Weight)); MergeResult(Result, addHeadSamples(Other.getHeadSamples(), Weight)); for (const auto &I : Other.getBodySamples()) { @@ -566,19 +742,32 @@ public: /// Return the function name. StringRef getName() const { return Name; } + /// Return function name with context. + StringRef getNameWithContext() const { + return FunctionSamples::ProfileIsCS ? Context.getNameWithContext() : Name; + } + /// Return the original function name. StringRef getFuncName() const { return getFuncName(Name); } + void setFunctionHash(uint64_t Hash) { FunctionHash = Hash; } + + uint64_t getFunctionHash() const { return FunctionHash; } + /// Return the canonical name for a function, taking into account /// suffix elision policy attributes. static StringRef getCanonicalFnName(const Function &F) { - static const char *knownSuffixes[] = { ".llvm.", ".part." }; auto AttrName = "sample-profile-suffix-elision-policy"; auto Attr = F.getFnAttribute(AttrName).getValueAsString(); + return getCanonicalFnName(F.getName(), Attr); + } + + static StringRef getCanonicalFnName(StringRef FnName, StringRef Attr = "") { + static const char *knownSuffixes[] = { ".llvm.", ".part." }; if (Attr == "" || Attr == "all") { - return F.getName().split('.').first; + return FnName.split('.').first; } else if (Attr == "selected") { - StringRef Cand(F.getName()); + StringRef Cand(FnName); for (const auto &Suf : knownSuffixes) { StringRef Suffix(Suf); auto It = Cand.rfind(Suffix); @@ -590,11 +779,11 @@ public: } return Cand; } else if (Attr == "none") { - return F.getName(); + return FnName; } else { assert(false && "internal error: unknown suffix elision policy"); } - return F.getName(); + return FnName; } /// Translate \p Name into its original name. @@ -609,16 +798,19 @@ public: return Name; assert(GUIDToFuncNameMap && "GUIDToFuncNameMap needs to be popluated first"); - auto iter = GUIDToFuncNameMap->find(std::stoull(Name.data())); - if (iter == GUIDToFuncNameMap->end()) - return StringRef(); - return iter->second; + return GUIDToFuncNameMap->lookup(std::stoull(Name.data())); } /// Returns the line offset to the start line of the subprogram. /// We assume that a single function will not exceed 65535 LOC. static unsigned getOffset(const DILocation *DIL); + /// Returns a unique call site identifier for a given debug location of a call + /// instruction. This is wrapper of two scenarios, the probe-based profile and + /// regular profile, to hide implementation details from the sample loader and + /// the context tracker. 
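And the suffix-elision policy above as a worked example (symbol names are hypothetical):

#include "llvm/ADT/StringRef.h"
#include "llvm/ProfileData/SampleProf.h"

void canonicalNameExamples() {
  using llvm::sampleprof::FunctionSamples;
  // Default (or "all"): everything after the first '.' is dropped.
  llvm::StringRef A = FunctionSamples::getCanonicalFnName("foo.llvm.1234");          // "foo"
  // "selected": only the known suffixes (".llvm.", ".part.") are stripped.
  llvm::StringRef B = FunctionSamples::getCanonicalFnName("foo.cold.1", "selected"); // "foo.cold.1"
  // "none": the name is returned unchanged.
  llvm::StringRef C = FunctionSamples::getCanonicalFnName("foo.llvm.1234", "none");  // "foo.llvm.1234"
  (void)A; (void)B; (void)C;
}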
+ static LineLocation getCallSiteIdentifier(const DILocation *DIL); + /// Get the FunctionSamples of the inline instance where DIL originates /// from. /// @@ -628,7 +820,19 @@ public: /// tree nodes in the profile. /// /// \returns the FunctionSamples pointer to the inlined instance. - const FunctionSamples *findFunctionSamples(const DILocation *DIL) const; + /// If \p Remapper is not nullptr, it will be used to find matching + /// FunctionSamples with not exactly the same but equivalent name. + const FunctionSamples *findFunctionSamples( + const DILocation *DIL, + SampleProfileReaderItaniumRemapper *Remapper = nullptr) const; + + static bool ProfileIsProbeBased; + + static bool ProfileIsCS; + + SampleContext &getContext() const { return Context; } + + void setContext(const SampleContext &FContext) { Context = FContext; } static SampleProfileFormat Format; @@ -646,10 +850,20 @@ public: return UseMD5 ? std::stoull(Name.data()) : Function::getGUID(Name); } + // Find all the names in the current FunctionSamples including names in + // all the inline instances and names of call targets. + void findAllNames(DenseSet<StringRef> &NameSet) const; + private: /// Mangled name of the function. StringRef Name; + /// CFG hash value for the function. + uint64_t FunctionHash = 0; + + /// Calling context for function profile + mutable SampleContext Context; + /// Total number of samples collected inside this function. /// /// Samples are cumulative, they include all the samples collected diff --git a/llvm/include/llvm/ProfileData/SampleProfReader.h b/llvm/include/llvm/ProfileData/SampleProfReader.h index 0e8ee7696c54..3f52a2f6163b 100644 --- a/llvm/include/llvm/ProfileData/SampleProfReader.h +++ b/llvm/include/llvm/ProfileData/SampleProfReader.h @@ -27,8 +27,9 @@ // offsetA[.discriminator]: fnA:num_of_total_samples // offsetA1[.discriminator]: number_of_samples [fn7:num fn8:num ... ] // ... +// !CFGChecksum: num // -// This is a nested tree in which the identation represents the nesting level +// This is a nested tree in which the indentation represents the nesting level // of the inline stack. There are no blank lines in the file. And the spacing // within a single line is fixed. Additional spaces will result in an error // while reading the file. @@ -47,10 +48,11 @@ // in the prologue of the function (second number). This head sample // count provides an indicator of how frequently the function is invoked. // -// There are two types of lines in the function body. +// There are three types of lines in the function body. // // * Sampled line represents the profile information of a source location. // * Callsite line represents the profile information of a callsite. +// * Metadata line represents extra metadata of the function. // // Each sampled line may contain several items. Some are optional (marked // below): @@ -114,6 +116,18 @@ // total number of samples collected for the inlined instance at this // callsite // +// Metadata line can occur in lines with one indent only, containing extra +// information for the top-level function. Furthermore, metadata can only +// occur after all the body samples and callsite samples. +// Each metadata line may contain a particular type of metadata, marked by +// the starting characters annotated with !. We process each metadata line +// independently, hence each metadata line has to form an independent piece +// of information that does not require cross-line reference. +// We support the following types of metadata: +// +// a. CFG Checksum (a.k.a. 
function hash): +// !CFGChecksum: 12345 +// // // Binary format // ------------- @@ -208,10 +222,10 @@ #ifndef LLVM_PROFILEDATA_SAMPLEPROFREADER_H #define LLVM_PROFILEDATA_SAMPLEPROFREADER_H +#include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" -#include "llvm/ADT/Twine.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Function.h" #include "llvm/IR/LLVMContext.h" @@ -232,6 +246,7 @@ namespace llvm { class raw_ostream; +class Twine; namespace sampleprof { @@ -275,15 +290,18 @@ public: return Remappings->lookup(FunctionName); } - /// Return the samples collected for function \p F if remapper knows - /// it is present in SampleMap. - FunctionSamples *getSamplesFor(StringRef FunctionName); + /// Return the equivalent name in the profile for \p FunctionName if + /// it exists. + Optional<StringRef> lookUpNameInProfile(StringRef FunctionName); private: // The buffer holding the content read from remapping file. std::unique_ptr<MemoryBuffer> Buffer; std::unique_ptr<SymbolRemappingReader> Remappings; - DenseMap<SymbolRemappingReader::Key, FunctionSamples *> SampleMap; + // Map remapping key to the name in the profile. By looking up the + // key in the remapper, a given new name can be mapped to the + // cannonical name using the NameMap. + DenseMap<SymbolRemappingReader::Key, StringRef> NameMap; // The Reader the remapper is servicing. SampleProfileReader &Reader; // Indicate whether remapping has been applied to the profile read @@ -370,15 +388,19 @@ public: /// Return the samples collected for function \p F. virtual FunctionSamples *getSamplesFor(StringRef Fname) { - if (Remapper) { - if (auto FS = Remapper->getSamplesFor(Fname)) - return FS; - } std::string FGUID; Fname = getRepInFormat(Fname, useMD5(), FGUID); auto It = Profiles.find(Fname); if (It != Profiles.end()) return &It->second; + + if (Remapper) { + if (auto NameInProfile = Remapper->lookUpNameInProfile(Fname)) { + auto It = Profiles.find(*NameInProfile); + if (It != Profiles.end()) + return &It->second; + } + } return nullptr; } @@ -386,7 +408,7 @@ public: StringMap<FunctionSamples> &getProfiles() { return Profiles; } /// Report a parse error message. - void reportError(int64_t LineNumber, Twine Msg) const { + void reportError(int64_t LineNumber, const Twine &Msg) const { Ctx.diagnose(DiagnosticInfoSampleProfile(Buffer->getBufferIdentifier(), LineNumber, Msg)); } @@ -411,6 +433,12 @@ public: /// \brief Return the profile format. SampleProfileFormat getFormat() const { return Format; } + /// Whether input profile is based on pseudo probes. + bool profileIsProbeBased() const { return ProfileIsProbeBased; } + + /// Whether input profile is fully context-sensitive + bool profileIsCS() const { return ProfileIsCS; } + virtual std::unique_ptr<ProfileSymbolList> getProfileSymbolList() { return nullptr; }; @@ -423,6 +451,12 @@ public: /// Return whether names in the profile are all MD5 numbers. virtual bool useMD5() { return false; } + /// Don't read profile without context if the flag is set. This is only meaningful + /// for ExtBinary format. + virtual void setSkipFlatProf(bool Skip) {} + + SampleProfileReaderItaniumRemapper *getRemapper() { return Remapper.get(); } + protected: /// Map every function to its associated profile. /// @@ -451,6 +485,11 @@ protected: std::unique_ptr<SampleProfileReaderItaniumRemapper> Remapper; + /// \brief Whether samples are collected based on pseudo probes. 
+ bool ProfileIsProbeBased = false; + + bool ProfileIsCS = false; + /// \brief The format of sample. SampleProfileFormat Format = SPF_None; }; @@ -588,40 +627,25 @@ private: protected: std::vector<SecHdrTableEntry> SecHdrTable; - std::unique_ptr<ProfileSymbolList> ProfSymList; - std::error_code readSecHdrTableEntry(); + std::error_code readSecHdrTableEntry(uint32_t Idx); std::error_code readSecHdrTable(); - virtual std::error_code readHeader() override; - virtual std::error_code verifySPMagic(uint64_t Magic) override = 0; - virtual std::error_code readOneSection(const uint8_t *Start, uint64_t Size, - const SecHdrTableEntry &Entry) = 0; - -public: - SampleProfileReaderExtBinaryBase(std::unique_ptr<MemoryBuffer> B, - LLVMContext &C, SampleProfileFormat Format) - : SampleProfileReaderBinary(std::move(B), C, Format) {} - /// Read sample profiles in extensible format from the associated file. - std::error_code readImpl() override; - - /// Get the total size of all \p Type sections. - uint64_t getSectionSize(SecType Type); - /// Get the total size of header and all sections. - uint64_t getFileSize(); - virtual bool dumpSectionInfo(raw_ostream &OS = dbgs()) override; -}; - -class SampleProfileReaderExtBinary : public SampleProfileReaderExtBinaryBase { -private: - virtual std::error_code verifySPMagic(uint64_t Magic) override; - virtual std::error_code - readOneSection(const uint8_t *Start, uint64_t Size, - const SecHdrTableEntry &Entry) override; - std::error_code readProfileSymbolList(); + std::error_code readFuncMetadata(); std::error_code readFuncOffsetTable(); std::error_code readFuncProfiles(); std::error_code readMD5NameTable(); std::error_code readNameTableSec(bool IsMD5); + std::error_code readProfileSymbolList(); + + virtual std::error_code readHeader() override; + virtual std::error_code verifySPMagic(uint64_t Magic) override = 0; + virtual std::error_code readOneSection(const uint8_t *Start, uint64_t Size, + const SecHdrTableEntry &Entry); + // placeholder for subclasses to dispatch their own section readers. + virtual std::error_code readCustomSection(const SecHdrTableEntry &Entry) = 0; + virtual ErrorOr<StringRef> readStringFromTable() override; + + std::unique_ptr<ProfileSymbolList> ProfSymList; /// The table mapping from function name to the offset of its FunctionSample /// towards file start. @@ -631,6 +655,12 @@ private: /// Use all functions from the input profile. bool UseAllFuncs = true; + /// Use fixed length MD5 instead of ULEB128 encoding so NameTable doesn't + /// need to be read in up front and can be directly accessed using index. + bool FixedLengthMD5 = false; + /// The starting address of NameTable containing fixed length MD5. + const uint8_t *MD5NameMemStart = nullptr; + /// If MD5 is used in NameTable section, the section saves uint64_t data. /// The uint64_t data has to be converted to a string and then the string /// will be used to initialize StringRef in NameTable. @@ -640,26 +670,52 @@ private: /// the lifetime of MD5StringBuf is not shorter than that of NameTable. std::unique_ptr<std::vector<std::string>> MD5StringBuf; + /// If SkipFlatProf is true, skip the sections with + /// SecFlagFlat flag. 
+ bool SkipFlatProf = false; + public: - SampleProfileReaderExtBinary(std::unique_ptr<MemoryBuffer> B, LLVMContext &C, - SampleProfileFormat Format = SPF_Ext_Binary) - : SampleProfileReaderExtBinaryBase(std::move(B), C, Format) {} + SampleProfileReaderExtBinaryBase(std::unique_ptr<MemoryBuffer> B, + LLVMContext &C, SampleProfileFormat Format) + : SampleProfileReaderBinary(std::move(B), C, Format) {} - /// \brief Return true if \p Buffer is in the format supported by this class. - static bool hasFormat(const MemoryBuffer &Buffer); + /// Read sample profiles in extensible format from the associated file. + std::error_code readImpl() override; - virtual std::unique_ptr<ProfileSymbolList> getProfileSymbolList() override { - return std::move(ProfSymList); - }; + /// Get the total size of all \p Type sections. + uint64_t getSectionSize(SecType Type); + /// Get the total size of header and all sections. + uint64_t getFileSize(); + virtual bool dumpSectionInfo(raw_ostream &OS = dbgs()) override; /// Collect functions with definitions in Module \p M. void collectFuncsFrom(const Module &M) override; /// Return whether names in the profile are all MD5 numbers. - virtual bool useMD5() override { - assert(!NameTable.empty() && "NameTable should have been initialized"); - return MD5StringBuf && !MD5StringBuf->empty(); - } + virtual bool useMD5() override { return MD5StringBuf.get(); } + + virtual std::unique_ptr<ProfileSymbolList> getProfileSymbolList() override { + return std::move(ProfSymList); + }; + + virtual void setSkipFlatProf(bool Skip) override { SkipFlatProf = Skip; } +}; + +class SampleProfileReaderExtBinary : public SampleProfileReaderExtBinaryBase { +private: + virtual std::error_code verifySPMagic(uint64_t Magic) override; + virtual std::error_code + readCustomSection(const SecHdrTableEntry &Entry) override { + return sampleprof_error::success; + }; + +public: + SampleProfileReaderExtBinary(std::unique_ptr<MemoryBuffer> B, LLVMContext &C, + SampleProfileFormat Format = SPF_Ext_Binary) + : SampleProfileReaderExtBinaryBase(std::move(B), C, Format) {} + + /// \brief Return true if \p Buffer is in the format supported by this class. + static bool hasFormat(const MemoryBuffer &Buffer); }; class SampleProfileReaderCompactBinary : public SampleProfileReaderBinary { diff --git a/llvm/include/llvm/ProfileData/SampleProfWriter.h b/llvm/include/llvm/ProfileData/SampleProfWriter.h index 7d0df9e44f58..419ebd6eb7ae 100644 --- a/llvm/include/llvm/ProfileData/SampleProfWriter.h +++ b/llvm/include/llvm/ProfileData/SampleProfWriter.h @@ -15,6 +15,7 @@ #include "llvm/ADT/MapVector.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSet.h" #include "llvm/IR/ProfileSummary.h" #include "llvm/ProfileData/SampleProf.h" #include "llvm/Support/ErrorOr.h" @@ -28,6 +29,15 @@ namespace llvm { namespace sampleprof { +enum SectionLayout { + DefaultLayout, + // The layout splits profile with context information from profile without + // context information. When Thinlto is enabled, ThinLTO postlink phase only + // has to load profile with context information and can skip the other part. + CtxSplitLayout, + NumOfLayout, +}; + /// Sample-based profile writer. Base class. 
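A rough usage sketch of how the CtxSplitLayout split described above is intended to be used end to end (assuming the ExtBinary writer/reader hooks added in this change; the Writer and Reader variables and call sites are illustrative):

  // Producer side: lay out the context-full and context-less halves separately.
  Writer->resetSecLayout(CtxSplitLayout); // must precede any section-flag setting
  Writer->setUseMD5();

  // Consumer side (e.g. ThinLTO postlink): load only the half with context.
  Reader->setSkipFlatProf(true);          // only meaningful for the ExtBinary format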
class SampleProfileWriter { public: @@ -60,6 +70,7 @@ public: virtual void setToCompressAllSections() {} virtual void setUseMD5() {} virtual void setPartialProfile() {} + virtual void resetSecLayout(SectionLayout SL) {} protected: SampleProfileWriter(std::unique_ptr<raw_ostream> &OS) @@ -144,6 +155,36 @@ class SampleProfileWriterRawBinary : public SampleProfileWriterBinary { using SampleProfileWriterBinary::SampleProfileWriterBinary; }; +const std::array<SmallVector<SecHdrTableEntry, 8>, NumOfLayout> + ExtBinaryHdrLayoutTable = { + // Note that SecFuncOffsetTable section is written after SecLBRProfile + // in the profile, but is put before SecLBRProfile in SectionHdrLayout. + // This is because sample reader follows the order in SectionHdrLayout + // to read each section. To read function profiles on demand, sample + // reader need to get the offset of each function profile first. + // + // DefaultLayout + SmallVector<SecHdrTableEntry, 8>({{SecProfSummary, 0, 0, 0, 0}, + {SecNameTable, 0, 0, 0, 0}, + {SecFuncOffsetTable, 0, 0, 0, 0}, + {SecLBRProfile, 0, 0, 0, 0}, + {SecProfileSymbolList, 0, 0, 0, 0}, + {SecFuncMetadata, 0, 0, 0, 0}}), + // CtxSplitLayout + SmallVector<SecHdrTableEntry, 8>({{SecProfSummary, 0, 0, 0, 0}, + {SecNameTable, 0, 0, 0, 0}, + // profile with context + // for next two sections + {SecFuncOffsetTable, 0, 0, 0, 0}, + {SecLBRProfile, 0, 0, 0, 0}, + // profile without context + // for next two sections + {SecFuncOffsetTable, 0, 0, 0, 0}, + {SecLBRProfile, 0, 0, 0, 0}, + {SecProfileSymbolList, 0, 0, 0, 0}, + {SecFuncMetadata, 0, 0, 0, 0}}), +}; + class SampleProfileWriterExtBinaryBase : public SampleProfileWriterBinary { using SampleProfileWriterBinary::SampleProfileWriterBinary; public: @@ -152,10 +193,45 @@ public: virtual void setToCompressAllSections() override; void setToCompressSection(SecType Type); + virtual std::error_code writeSample(const FunctionSamples &S) override; + + // Set to use MD5 to represent string in NameTable. + virtual void setUseMD5() override { + UseMD5 = true; + addSectionFlag(SecNameTable, SecNameTableFlags::SecFlagMD5Name); + // MD5 will be stored as plain uint64_t instead of variable-length + // quantity format in NameTable section. + addSectionFlag(SecNameTable, SecNameTableFlags::SecFlagFixedLengthMD5); + } + + // Set the profile to be partial. It means the profile is for + // common/shared code. The common profile is usually merged from + // profiles collected from running other targets. + virtual void setPartialProfile() override { + addSectionFlag(SecProfSummary, SecProfSummaryFlags::SecFlagPartial); + } + + virtual void setProfileSymbolList(ProfileSymbolList *PSL) override { + ProfSymList = PSL; + }; + + virtual void resetSecLayout(SectionLayout SL) override { + verifySecLayout(SL); +#ifndef NDEBUG + // Make sure resetSecLayout is called before any flag setting. 
+ for (auto &Entry : SectionHdrLayout) { + assert(Entry.Flags == 0 && + "resetSecLayout has to be called before any flag setting"); + } +#endif + SecLayout = SL; + SectionHdrLayout = ExtBinaryHdrLayoutTable[SL]; + } protected: - uint64_t markSectionStart(SecType Type); - std::error_code addNewSection(SecType Sec, uint64_t SectionStart); + uint64_t markSectionStart(SecType Type, uint32_t LayoutIdx); + std::error_code addNewSection(SecType Sec, uint32_t LayoutIdx, + uint64_t SectionStart); template <class SecFlagType> void addSectionFlag(SecType Type, SecFlagType Flag) { for (auto &Entry : SectionHdrLayout) { @@ -163,23 +239,55 @@ protected: addSecFlag(Entry, Flag); } } + template <class SecFlagType> + void addSectionFlag(uint32_t SectionIdx, SecFlagType Flag) { + addSecFlag(SectionHdrLayout[SectionIdx], Flag); + } + + // placeholder for subclasses to dispatch their own section writers. + virtual std::error_code writeCustomSection(SecType Type) = 0; + // Verify the SecLayout is supported by the format. + virtual void verifySecLayout(SectionLayout SL) = 0; - virtual void initSectionHdrLayout() = 0; + // specify the order to write sections. virtual std::error_code writeSections(const StringMap<FunctionSamples> &ProfileMap) = 0; + // Dispatch section writer for each section. \p LayoutIdx is the sequence + // number indicating where the section is located in SectionHdrLayout. + virtual std::error_code + writeOneSection(SecType Type, uint32_t LayoutIdx, + const StringMap<FunctionSamples> &ProfileMap); + + // Helper function to write name table. + virtual std::error_code writeNameTable() override; + + std::error_code writeFuncMetadata(const StringMap<FunctionSamples> &Profiles); + + // Functions to write various kinds of sections. + std::error_code + writeNameTableSection(const StringMap<FunctionSamples> &ProfileMap); + std::error_code writeFuncOffsetTable(); + std::error_code writeProfileSymbolListSection(); + + SectionLayout SecLayout = DefaultLayout; // Specifiy the order of sections in section header table. Note - // the order of sections in the profile may be different that the + // the order of sections in SecHdrTable may be different that the // order in SectionHdrLayout. sample Reader will follow the order // in SectionHdrLayout to read each section. - SmallVector<SecHdrTableEntry, 8> SectionHdrLayout; + SmallVector<SecHdrTableEntry, 8> SectionHdrLayout = + ExtBinaryHdrLayoutTable[DefaultLayout]; + + // Save the start of SecLBRProfile so we can compute the offset to the + // start of SecLBRProfile for each Function's Profile and will keep it + // in FuncOffsetTable. + uint64_t SecLBRProfileStart = 0; private: void allocSecHdrTable(); std::error_code writeSecHdrTable(); virtual std::error_code writeHeader(const StringMap<FunctionSamples> &ProfileMap) override; - SecHdrTableEntry &getEntryInLayout(SecType Type); std::error_code compressAndOutput(); // We will swap the raw_ostream held by LocalBufStream and that @@ -196,70 +304,43 @@ private: // The location in the output stream where the SecHdrTable should be // written to. uint64_t SecHdrTableOffset; - // Initial Section Flags setting. + // The table contains SecHdrTableEntry entries in order of how they are + // populated in the writer. It may be different from the order in + // SectionHdrLayout which specifies the sequence in which sections will + // be read. std::vector<SecHdrTableEntry> SecHdrTable; + + // FuncOffsetTable maps function name to its profile offset in SecLBRProfile + // section. 
It is used to load function profile on demand. + MapVector<StringRef, uint64_t> FuncOffsetTable; + // Whether to use MD5 to represent string. + bool UseMD5 = false; + + ProfileSymbolList *ProfSymList = nullptr; }; class SampleProfileWriterExtBinary : public SampleProfileWriterExtBinaryBase { public: SampleProfileWriterExtBinary(std::unique_ptr<raw_ostream> &OS) - : SampleProfileWriterExtBinaryBase(OS) { - initSectionHdrLayout(); - } - - virtual std::error_code writeSample(const FunctionSamples &S) override; - virtual void setProfileSymbolList(ProfileSymbolList *PSL) override { - ProfSymList = PSL; - }; - - // Set to use MD5 to represent string in NameTable. - virtual void setUseMD5() override { - UseMD5 = true; - addSectionFlag(SecNameTable, SecNameTableFlags::SecFlagMD5Name); - } - - // Set the profile to be partial. It means the profile is for - // common/shared code. The common profile is usually merged from - // profiles collected from running other targets. - virtual void setPartialProfile() override { - addSectionFlag(SecProfSummary, SecProfSummaryFlags::SecFlagPartial); - } + : SampleProfileWriterExtBinaryBase(OS) {} private: - virtual void initSectionHdrLayout() override { - // Note that SecFuncOffsetTable section is written after SecLBRProfile - // in the profile, but is put before SecLBRProfile in SectionHdrLayout. - // - // This is because sample reader follows the order of SectionHdrLayout to - // read each section, to read function profiles on demand sample reader - // need to get the offset of each function profile first. - // - // SecFuncOffsetTable section is written after SecLBRProfile in the - // profile because FuncOffsetTable needs to be populated while section - // SecLBRProfile is written. - SectionHdrLayout = {{SecProfSummary, 0, 0, 0}, - {SecNameTable, 0, 0, 0}, - {SecFuncOffsetTable, 0, 0, 0}, - {SecLBRProfile, 0, 0, 0}, - {SecProfileSymbolList, 0, 0, 0}}; - }; + std::error_code + writeDefaultLayout(const StringMap<FunctionSamples> &ProfileMap); + std::error_code + writeCtxSplitLayout(const StringMap<FunctionSamples> &ProfileMap); + virtual std::error_code writeSections(const StringMap<FunctionSamples> &ProfileMap) override; - std::error_code writeFuncOffsetTable(); - virtual std::error_code writeNameTable() override; - - ProfileSymbolList *ProfSymList = nullptr; + virtual std::error_code writeCustomSection(SecType Type) override { + return sampleprof_error::success; + }; - // Save the start of SecLBRProfile so we can compute the offset to the - // start of SecLBRProfile for each Function's Profile and will keep it - // in FuncOffsetTable. - uint64_t SecLBRProfileStart = 0; - // FuncOffsetTable maps function name to its profile offset in SecLBRProfile - // section. It is used to load function profile on demand. - MapVector<StringRef, uint64_t> FuncOffsetTable; - // Whether to use MD5 to represent string. 
- bool UseMD5 = false; + virtual void verifySecLayout(SectionLayout SL) override { + assert((SL == DefaultLayout || SL == CtxSplitLayout) && + "Unsupported layout"); + } }; // CompactBinary is a compact format of binary profile which both reduces diff --git a/llvm/include/llvm/Remarks/BitstreamRemarkParser.h b/llvm/include/llvm/Remarks/BitstreamRemarkParser.h index 7ebd731693b2..f7553ba53958 100644 --- a/llvm/include/llvm/Remarks/BitstreamRemarkParser.h +++ b/llvm/include/llvm/Remarks/BitstreamRemarkParser.h @@ -14,13 +14,13 @@ #ifndef LLVM_REMARKS_BITSTREAM_REMARK_PARSER_H #define LLVM_REMARKS_BITSTREAM_REMARK_PARSER_H +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/Optional.h" #include "llvm/ADT/StringRef.h" #include "llvm/Bitstream/BitstreamReader.h" -#include "llvm/Remarks/BitstreamRemarkContainer.h" -#include "llvm/Remarks/Remark.h" -#include "llvm/Remarks/RemarkParser.h" #include "llvm/Support/Error.h" #include <array> +#include <cstdint> namespace llvm { namespace remarks { diff --git a/llvm/include/llvm/Remarks/HotnessThresholdParser.h b/llvm/include/llvm/Remarks/HotnessThresholdParser.h new file mode 100644 index 000000000000..08bbf5f70b81 --- /dev/null +++ b/llvm/include/llvm/Remarks/HotnessThresholdParser.h @@ -0,0 +1,63 @@ +//===- HotnessThresholdParser.h - Parser for hotness threshold --*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file implements a simple parser to decode commandline option for +/// remarks hotness threshold that supports both int and a special 'auto' value. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_REMARKS_HOTNESSTHRESHOLDPARSER_H +#define LLVM_REMARKS_HOTNESSTHRESHOLDPARSER_H + +#include "llvm/ADT/Optional.h" +#include "llvm/Support/CommandLine.h" + +namespace llvm { +namespace remarks { + +// Parse remarks hotness threshold argument value. +// Valid option values are +// 1. integer: manually specified threshold; or +// 2. string 'auto': automatically get threshold from profile summary. +// +// Return None Optional if 'auto' is specified, indicating the value will +// be filled later during PSI. +inline Expected<Optional<uint64_t>> parseHotnessThresholdOption(StringRef Arg) { + if (Arg == "auto") + return None; + + int64_t Val; + if (Arg.getAsInteger(10, Val)) + return createStringError(llvm::inconvertibleErrorCode(), + "Not an integer: %s", Arg.data()); + + // Negative integer effectively means no threshold + return Val < 0 ? 
0 : Val; +} + +// A simple CL parser for '*-remarks-hotness-threshold=' +class HotnessThresholdParser : public cl::parser<Optional<uint64_t>> { +public: + HotnessThresholdParser(cl::Option &O) : cl::parser<Optional<uint64_t>>(O) {} + + bool parse(cl::Option &O, StringRef ArgName, StringRef Arg, + Optional<uint64_t> &V) { + auto ResultOrErr = parseHotnessThresholdOption(Arg); + if (!ResultOrErr) + return O.error("Invalid argument '" + Arg + + "', only integer or 'auto' is supported."); + + V = *ResultOrErr; + return false; + } +}; + +} // namespace remarks +} // namespace llvm +#endif // LLVM_REMARKS_HOTNESSTHRESHOLDPARSER_H diff --git a/llvm/include/llvm/Support/AArch64TargetParser.def b/llvm/include/llvm/Support/AArch64TargetParser.def index 13b7cfc4b5cd..332fb555e824 100644 --- a/llvm/include/llvm/Support/AArch64TargetParser.def +++ b/llvm/include/llvm/Support/AArch64TargetParser.def @@ -51,6 +51,21 @@ AARCH64_ARCH("armv8.6-a", ARMV8_6A, "8.6-A", "v8.6a", AArch64::AEK_RDM | AArch64::AEK_RCPC | AArch64::AEK_DOTPROD | AArch64::AEK_SM4 | AArch64::AEK_SHA3 | AArch64::AEK_BF16 | AArch64::AEK_SHA2 | AArch64::AEK_AES | AArch64::AEK_I8MM)) +AARCH64_ARCH("armv8.7-a", ARMV8_7A, "8.7-A", "v8.7a", + ARMBuildAttrs::CPUArch::v8_A, FK_CRYPTO_NEON_FP_ARMV8, + (AArch64::AEK_CRC | AArch64::AEK_FP | + AArch64::AEK_SIMD | AArch64::AEK_RAS | AArch64::AEK_LSE | + AArch64::AEK_RDM | AArch64::AEK_RCPC | AArch64::AEK_DOTPROD | + AArch64::AEK_SM4 | AArch64::AEK_SHA3 | AArch64::AEK_BF16 | + AArch64::AEK_SHA2 | AArch64::AEK_AES | AArch64::AEK_I8MM)) +// For v8-R, we do not enable crypto and align with GCC that enables a more +// minimal set of optional architecture extensions. +AARCH64_ARCH("armv8-r", ARMV8R, "8-R", "v8r", + ARMBuildAttrs::CPUArch::v8_R, FK_CRYPTO_NEON_FP_ARMV8, + (AArch64::AEK_CRC | AArch64::AEK_RDM | AArch64::AEK_SSBS | + AArch64::AEK_DOTPROD | AArch64::AEK_FP | AArch64::AEK_SIMD | + AArch64::AEK_FP16 | AArch64::AEK_FP16FML | AArch64::AEK_RAS | + AArch64::AEK_RCPC | AArch64::AEK_SB)) #undef AARCH64_ARCH #ifndef AARCH64_ARCH_EXT_NAME @@ -91,6 +106,10 @@ AARCH64_ARCH_EXT_NAME("i8mm", AArch64::AEK_I8MM, "+i8mm", "-i8mm AARCH64_ARCH_EXT_NAME("f32mm", AArch64::AEK_F32MM, "+f32mm", "-f32mm") AARCH64_ARCH_EXT_NAME("f64mm", AArch64::AEK_F64MM, "+f64mm", "-f64mm") AARCH64_ARCH_EXT_NAME("tme", AArch64::AEK_TME, "+tme", "-tme") +AARCH64_ARCH_EXT_NAME("ls64", AArch64::AEK_LS64, "+ls64", "-ls64") +AARCH64_ARCH_EXT_NAME("brbe", AArch64::AEK_BRBE, "+brbe", "-brbe") +AARCH64_ARCH_EXT_NAME("pauth", AArch64::AEK_PAUTH, "+pauth", "-pauth") +AARCH64_ARCH_EXT_NAME("flagm", AArch64::AEK_FLAGM, "+flagm", "-flagm") #undef AARCH64_ARCH_EXT_NAME #ifndef AARCH64_CPU_NAME @@ -130,6 +149,11 @@ AARCH64_CPU_NAME("cortex-a77", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false, AARCH64_CPU_NAME("cortex-a78", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false, (AArch64::AEK_FP16 | AArch64::AEK_DOTPROD | AArch64::AEK_RCPC | AArch64::AEK_SSBS)) +AARCH64_CPU_NAME("cortex-a78c", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false, + (AArch64::AEK_FP16 | AArch64::AEK_DOTPROD | AArch64::AEK_RCPC | + AArch64::AEK_SSBS)) +AARCH64_CPU_NAME("cortex-r82", ARMV8R, FK_CRYPTO_NEON_FP_ARMV8, false, + (AArch64::AEK_LSE)) AARCH64_CPU_NAME("cortex-x1", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false, (AArch64::AEK_FP16 | AArch64::AEK_DOTPROD | AArch64::AEK_RCPC | AArch64::AEK_SSBS)) @@ -140,6 +164,15 @@ AARCH64_CPU_NAME("neoverse-n1", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false, (AArch64::AEK_DOTPROD | AArch64::AEK_FP16 | AArch64::AEK_PROFILE | AArch64::AEK_RAS | AArch64::AEK_RCPC | 
AArch64::AEK_SSBS)) +AARCH64_CPU_NAME("neoverse-n2", ARMV8_5A, FK_CRYPTO_NEON_FP_ARMV8, false, + (AArch64::AEK_BF16 | AArch64::AEK_DOTPROD | AArch64::AEK_FP16 | + AArch64::AEK_I8MM | AArch64::AEK_MTE | AArch64::AEK_RAS | + AArch64::AEK_RCPC | AArch64::AEK_SB | AArch64::AEK_SSBS | + AArch64::AEK_SVE | AArch64::AEK_SVE2 | AArch64::AEK_SVE2BITPERM)) +AARCH64_CPU_NAME("neoverse-v1", ARMV8_4A, FK_CRYPTO_NEON_FP_ARMV8, false, + (AArch64::AEK_RAS | AArch64::AEK_SVE | AArch64::AEK_SSBS | + AArch64::AEK_RCPC | AArch64::AEK_FP16 | AArch64::AEK_BF16 | + AArch64::AEK_DOTPROD )) AARCH64_CPU_NAME("cyclone", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, (AArch64::AEK_NONE)) AARCH64_CPU_NAME("apple-a7", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, @@ -156,6 +189,8 @@ AARCH64_CPU_NAME("apple-a12", ARMV8_3A, FK_CRYPTO_NEON_FP_ARMV8, false, (AArch64::AEK_FP16)) AARCH64_CPU_NAME("apple-a13", ARMV8_4A, FK_CRYPTO_NEON_FP_ARMV8, false, (AArch64::AEK_FP16 | AArch64::AEK_FP16FML)) +AARCH64_CPU_NAME("apple-a14", ARMV8_5A, FK_CRYPTO_NEON_FP_ARMV8, false, + (AArch64::AEK_FP16 | AArch64::AEK_FP16FML)) AARCH64_CPU_NAME("apple-s4", ARMV8_3A, FK_CRYPTO_NEON_FP_ARMV8, false, (AArch64::AEK_FP16)) AARCH64_CPU_NAME("apple-s5", ARMV8_3A, FK_CRYPTO_NEON_FP_ARMV8, false, diff --git a/llvm/include/llvm/Support/AArch64TargetParser.h b/llvm/include/llvm/Support/AArch64TargetParser.h index b045e31bc92a..7c9e245e3889 100644 --- a/llvm/include/llvm/Support/AArch64TargetParser.h +++ b/llvm/include/llvm/Support/AArch64TargetParser.h @@ -62,6 +62,10 @@ enum ArchExtKind : uint64_t { AEK_I8MM = 1 << 30, AEK_F32MM = 1ULL << 31, AEK_F64MM = 1ULL << 32, + AEK_LS64 = 1ULL << 33, + AEK_BRBE = 1ULL << 34, + AEK_PAUTH = 1ULL << 35, + AEK_FLAGM = 1ULL << 36, }; enum class ArchKind { @@ -104,7 +108,7 @@ const ArchKind ArchKinds[] = { }; // FIXME: These should be moved to TargetTuple once it exists -bool getExtensionFeatures(unsigned Extensions, +bool getExtensionFeatures(uint64_t Extensions, std::vector<StringRef> &Features); bool getArchFeatures(ArchKind AK, std::vector<StringRef> &Features); @@ -117,7 +121,7 @@ StringRef getArchExtFeature(StringRef ArchExt); // Information by Name unsigned getDefaultFPU(StringRef CPU, ArchKind AK); -unsigned getDefaultExtensions(StringRef CPU, ArchKind AK); +uint64_t getDefaultExtensions(StringRef CPU, ArchKind AK); StringRef getDefaultCPU(StringRef Arch); ArchKind getCPUArchKind(StringRef CPU); diff --git a/llvm/include/llvm/Support/AMDGPUMetadata.h b/llvm/include/llvm/Support/AMDGPUMetadata.h index 920c97f7e112..eadc25870096 100644 --- a/llvm/include/llvm/Support/AMDGPUMetadata.h +++ b/llvm/include/llvm/Support/AMDGPUMetadata.h @@ -15,6 +15,7 @@ #ifndef LLVM_SUPPORT_AMDGPUMETADATA_H #define LLVM_SUPPORT_AMDGPUMETADATA_H +#include "llvm/ADT/StringRef.h" #include <cstdint> #include <string> #include <system_error> @@ -430,7 +431,7 @@ struct Metadata final { }; /// Converts \p String to \p HSAMetadata. -std::error_code fromString(std::string String, Metadata &HSAMetadata); +std::error_code fromString(StringRef String, Metadata &HSAMetadata); /// Converts \p HSAMetadata to \p String. 
std::error_code toString(Metadata HSAMetadata, std::string &String); diff --git a/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h b/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h index d1c2147536a7..bd84da43dff7 100644 --- a/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h +++ b/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h @@ -100,7 +100,7 @@ enum : int32_t { #define COMPUTE_PGM_RSRC2(NAME, SHIFT, WIDTH) \ AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC2_ ## NAME, SHIFT, WIDTH) enum : int32_t { - COMPUTE_PGM_RSRC2(ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, 0, 1), + COMPUTE_PGM_RSRC2(ENABLE_PRIVATE_SEGMENT, 0, 1), COMPUTE_PGM_RSRC2(USER_SGPR_COUNT, 1, 5), COMPUTE_PGM_RSRC2(ENABLE_TRAP_HANDLER, 6, 1), COMPUTE_PGM_RSRC2(ENABLE_SGPR_WORKGROUP_ID_X, 7, 1), @@ -162,39 +162,49 @@ struct kernel_descriptor_t { uint8_t reserved2[6]; }; +enum : uint32_t { + GROUP_SEGMENT_FIXED_SIZE_OFFSET = 0, + PRIVATE_SEGMENT_FIXED_SIZE_OFFSET = 4, + RESERVED0_OFFSET = 8, + KERNEL_CODE_ENTRY_BYTE_OFFSET_OFFSET = 16, + RESERVED1_OFFSET = 24, + COMPUTE_PGM_RSRC3_OFFSET = 44, + COMPUTE_PGM_RSRC1_OFFSET = 48, + COMPUTE_PGM_RSRC2_OFFSET = 52, + KERNEL_CODE_PROPERTIES_OFFSET = 56, + RESERVED2_OFFSET = 58, +}; + static_assert( sizeof(kernel_descriptor_t) == 64, "invalid size for kernel_descriptor_t"); -static_assert( - offsetof(kernel_descriptor_t, group_segment_fixed_size) == 0, - "invalid offset for group_segment_fixed_size"); -static_assert( - offsetof(kernel_descriptor_t, private_segment_fixed_size) == 4, - "invalid offset for private_segment_fixed_size"); -static_assert( - offsetof(kernel_descriptor_t, reserved0) == 8, - "invalid offset for reserved0"); -static_assert( - offsetof(kernel_descriptor_t, kernel_code_entry_byte_offset) == 16, - "invalid offset for kernel_code_entry_byte_offset"); -static_assert( - offsetof(kernel_descriptor_t, reserved1) == 24, - "invalid offset for reserved1"); -static_assert( - offsetof(kernel_descriptor_t, compute_pgm_rsrc3) == 44, - "invalid offset for compute_pgm_rsrc3"); -static_assert( - offsetof(kernel_descriptor_t, compute_pgm_rsrc1) == 48, - "invalid offset for compute_pgm_rsrc1"); -static_assert( - offsetof(kernel_descriptor_t, compute_pgm_rsrc2) == 52, - "invalid offset for compute_pgm_rsrc2"); -static_assert( - offsetof(kernel_descriptor_t, kernel_code_properties) == 56, - "invalid offset for kernel_code_properties"); -static_assert( - offsetof(kernel_descriptor_t, reserved2) == 58, - "invalid offset for reserved2"); +static_assert(offsetof(kernel_descriptor_t, group_segment_fixed_size) == + GROUP_SEGMENT_FIXED_SIZE_OFFSET, + "invalid offset for group_segment_fixed_size"); +static_assert(offsetof(kernel_descriptor_t, private_segment_fixed_size) == + PRIVATE_SEGMENT_FIXED_SIZE_OFFSET, + "invalid offset for private_segment_fixed_size"); +static_assert(offsetof(kernel_descriptor_t, reserved0) == RESERVED0_OFFSET, + "invalid offset for reserved0"); +static_assert(offsetof(kernel_descriptor_t, kernel_code_entry_byte_offset) == + KERNEL_CODE_ENTRY_BYTE_OFFSET_OFFSET, + "invalid offset for kernel_code_entry_byte_offset"); +static_assert(offsetof(kernel_descriptor_t, reserved1) == RESERVED1_OFFSET, + "invalid offset for reserved1"); +static_assert(offsetof(kernel_descriptor_t, compute_pgm_rsrc3) == + COMPUTE_PGM_RSRC3_OFFSET, + "invalid offset for compute_pgm_rsrc3"); +static_assert(offsetof(kernel_descriptor_t, compute_pgm_rsrc1) == + COMPUTE_PGM_RSRC1_OFFSET, + "invalid offset for compute_pgm_rsrc1"); +static_assert(offsetof(kernel_descriptor_t, compute_pgm_rsrc2) == + 
COMPUTE_PGM_RSRC2_OFFSET, + "invalid offset for compute_pgm_rsrc2"); +static_assert(offsetof(kernel_descriptor_t, kernel_code_properties) == + KERNEL_CODE_PROPERTIES_OFFSET, + "invalid offset for kernel_code_properties"); +static_assert(offsetof(kernel_descriptor_t, reserved2) == RESERVED2_OFFSET, + "invalid offset for reserved2"); } // end namespace amdhsa } // end namespace llvm diff --git a/llvm/include/llvm/Support/ARMTargetParser.def b/llvm/include/llvm/Support/ARMTargetParser.def index 9f51c841e429..37cf0a93bb04 100644 --- a/llvm/include/llvm/Support/ARMTargetParser.def +++ b/llvm/include/llvm/Support/ARMTargetParser.def @@ -118,6 +118,12 @@ ARM_ARCH("armv8.6-a", ARMV8_6A, "8.6-A", "v8.6a", ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP | ARM::AEK_CRC | ARM::AEK_RAS | ARM::AEK_DOTPROD | ARM::AEK_BF16 | ARM::AEK_SHA2 | ARM::AEK_AES | ARM::AEK_I8MM)) +ARM_ARCH("armv8.7-a", ARMV8_7A, "8.7-A", "v8.7a", + ARMBuildAttrs::CPUArch::v8_A, FK_CRYPTO_NEON_FP_ARMV8, + (ARM::AEK_SEC | ARM::AEK_MP | ARM::AEK_VIRT | ARM::AEK_HWDIVARM | + ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP | ARM::AEK_CRC | ARM::AEK_RAS | + ARM::AEK_DOTPROD | ARM::AEK_BF16 | ARM::AEK_SHA2 | ARM::AEK_AES | + ARM::AEK_I8MM)) ARM_ARCH("armv8-r", ARMV8R, "8-R", "v8r", ARMBuildAttrs::CPUArch::v8_R, FK_NEON_FP_ARMV8, (ARM::AEK_MP | ARM::AEK_VIRT | ARM::AEK_HWDIVARM | ARM::AEK_HWDIVTHUMB | @@ -294,12 +300,19 @@ ARM_CPU_NAME("cortex-a76ae", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false, (ARM::AEK_FP16 | ARM::AEK_DOTPROD)) ARM_CPU_NAME("cortex-a77", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false, (ARM::AEK_FP16 | ARM::AEK_DOTPROD)) -ARM_CPU_NAME("cortex-a78",ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false, +ARM_CPU_NAME("cortex-a78", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false, (ARM::AEK_FP16 | ARM::AEK_DOTPROD)) +ARM_CPU_NAME("cortex-a78c", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false, + ARM::AEK_FP16 | ARM::AEK_DOTPROD) ARM_CPU_NAME("cortex-x1", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false, (ARM::AEK_FP16 | ARM::AEK_DOTPROD)) ARM_CPU_NAME("neoverse-n1", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false, (ARM::AEK_FP16 | ARM::AEK_DOTPROD)) +ARM_CPU_NAME("neoverse-n2", ARMV8_5A, FK_CRYPTO_NEON_FP_ARMV8, false, + (ARM::AEK_BF16 | ARM::AEK_DOTPROD | ARM::AEK_I8MM | ARM::AEK_RAS | + ARM::AEK_SB)) +ARM_CPU_NAME("neoverse-v1", ARMV8_4A, FK_CRYPTO_NEON_FP_ARMV8, false, + (ARM::AEK_RAS | ARM::AEK_FP16 | ARM::AEK_BF16 | ARM::AEK_DOTPROD)) ARM_CPU_NAME("cyclone", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, ARM::AEK_CRC) ARM_CPU_NAME("exynos-m3", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, ARM::AEK_CRC) ARM_CPU_NAME("exynos-m4", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false, diff --git a/llvm/include/llvm/Support/ARMTargetParser.h b/llvm/include/llvm/Support/ARMTargetParser.h index 4e76b3c4b83e..7dd2abd29212 100644 --- a/llvm/include/llvm/Support/ARMTargetParser.h +++ b/llvm/include/llvm/Support/ARMTargetParser.h @@ -250,7 +250,8 @@ StringRef getSubArch(ArchKind AK); StringRef getArchExtName(uint64_t ArchExtKind); StringRef getArchExtFeature(StringRef ArchExt); bool appendArchExtFeatures(StringRef CPU, ARM::ArchKind AK, StringRef ArchExt, - std::vector<StringRef> &Features); + std::vector<StringRef> &Features, + unsigned &ArgFPUKind); StringRef getHWDivName(uint64_t HWDivKind); // Information by Name diff --git a/llvm/include/llvm/Support/ARMWinEH.h b/llvm/include/llvm/Support/ARMWinEH.h index 857a0d3814a8..327aa9804849 100644 --- a/llvm/include/llvm/Support/ARMWinEH.h +++ b/llvm/include/llvm/Support/ARMWinEH.h @@ -31,6 +31,9 @@ enum class ReturnType { /// RuntimeFunction - An entry in the table of procedure data 
(.pdata) /// +/// This is ARM specific, but the Function Start RVA, Flag and +/// ExceptionInformationRVA fields work identically for ARM64. +/// /// 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 /// 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 /// +---------------------------------------------------------------+ @@ -204,6 +207,85 @@ inline uint16_t StackAdjustment(const RuntimeFunction &RF) { /// purpose (r0-r15) and VFP (d0-d31) registers. std::pair<uint16_t, uint32_t> SavedRegisterMask(const RuntimeFunction &RF); +/// RuntimeFunctionARM64 - An entry in the table of procedure data (.pdata) +/// +/// 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 +/// 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 +/// +---------------------------------------------------------------+ +/// | Function Start RVA | +/// +-----------------+---+-+-------+-----+---------------------+---+ +/// | Frame Size |CR |H| RegI |RegF | Function Length |Flg| +/// +-----------------+---+-+-------+-----+---------------------+---+ +/// +/// See https://docs.microsoft.com/en-us/cpp/build/arm64-exception-handling +/// for the full reference for this struct. + +class RuntimeFunctionARM64 { +public: + const support::ulittle32_t BeginAddress; + const support::ulittle32_t UnwindData; + + RuntimeFunctionARM64(const support::ulittle32_t *Data) + : BeginAddress(Data[0]), UnwindData(Data[1]) {} + + RuntimeFunctionARM64(const support::ulittle32_t BeginAddress, + const support::ulittle32_t UnwindData) + : BeginAddress(BeginAddress), UnwindData(UnwindData) {} + + RuntimeFunctionFlag Flag() const { + return RuntimeFunctionFlag(UnwindData & 0x3); + } + + uint32_t ExceptionInformationRVA() const { + assert(Flag() == RuntimeFunctionFlag::RFF_Unpacked && + "unpacked form required for this operation"); + return (UnwindData & ~0x3); + } + + uint32_t PackedUnwindData() const { + assert((Flag() == RuntimeFunctionFlag::RFF_Packed || + Flag() == RuntimeFunctionFlag::RFF_PackedFragment) && + "packed form required for this operation"); + return (UnwindData & ~0x3); + } + uint32_t FunctionLength() const { + assert((Flag() == RuntimeFunctionFlag::RFF_Packed || + Flag() == RuntimeFunctionFlag::RFF_PackedFragment) && + "packed form required for this operation"); + return (((UnwindData & 0x00001ffc) >> 2) << 2); + } + uint8_t RegF() const { + assert((Flag() == RuntimeFunctionFlag::RFF_Packed || + Flag() == RuntimeFunctionFlag::RFF_PackedFragment) && + "packed form required for this operation"); + return ((UnwindData & 0x0000e000) >> 13); + } + uint8_t RegI() const { + assert((Flag() == RuntimeFunctionFlag::RFF_Packed || + Flag() == RuntimeFunctionFlag::RFF_PackedFragment) && + "packed form required for this operation"); + return ((UnwindData & 0x000f0000) >> 16); + } + bool H() const { + assert((Flag() == RuntimeFunctionFlag::RFF_Packed || + Flag() == RuntimeFunctionFlag::RFF_PackedFragment) && + "packed form required for this operation"); + return ((UnwindData & 0x00100000) >> 20); + } + uint8_t CR() const { + assert((Flag() == RuntimeFunctionFlag::RFF_Packed || + Flag() == RuntimeFunctionFlag::RFF_PackedFragment) && + "packed form required for this operation"); + return ((UnwindData & 0x600000) >> 21); + } + uint16_t FrameSize() const { + assert((Flag() == RuntimeFunctionFlag::RFF_Packed || + Flag() == RuntimeFunctionFlag::RFF_PackedFragment) && + "packed form required for this operation"); + return ((UnwindData & 0xff800000) >> 23); + } +}; + /// ExceptionDataRecord - An entry in the 
table of exception data (.xdata) /// /// The format on ARM is: @@ -416,12 +498,13 @@ struct ExceptionDataRecord { uint32_t ExceptionHandlerRVA() const { assert(X() && "Exception Handler RVA is only valid if the X bit is set"); - return Data[HeaderWords(*this) + EpilogueCount() + CodeWords()]; + return Data[HeaderWords(*this) + (E() ? 0 : EpilogueCount()) + CodeWords()]; } uint32_t ExceptionHandlerParameter() const { assert(X() && "Exception Handler RVA is only valid if the X bit is set"); - return Data[HeaderWords(*this) + EpilogueCount() + CodeWords() + 1]; + return Data[HeaderWords(*this) + (E() ? 0 : EpilogueCount()) + CodeWords() + + 1]; } }; diff --git a/llvm/include/llvm/Support/AlignOf.h b/llvm/include/llvm/Support/AlignOf.h index eb42542b777f..f586d7f182aa 100644 --- a/llvm/include/llvm/Support/AlignOf.h +++ b/llvm/include/llvm/Support/AlignOf.h @@ -13,41 +13,20 @@ #ifndef LLVM_SUPPORT_ALIGNOF_H #define LLVM_SUPPORT_ALIGNOF_H -#include "llvm/Support/Compiler.h" -#include <cstddef> +#include <type_traits> namespace llvm { -namespace detail { - -template <typename T, typename... Ts> class AlignerImpl { - T t; - AlignerImpl<Ts...> rest; - AlignerImpl() = delete; -}; - -template <typename T> class AlignerImpl<T> { - T t; - AlignerImpl() = delete; -}; - -template <typename T, typename... Ts> union SizerImpl { - char arr[sizeof(T)]; - SizerImpl<Ts...> rest; -}; - -template <typename T> union SizerImpl<T> { char arr[sizeof(T)]; }; -} // end namespace detail - /// A suitably aligned and sized character array member which can hold elements /// of any type. /// -/// These types may be arrays, structs, or any other types. This exposes a -/// `buffer` member which can be used as suitable storage for a placement new of -/// any of these types. +/// This template is equivalent to std::aligned_union_t<1, ...>, but we cannot +/// use it due to a bug in the MSVC x86 compiler: +/// https://github.com/microsoft/STL/issues/1533 +/// Using `alignas` here works around the bug. template <typename T, typename... Ts> struct AlignedCharArrayUnion { - alignas(::llvm::detail::AlignerImpl<T, Ts...>) char buffer[sizeof( - llvm::detail::SizerImpl<T, Ts...>)]; + using AlignedUnion = std::aligned_union_t<1, T, Ts...>; + alignas(alignof(AlignedUnion)) char buffer[sizeof(AlignedUnion)]; }; } // end namespace llvm diff --git a/llvm/include/llvm/Support/Allocator.h b/llvm/include/llvm/Support/Allocator.h index 40c967ccc485..245432debce6 100644 --- a/llvm/include/llvm/Support/Allocator.h +++ b/llvm/include/llvm/Support/Allocator.h @@ -66,7 +66,8 @@ template <typename AllocatorT = MallocAllocator, size_t SlabSize = 4096, size_t SizeThreshold = SlabSize, size_t GrowthDelay = 128> class BumpPtrAllocatorImpl : public AllocatorBase<BumpPtrAllocatorImpl<AllocatorT, SlabSize, - SizeThreshold, GrowthDelay>> { + SizeThreshold, GrowthDelay>>, + private AllocatorT { public: static_assert(SizeThreshold <= SlabSize, "The SizeThreshold must be at most the SlabSize to ensure " @@ -80,15 +81,15 @@ public: template <typename T> BumpPtrAllocatorImpl(T &&Allocator) - : Allocator(std::forward<T &&>(Allocator)) {} + : AllocatorT(std::forward<T &&>(Allocator)) {} // Manually implement a move constructor as we must clear the old allocator's // slabs as a matter of correctness. 
BumpPtrAllocatorImpl(BumpPtrAllocatorImpl &&Old) - : CurPtr(Old.CurPtr), End(Old.End), Slabs(std::move(Old.Slabs)), + : AllocatorT(static_cast<AllocatorT &&>(Old)), CurPtr(Old.CurPtr), + End(Old.End), Slabs(std::move(Old.Slabs)), CustomSizedSlabs(std::move(Old.CustomSizedSlabs)), - BytesAllocated(Old.BytesAllocated), RedZoneSize(Old.RedZoneSize), - Allocator(std::move(Old.Allocator)) { + BytesAllocated(Old.BytesAllocated), RedZoneSize(Old.RedZoneSize) { Old.CurPtr = Old.End = nullptr; Old.BytesAllocated = 0; Old.Slabs.clear(); @@ -110,7 +111,7 @@ public: RedZoneSize = RHS.RedZoneSize; Slabs = std::move(RHS.Slabs); CustomSizedSlabs = std::move(RHS.CustomSizedSlabs); - Allocator = std::move(RHS.Allocator); + AllocatorT::operator=(static_cast<AllocatorT &&>(RHS)); RHS.CurPtr = RHS.End = nullptr; RHS.BytesAllocated = 0; @@ -170,7 +171,8 @@ public: // If Size is really big, allocate a separate slab for it. size_t PaddedSize = SizeToAllocate + Alignment.value() - 1; if (PaddedSize > SizeThreshold) { - void *NewSlab = Allocator.Allocate(PaddedSize, alignof(std::max_align_t)); + void *NewSlab = + AllocatorT::Allocate(PaddedSize, alignof(std::max_align_t)); // We own the new slab and don't want anyone reading anyting other than // pieces returned from this method. So poison the whole slab. __asan_poison_memory_region(NewSlab, PaddedSize); @@ -315,9 +317,6 @@ private: /// a sanitizer. size_t RedZoneSize = 1; - /// The allocator instance we use to get slabs of memory. - AllocatorT Allocator; - static size_t computeSlabSize(unsigned SlabIdx) { // Scale the actual allocated slab size based on the number of slabs // allocated. Every GrowthDelay slabs allocated, we double @@ -333,7 +332,7 @@ private: size_t AllocatedSlabSize = computeSlabSize(Slabs.size()); void *NewSlab = - Allocator.Allocate(AllocatedSlabSize, alignof(std::max_align_t)); + AllocatorT::Allocate(AllocatedSlabSize, alignof(std::max_align_t)); // We own the new slab and don't want anyone reading anything other than // pieces returned from this method. So poison the whole slab. __asan_poison_memory_region(NewSlab, AllocatedSlabSize); @@ -349,7 +348,7 @@ private: for (; I != E; ++I) { size_t AllocatedSlabSize = computeSlabSize(std::distance(Slabs.begin(), I)); - Allocator.Deallocate(*I, AllocatedSlabSize, alignof(std::max_align_t)); + AllocatorT::Deallocate(*I, AllocatedSlabSize, alignof(std::max_align_t)); } } @@ -358,7 +357,7 @@ private: for (auto &PtrAndSize : CustomSizedSlabs) { void *Ptr = PtrAndSize.first; size_t Size = PtrAndSize.second; - Allocator.Deallocate(Ptr, Size, alignof(std::max_align_t)); + AllocatorT::Deallocate(Ptr, Size, alignof(std::max_align_t)); } } diff --git a/llvm/include/llvm/Support/AtomicOrdering.h b/llvm/include/llvm/Support/AtomicOrdering.h index a8d89955fa2b..27ca825cef46 100644 --- a/llvm/include/llvm/Support/AtomicOrdering.h +++ b/llvm/include/llvm/Support/AtomicOrdering.h @@ -21,7 +21,7 @@ namespace llvm { -/// Atomic ordering for C11 / C++11's memody models. +/// Atomic ordering for C11 / C++11's memory models. /// /// These values cannot change because they are shared with standard library /// implementations as well as with other compilers. @@ -87,7 +87,7 @@ inline const char *toIRString(AtomicOrdering ao) { /// Returns true if ao is stronger than other as defined by the AtomicOrdering /// lattice, which is based on C++'s definition. 
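// A few concrete points of that lattice, read off the tables below (illustration
// only, using the renamed AO/Other parameters):
//   assert(isStrongerThan(AtomicOrdering::Release, AtomicOrdering::Monotonic));
//   assert(!isStrongerThan(AtomicOrdering::Acquire, AtomicOrdering::Release));  // incomparable
//   assert(!isStrongerThan(AtomicOrdering::SequentiallyConsistent,
//                          AtomicOrdering::SequentiallyConsistent));            // "stronger than" is strict
//   assert(isAtLeastOrStrongerThan(AtomicOrdering::Acquire, AtomicOrdering::Acquire));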
-inline bool isStrongerThan(AtomicOrdering ao, AtomicOrdering other) { +inline bool isStrongerThan(AtomicOrdering AO, AtomicOrdering Other) { static const bool lookup[8][8] = { // NA UN RX CO AC RE AR SC /* NotAtomic */ {false, false, false, false, false, false, false, false}, @@ -99,10 +99,10 @@ inline bool isStrongerThan(AtomicOrdering ao, AtomicOrdering other) { /* acq_rel */ { true, true, true, true, true, true, false, false}, /* seq_cst */ { true, true, true, true, true, true, true, false}, }; - return lookup[static_cast<size_t>(ao)][static_cast<size_t>(other)]; + return lookup[static_cast<size_t>(AO)][static_cast<size_t>(Other)]; } -inline bool isAtLeastOrStrongerThan(AtomicOrdering ao, AtomicOrdering other) { +inline bool isAtLeastOrStrongerThan(AtomicOrdering AO, AtomicOrdering Other) { static const bool lookup[8][8] = { // NA UN RX CO AC RE AR SC /* NotAtomic */ { true, false, false, false, false, false, false, false}, @@ -114,26 +114,26 @@ inline bool isAtLeastOrStrongerThan(AtomicOrdering ao, AtomicOrdering other) { /* acq_rel */ { true, true, true, true, true, true, true, false}, /* seq_cst */ { true, true, true, true, true, true, true, true}, }; - return lookup[static_cast<size_t>(ao)][static_cast<size_t>(other)]; + return lookup[static_cast<size_t>(AO)][static_cast<size_t>(Other)]; } -inline bool isStrongerThanUnordered(AtomicOrdering ao) { - return isStrongerThan(ao, AtomicOrdering::Unordered); +inline bool isStrongerThanUnordered(AtomicOrdering AO) { + return isStrongerThan(AO, AtomicOrdering::Unordered); } -inline bool isStrongerThanMonotonic(AtomicOrdering ao) { - return isStrongerThan(ao, AtomicOrdering::Monotonic); +inline bool isStrongerThanMonotonic(AtomicOrdering AO) { + return isStrongerThan(AO, AtomicOrdering::Monotonic); } -inline bool isAcquireOrStronger(AtomicOrdering ao) { - return isAtLeastOrStrongerThan(ao, AtomicOrdering::Acquire); +inline bool isAcquireOrStronger(AtomicOrdering AO) { + return isAtLeastOrStrongerThan(AO, AtomicOrdering::Acquire); } -inline bool isReleaseOrStronger(AtomicOrdering ao) { - return isAtLeastOrStrongerThan(ao, AtomicOrdering::Release); +inline bool isReleaseOrStronger(AtomicOrdering AO) { + return isAtLeastOrStrongerThan(AO, AtomicOrdering::Release); } -inline AtomicOrderingCABI toCABI(AtomicOrdering ao) { +inline AtomicOrderingCABI toCABI(AtomicOrdering AO) { static const AtomicOrderingCABI lookup[8] = { /* NotAtomic */ AtomicOrderingCABI::relaxed, /* Unordered */ AtomicOrderingCABI::relaxed, @@ -144,7 +144,7 @@ inline AtomicOrderingCABI toCABI(AtomicOrdering ao) { /* acq_rel */ AtomicOrderingCABI::acq_rel, /* seq_cst */ AtomicOrderingCABI::seq_cst, }; - return lookup[static_cast<size_t>(ao)]; + return lookup[static_cast<size_t>(AO)]; } } // end namespace llvm diff --git a/llvm/include/llvm/Support/BinaryItemStream.h b/llvm/include/llvm/Support/BinaryItemStream.h index 4cd66adcc01a..4d27013ce368 100644 --- a/llvm/include/llvm/Support/BinaryItemStream.h +++ b/llvm/include/llvm/Support/BinaryItemStream.h @@ -88,8 +88,7 @@ private: if (Offset >= getLength()) return make_error<BinaryStreamError>(stream_error_code::stream_too_short); ++Offset; - auto Iter = - std::lower_bound(ItemEndOffsets.begin(), ItemEndOffsets.end(), Offset); + auto Iter = llvm::lower_bound(ItemEndOffsets, Offset); size_t Idx = std::distance(ItemEndOffsets.begin(), Iter); assert(Idx < Items.size() && "binary search for offset failed"); return Idx; diff --git a/llvm/include/llvm/Support/BinaryStreamRef.h b/llvm/include/llvm/Support/BinaryStreamRef.h index 
5375d6a3a761..ba4c3873586d 100644 --- a/llvm/include/llvm/Support/BinaryStreamRef.h +++ b/llvm/include/llvm/Support/BinaryStreamRef.h @@ -121,12 +121,12 @@ public: bool valid() const { return BorrowedImpl != nullptr; } - bool operator==(const RefType &Other) const { - if (BorrowedImpl != Other.BorrowedImpl) + friend bool operator==(const RefType &LHS, const RefType &RHS) { + if (LHS.BorrowedImpl != RHS.BorrowedImpl) return false; - if (ViewOffset != Other.ViewOffset) + if (LHS.ViewOffset != RHS.ViewOffset) return false; - if (Length != Other.Length) + if (LHS.Length != RHS.Length) return false; return true; } diff --git a/llvm/include/llvm/Support/CFGDiff.h b/llvm/include/llvm/Support/CFGDiff.h index 94734ce70e02..c90b9aca78b5 100644 --- a/llvm/include/llvm/Support/CFGDiff.h +++ b/llvm/include/llvm/Support/CFGDiff.h @@ -30,67 +30,43 @@ // a non-inversed graph, the children are naturally the successors when // InverseEdge is false and the predecessors when InverseEdge is true. -// We define two base clases that call into GraphDiff, one for successors -// (CFGSuccessors), where InverseEdge is false, and one for predecessors -// (CFGPredecessors), where InverseEdge is true. -// FIXME: Further refactoring may merge the two base classes into a single one -// templated / parametrized on using succ_iterator/pred_iterator and false/true -// for the InverseEdge. - -// CFGViewChildren and CFGViewPredecessors, both can be parametrized to -// consider the graph inverted or not (i.e. InverseGraph). Successors -// implicitly has InverseEdge = false and Predecessors implicitly has -// InverseEdge = true (see calls to GraphDiff methods in there). The GraphTraits -// instantiations that follow define the value of InverseGraph. - -// GraphTraits instantiations: -// - GraphDiff<BasicBlock *> is equivalent to InverseGraph = false -// - GraphDiff<Inverse<BasicBlock *>> is equivalent to InverseGraph = true -// - second pair item is BasicBlock *, then InverseEdge = false (so it inherits -// from CFGViewChildren). -// - second pair item is Inverse<BasicBlock *>, then InverseEdge = true (so it -// inherits from CFGViewPredecessors). - -// The 4 GraphTraits are as follows: -// 1. std::pair<const GraphDiff<BasicBlock *> *, BasicBlock *>> : -// CFGViewChildren<false> -// Regular CFG, children means successors, InverseGraph = false, -// InverseEdge = false. -// 2. std::pair<const GraphDiff<Inverse<BasicBlock *>> *, BasicBlock *>> : -// CFGViewChildren<true> -// Reverse the graph, get successors but reverse-apply updates, -// InverseGraph = true, InverseEdge = false. -// 3. std::pair<const GraphDiff<BasicBlock *> *, Inverse<BasicBlock *>>> : -// CFGViewPredecessors<false> -// Regular CFG, reverse edges, so children mean predecessors, -// InverseGraph = false, InverseEdge = true. -// 4. std::pair<const GraphDiff<Inverse<BasicBlock *>> *, Inverse<BasicBlock *>> -// : CFGViewPredecessors<true> -// Reverse the graph and the edges, InverseGraph = true, InverseEdge = true. - namespace llvm { -// GraphDiff defines a CFG snapshot: given a set of Update<NodePtr>, provide -// utilities to skip edges marked as deleted and return a set of edges marked as -// newly inserted. 
The current diff treats the CFG as a graph rather than a +namespace detail { +template <typename Range> +auto reverse_if_helper(Range &&R, std::integral_constant<bool, false>) { + return std::forward<Range>(R); +} + +template <typename Range> +auto reverse_if_helper(Range &&R, std::integral_constant<bool, true>) { + return llvm::reverse(std::forward<Range>(R)); +} + +template <bool B, typename Range> auto reverse_if(Range &&R) { + return reverse_if_helper(std::forward<Range>(R), + std::integral_constant<bool, B>{}); +} +} // namespace detail + +// GraphDiff defines a CFG snapshot: given a set of Update<NodePtr>, provides +// a getChildren method to get a Node's children based on the additional updates +// in the snapshot. The current diff treats the CFG as a graph rather than a // multigraph. Added edges are pruned to be unique, and deleted edges will // remove all existing edges between two blocks. template <typename NodePtr, bool InverseGraph = false> class GraphDiff { - using UpdateMapType = SmallDenseMap<NodePtr, SmallVector<NodePtr, 2>>; - struct EdgesInsertedDeleted { - UpdateMapType Succ; - UpdateMapType Pred; + struct DeletesInserts { + SmallVector<NodePtr, 2> DI[2]; }; - // Store Deleted edges on position 0, and Inserted edges on position 1. - EdgesInsertedDeleted Edges[2]; + using UpdateMapType = SmallDenseMap<NodePtr, DeletesInserts>; + UpdateMapType Succ; + UpdateMapType Pred; + // By default, it is assumed that, given a CFG and a set of updates, we wish // to apply these updates as given. If UpdatedAreReverseApplied is set, the // updates will be applied in reverse: deleted edges are considered re-added // and inserted edges are considered deleted when returning children. bool UpdatedAreReverseApplied; - // Using a singleton empty vector for all node requests with no - // children. - SmallVector<NodePtr, 0> Empty; // Keep the list of legalized updates for a deterministic order of updates // when using a GraphDiff for incremental updates in the DominatorTree. @@ -98,14 +74,19 @@ template <typename NodePtr, bool InverseGraph = false> class GraphDiff { SmallVector<cfg::Update<NodePtr>, 4> LegalizedUpdates; void printMap(raw_ostream &OS, const UpdateMapType &M) const { - for (auto Pair : M) - for (auto Child : Pair.second) { - OS << "("; - Pair.first->printAsOperand(OS, false); - OS << ", "; - Child->printAsOperand(OS, false); - OS << ") "; + StringRef DIText[2] = {"Delete", "Insert"}; + for (auto Pair : M) { + for (unsigned IsInsert = 0; IsInsert <= 1; ++IsInsert) { + OS << DIText[IsInsert] << " edges: \n"; + for (auto Child : Pair.second.DI[IsInsert]) { + OS << "("; + Pair.first->printAsOperand(OS, false); + OS << ", "; + Child->printAsOperand(OS, false); + OS << ") "; + } } + } OS << "\n"; } @@ -113,15 +94,12 @@ public: GraphDiff() : UpdatedAreReverseApplied(false) {} GraphDiff(ArrayRef<cfg::Update<NodePtr>> Updates, bool ReverseApplyUpdates = false) { - cfg::LegalizeUpdates<NodePtr>(Updates, LegalizedUpdates, InverseGraph, - /*ReverseResultOrder=*/true); - // The legalized updates are stored in reverse so we can pop_back when doing - // incremental updates. 
+ cfg::LegalizeUpdates<NodePtr>(Updates, LegalizedUpdates, InverseGraph); for (auto U : LegalizedUpdates) { unsigned IsInsert = (U.getKind() == cfg::UpdateKind::Insert) == !ReverseApplyUpdates; - Edges[IsInsert].Succ[U.getFrom()].push_back(U.getTo()); - Edges[IsInsert].Pred[U.getTo()].push_back(U.getFrom()); + Succ[U.getFrom()].DI[IsInsert].push_back(U.getTo()); + Pred[U.getTo()].DI[IsInsert].push_back(U.getFrom()); } UpdatedAreReverseApplied = ReverseApplyUpdates; } @@ -137,55 +115,56 @@ public: auto U = LegalizedUpdates.pop_back_val(); unsigned IsInsert = (U.getKind() == cfg::UpdateKind::Insert) == !UpdatedAreReverseApplied; - auto &SuccList = Edges[IsInsert].Succ[U.getFrom()]; + auto &SuccDIList = Succ[U.getFrom()]; + auto &SuccList = SuccDIList.DI[IsInsert]; assert(SuccList.back() == U.getTo()); SuccList.pop_back(); - if (SuccList.empty()) - Edges[IsInsert].Succ.erase(U.getFrom()); + if (SuccList.empty() && SuccDIList.DI[!IsInsert].empty()) + Succ.erase(U.getFrom()); - auto &PredList = Edges[IsInsert].Pred[U.getTo()]; + auto &PredDIList = Pred[U.getTo()]; + auto &PredList = PredDIList.DI[IsInsert]; assert(PredList.back() == U.getFrom()); PredList.pop_back(); - if (PredList.empty()) - Edges[IsInsert].Pred.erase(U.getTo()); + if (PredList.empty() && PredDIList.DI[!IsInsert].empty()) + Pred.erase(U.getTo()); return U; } - bool ignoreChild(const NodePtr BB, NodePtr EdgeEnd, bool InverseEdge) const { - // Used to filter nullptr in clang. - if (EdgeEnd == nullptr) - return true; - auto &DeleteChildren = - (InverseEdge != InverseGraph) ? Edges[0].Pred : Edges[0].Succ; - auto It = DeleteChildren.find(BB); - if (It == DeleteChildren.end()) - return false; - auto &EdgesForBB = It->second; - return llvm::find(EdgesForBB, EdgeEnd) != EdgesForBB.end(); - } + using VectRet = SmallVector<NodePtr, 8>; + template <bool InverseEdge> VectRet getChildren(NodePtr N) const { + using DirectedNodeT = + std::conditional_t<InverseEdge, Inverse<NodePtr>, NodePtr>; + auto R = children<DirectedNodeT>(N); + VectRet Res = VectRet(detail::reverse_if<!InverseEdge>(R)); + + // Remove nullptr children for clang. + llvm::erase_value(Res, nullptr); + + auto &Children = (InverseEdge != InverseGraph) ? Pred : Succ; + auto It = Children.find(N); + if (It == Children.end()) + return Res; - iterator_range<typename SmallVectorImpl<NodePtr>::const_iterator> - getAddedChildren(const NodePtr BB, bool InverseEdge) const { - auto &InsertChildren = - (InverseEdge != InverseGraph) ? Edges[1].Pred : Edges[1].Succ; - auto It = InsertChildren.find(BB); - if (It == InsertChildren.end()) - return make_range(Empty.begin(), Empty.end()); - return make_range(It->second.begin(), It->second.end()); + // Remove children present in the CFG but not in the snapshot. + for (auto *Child : It->second.DI[0]) + llvm::erase_value(Res, Child); + + // Add children present in the snapshot but not in the real CFG. + auto &AddedChildren = It->second.DI[1]; + llvm::append_range(Res, AddedChildren); + + return Res; } void print(raw_ostream &OS) const { OS << "===== GraphDiff: CFG edge changes to create a CFG snapshot. 
\n" "===== (Note: notion of children/inverse_children depends on " "the direction of edges and the graph.)\n"; - OS << "Children to insert:\n\t"; - printMap(OS, Edges[1].Succ); - OS << "Children to delete:\n\t"; - printMap(OS, Edges[0].Succ); - OS << "Inverse_children to insert:\n\t"; - printMap(OS, Edges[1].Pred); - OS << "Inverse_children to delete:\n\t"; - printMap(OS, Edges[0].Pred); + OS << "Children to delete/insert:\n\t"; + printMap(OS, Succ); + OS << "Inverse_children to delete/insert:\n\t"; + printMap(OS, Pred); OS << "\n"; } @@ -193,58 +172,6 @@ public: LLVM_DUMP_METHOD void dump() const { print(dbgs()); } #endif }; - -template <typename GraphT, bool InverseGraph = false, bool InverseEdge = false, - typename GT = GraphTraits<GraphT>> -struct CFGViewChildren { - using DataRef = const GraphDiff<typename GT::NodeRef, InverseGraph> *; - using NodeRef = std::pair<DataRef, typename GT::NodeRef>; - - template<typename Range> - static auto makeChildRange(Range &&R, DataRef DR) { - using Iter = WrappedPairNodeDataIterator<decltype(std::forward<Range>(R).begin()), NodeRef, DataRef>; - return make_range(Iter(R.begin(), DR), Iter(R.end(), DR)); - } - - static auto children(NodeRef N) { - - // filter iterator init: - auto R = make_range(GT::child_begin(N.second), GT::child_end(N.second)); - // This lambda is copied into the iterators and persists to callers, ensure - // captures are by value or otherwise have sufficient lifetime. - auto First = make_filter_range(makeChildRange(R, N.first), [N](NodeRef C) { - return !C.first->ignoreChild(N.second, C.second, InverseEdge); - }); - - // new inserts iterator init: - auto InsertVec = N.first->getAddedChildren(N.second, InverseEdge); - auto Second = makeChildRange(InsertVec, N.first); - - auto CR = concat<NodeRef>(First, Second); - - // concat_range contains references to other ranges, returning it would - // leave those references dangling - the iterators contain - // other iterators by value so they're safe to return. 
- return make_range(CR.begin(), CR.end()); - } - - static auto child_begin(NodeRef N) { - return children(N).begin(); - } - - static auto child_end(NodeRef N) { - return children(N).end(); - } - - using ChildIteratorType = decltype(child_end(std::declval<NodeRef>())); -}; - -template <typename T, bool B> -struct GraphTraits<std::pair<const GraphDiff<T, B> *, T>> - : CFGViewChildren<T, B> {}; -template <typename T, bool B> -struct GraphTraits<std::pair<const GraphDiff<T, B> *, Inverse<T>>> - : CFGViewChildren<Inverse<T>, B, true> {}; } // end namespace llvm #endif // LLVM_SUPPORT_CFGDIFF_H diff --git a/llvm/include/llvm/Support/CFGUpdate.h b/llvm/include/llvm/Support/CFGUpdate.h index af4cd6ed1f1d..3a12b9d86c18 100644 --- a/llvm/include/llvm/Support/CFGUpdate.h +++ b/llvm/include/llvm/Support/CFGUpdate.h @@ -14,7 +14,6 @@ #ifndef LLVM_SUPPORT_CFGUPDATE_H #define LLVM_SUPPORT_CFGUPDATE_H -#include "llvm/ADT/APInt.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/PointerIntPair.h" #include "llvm/Support/Compiler.h" diff --git a/llvm/include/llvm/Support/CheckedArithmetic.h b/llvm/include/llvm/Support/CheckedArithmetic.h index 035e4533322c..09e6d7ec95dc 100644 --- a/llvm/include/llvm/Support/CheckedArithmetic.h +++ b/llvm/include/llvm/Support/CheckedArithmetic.h @@ -28,8 +28,8 @@ template <typename T, typename F> std::enable_if_t<std::is_integral<T>::value && sizeof(T) * 8 <= 64, llvm::Optional<T>> checkedOp(T LHS, T RHS, F Op, bool Signed = true) { - llvm::APInt ALHS(/*BitSize=*/sizeof(T) * 8, LHS, Signed); - llvm::APInt ARHS(/*BitSize=*/sizeof(T) * 8, RHS, Signed); + llvm::APInt ALHS(sizeof(T) * 8, LHS, Signed); + llvm::APInt ARHS(sizeof(T) * 8, RHS, Signed); bool Overflow; llvm::APInt Out = (ALHS.*Op)(ARHS, Overflow); if (Overflow) diff --git a/llvm/include/llvm/Support/CommandLine.h b/llvm/include/llvm/Support/CommandLine.h index 466945e40a9c..38f3e188be55 100644 --- a/llvm/include/llvm/Support/CommandLine.h +++ b/llvm/include/llvm/Support/CommandLine.h @@ -71,13 +71,6 @@ bool ParseCommandLineOptions(int argc, const char *const *argv, const char *EnvVar = nullptr, bool LongOptionsUseDoubleDash = false); -//===----------------------------------------------------------------------===// -// ParseEnvironmentOptions - Environment variable option processing alternate -// entry point. -// -void ParseEnvironmentOptions(const char *progName, const char *envvar, - const char *Overview = ""); - // Function pointer type for printing version information. using VersionPrinterTy = std::function<void(raw_ostream &)>; @@ -679,7 +672,7 @@ public: : Values(Options) {} template <class Opt> void apply(Opt &O) const { - for (auto Value : Values) + for (const auto &Value : Values) O.getParser().addLiteralOption(Value.Name, Value.Value, Value.Description); } @@ -1488,7 +1481,7 @@ public: template <class... Mods> explicit opt(const Mods &... Ms) - : Option(Optional, NotHidden), Parser(*this) { + : Option(llvm::cl::Optional, NotHidden), Parser(*this) { apply(this, Ms...); done(); } @@ -2092,6 +2085,14 @@ bool ExpandResponseFiles( llvm::vfs::FileSystem &FS = *llvm::vfs::getRealFileSystem(), llvm::Optional<llvm::StringRef> CurrentDir = llvm::None); +/// A convenience helper which concatenates the options specified by the +/// environment variable EnvVar and command line options, then expands response +/// files recursively. The tokenizer is a predefined GNU or Windows one. +/// \return true if all @files were expanded successfully or there were none. 
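// A minimal usage sketch for the expandResponseFiles helper declared below
// (illustrative only: the main() wrapper, the "MYTOOL_OPTIONS" environment
// variable name, and the tool overview string are assumptions, not part of
// this header).
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/StringSaver.h"

int main(int argc, const char **argv) {
  llvm::BumpPtrAllocator Alloc;
  llvm::StringSaver Saver(Alloc);
  llvm::SmallVector<const char *, 256> ExpandedArgv;
  // Prepends the options from MYTOOL_OPTIONS, then expands @file arguments.
  if (!llvm::cl::expandResponseFiles(argc, argv, "MYTOOL_OPTIONS", Saver,
                                     ExpandedArgv))
    return 1;
  llvm::cl::ParseCommandLineOptions(static_cast<int>(ExpandedArgv.size()),
                                    ExpandedArgv.data(), "my tool\n");
  return 0;
}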
+bool expandResponseFiles(int Argc, const char *const *Argv, const char *EnvVar, + StringSaver &Saver, + SmallVectorImpl<const char *> &NewArgv); + /// Mark all options not part of this category as cl::ReallyHidden. /// /// \param Category the category of options to keep displaying diff --git a/llvm/include/llvm/Support/Compiler.h b/llvm/include/llvm/Support/Compiler.h index 80ea76240d6c..9348ada91325 100644 --- a/llvm/include/llvm/Support/Compiler.h +++ b/llvm/include/llvm/Support/Compiler.h @@ -146,7 +146,7 @@ /// LLVM_NODISCARD - Warn if a type or return value is discarded. // Use the 'nodiscard' attribute in C++17 or newer mode. -#if __cplusplus > 201402L && LLVM_HAS_CPP_ATTRIBUTE(nodiscard) +#if defined(__cplusplus) && __cplusplus > 201402L && LLVM_HAS_CPP_ATTRIBUTE(nodiscard) #define LLVM_NODISCARD [[nodiscard]] #elif LLVM_HAS_CPP_ATTRIBUTE(clang::warn_unused_result) #define LLVM_NODISCARD [[clang::warn_unused_result]] @@ -234,11 +234,11 @@ /// 3.4 supported this but is buggy in various cases and produces unimplemented /// errors, just use it in GCC 4.0 and later. #if __has_attribute(always_inline) || LLVM_GNUC_PREREQ(4, 0, 0) -#define LLVM_ATTRIBUTE_ALWAYS_INLINE __attribute__((always_inline)) +#define LLVM_ATTRIBUTE_ALWAYS_INLINE inline __attribute__((always_inline)) #elif defined(_MSC_VER) #define LLVM_ATTRIBUTE_ALWAYS_INLINE __forceinline #else -#define LLVM_ATTRIBUTE_ALWAYS_INLINE +#define LLVM_ATTRIBUTE_ALWAYS_INLINE inline #endif #ifdef __GNUC__ @@ -268,7 +268,7 @@ #endif /// LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements. -#if __cplusplus > 201402L && LLVM_HAS_CPP_ATTRIBUTE(fallthrough) +#if defined(__cplusplus) && __cplusplus > 201402L && LLVM_HAS_CPP_ATTRIBUTE(fallthrough) #define LLVM_FALLTHROUGH [[fallthrough]] #elif LLVM_HAS_CPP_ATTRIBUTE(gnu::fallthrough) #define LLVM_FALLTHROUGH [[gnu::fallthrough]] @@ -314,19 +314,9 @@ #endif // LLVM_ATTRIBUTE_DEPRECATED(decl, "message") -#if __has_feature(attribute_deprecated_with_message) -# define LLVM_ATTRIBUTE_DEPRECATED(decl, message) \ - decl __attribute__((deprecated(message))) -#elif defined(__GNUC__) -# define LLVM_ATTRIBUTE_DEPRECATED(decl, message) \ - decl __attribute__((deprecated)) -#elif defined(_MSC_VER) -# define LLVM_ATTRIBUTE_DEPRECATED(decl, message) \ - __declspec(deprecated(message)) decl -#else -# define LLVM_ATTRIBUTE_DEPRECATED(decl, message) \ - decl -#endif +// This macro will be removed. +// Use C++14's attribute instead: [[deprecated("message")]] +#define LLVM_ATTRIBUTE_DEPRECATED(decl, message) [[deprecated(message)]] decl /// LLVM_BUILTIN_UNREACHABLE - On compilers which support it, expands /// to an expression which states that it is undefined behavior for the diff --git a/llvm/include/llvm/Support/CrashRecoveryContext.h b/llvm/include/llvm/Support/CrashRecoveryContext.h index 61a1bd405a4d..f756635ee1f9 100644 --- a/llvm/include/llvm/Support/CrashRecoveryContext.h +++ b/llvm/include/llvm/Support/CrashRecoveryContext.h @@ -44,11 +44,11 @@ class CrashRecoveryContextCleanup; /// executed in any case, whether crash occurs or not. These actions may be used /// to reclaim resources in the case of crash. 
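// A minimal sketch of typical CrashRecoveryContext usage (illustrative only;
// the RunGuarded wrapper and its callers are assumptions, not an API from
// this header).
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/CrashRecoveryContext.h"

// Runs Work inside a protected section; returns false if it crashed.
static bool RunGuarded(llvm::function_ref<void()> Work, int &CrashCode) {
  llvm::CrashRecoveryContext::Enable();
  llvm::CrashRecoveryContext CRC;
  if (CRC.RunSafely(Work))
    return true;
  CrashCode = CRC.RetCode; // Crash identifier, see the RetCode member below.
  return false;
}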
class CrashRecoveryContext { - void *Impl; - CrashRecoveryContextCleanup *head; + void *Impl = nullptr; + CrashRecoveryContextCleanup *head = nullptr; public: - CrashRecoveryContext() : Impl(nullptr), head(nullptr) {} + CrashRecoveryContext(); ~CrashRecoveryContext(); /// Register cleanup handler, which is used when the recovery context is @@ -102,6 +102,10 @@ public: LLVM_ATTRIBUTE_NORETURN void HandleExit(int RetCode); + /// Rethrow a signal or an exception after it was caught once by a + /// CrashRecoveryContext. + static bool throwIfCrash(int RetCode); + /// In case of a crash, this is the crash identifier. int RetCode = 0; @@ -181,7 +185,7 @@ public: : CrashRecoveryContextCleanupBase< CrashRecoveryContextDestructorCleanup<T>, T>(context, resource) {} - virtual void recoverResources() { + void recoverResources() override { this->resource->~T(); } }; diff --git a/llvm/include/llvm/Support/DOTGraphTraits.h b/llvm/include/llvm/Support/DOTGraphTraits.h index ec01b7d9576a..a73538fa1462 100644 --- a/llvm/include/llvm/Support/DOTGraphTraits.h +++ b/llvm/include/llvm/Support/DOTGraphTraits.h @@ -60,7 +60,8 @@ public: /// isNodeHidden - If the function returns true, the given node is not /// displayed in the graph. - static bool isNodeHidden(const void *) { + template <typename GraphType> + static bool isNodeHidden(const void *, const GraphType &) { return false; } diff --git a/llvm/include/llvm/Support/Error.h b/llvm/include/llvm/Support/Error.h index 9dd1bb7cb96d..c0f7c10aefb4 100644 --- a/llvm/include/llvm/Support/Error.h +++ b/llvm/include/llvm/Support/Error.h @@ -629,22 +629,22 @@ private: storage_type *getStorage() { assert(!HasError && "Cannot get value when an error exists!"); - return reinterpret_cast<storage_type *>(TStorage.buffer); + return reinterpret_cast<storage_type *>(&TStorage); } const storage_type *getStorage() const { assert(!HasError && "Cannot get value when an error exists!"); - return reinterpret_cast<const storage_type *>(TStorage.buffer); + return reinterpret_cast<const storage_type *>(&TStorage); } error_type *getErrorStorage() { assert(HasError && "Cannot get error when a value exists!"); - return reinterpret_cast<error_type *>(ErrorStorage.buffer); + return reinterpret_cast<error_type *>(&ErrorStorage); } const error_type *getErrorStorage() const { assert(HasError && "Cannot get error when a value exists!"); - return reinterpret_cast<const error_type *>(ErrorStorage.buffer); + return reinterpret_cast<const error_type *>(&ErrorStorage); } // Used by ExpectedAsOutParameter to reset the checked flag. diff --git a/llvm/include/llvm/Support/ErrorHandling.h b/llvm/include/llvm/Support/ErrorHandling.h index 7cbc668b3a0e..0ec0242d569d 100644 --- a/llvm/include/llvm/Support/ErrorHandling.h +++ b/llvm/include/llvm/Support/ErrorHandling.h @@ -110,9 +110,9 @@ void install_out_of_memory_new_handler(); /// the following unwind succeeds, e.g. do not trigger additional allocations /// in the unwind chain. /// -/// If no error handler is installed (default), then a bad_alloc exception -/// is thrown, if LLVM is compiled with exception support, otherwise an -/// assertion is called. +/// If no error handler is installed (default), throws a bad_alloc exception +/// if LLVM is compiled with exception support. Otherwise prints the error +/// to standard error and calls abort(). 
LLVM_ATTRIBUTE_NORETURN void report_bad_alloc_error(const char *Reason, bool GenCrashDiag = true); diff --git a/llvm/include/llvm/Support/ErrorOr.h b/llvm/include/llvm/Support/ErrorOr.h index 1fbccc1d1e26..b654c9c9c43b 100644 --- a/llvm/include/llvm/Support/ErrorOr.h +++ b/llvm/include/llvm/Support/ErrorOr.h @@ -235,17 +235,17 @@ private: storage_type *getStorage() { assert(!HasError && "Cannot get value when an error exists!"); - return reinterpret_cast<storage_type*>(TStorage.buffer); + return reinterpret_cast<storage_type *>(&TStorage); } const storage_type *getStorage() const { assert(!HasError && "Cannot get value when an error exists!"); - return reinterpret_cast<const storage_type*>(TStorage.buffer); + return reinterpret_cast<const storage_type *>(&TStorage); } std::error_code *getErrorStorage() { assert(HasError && "Cannot get error when a value exists!"); - return reinterpret_cast<std::error_code *>(ErrorStorage.buffer); + return reinterpret_cast<std::error_code *>(&ErrorStorage); } const std::error_code *getErrorStorage() const { diff --git a/llvm/include/llvm/Support/ExitCodes.h b/llvm/include/llvm/Support/ExitCodes.h new file mode 100644 index 000000000000..b9041f5557d5 --- /dev/null +++ b/llvm/include/llvm/Support/ExitCodes.h @@ -0,0 +1,33 @@ +//===-- llvm/Support/ExitCodes.h - Exit codes for exit() -------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file contains definitions of exit codes for exit() function. They are +/// either defined by sysexits.h if it is supported, or defined here if +/// sysexits.h is not supported. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SUPPORT_EXITCODES_H +#define LLVM_SUPPORT_EXITCODES_H + +#include "llvm/Config/llvm-config.h" + +#if HAVE_SYSEXITS_H +#include <sysexits.h> +#elif __MVS__ +// <sysexits.h> does not exist on z/OS. The only value used in LLVM is +// EX_IOERR, which is used to signal a special error condition (broken pipe). +// Define the macro with its usual value from BSD systems, which is chosen to +// not clash with more standard exit codes like 1. +#define EX_IOERR 74 +#elif LLVM_ON_UNIX +#error Exit code EX_IOERR not available +#endif + +#endif diff --git a/llvm/include/llvm/Support/FileCollector.h b/llvm/include/llvm/Support/FileCollector.h index 2b5e9c669b68..8ea344a347d3 100644 --- a/llvm/include/llvm/Support/FileCollector.h +++ b/llvm/include/llvm/Support/FileCollector.h @@ -20,6 +20,35 @@ namespace llvm { class FileCollectorFileSystem; class Twine; +class FileCollectorBase { +public: + FileCollectorBase(); + virtual ~FileCollectorBase(); + + void addFile(const Twine &file); + void addDirectory(const Twine &Dir); + +protected: + bool markAsSeen(StringRef Path) { + if (Path.empty()) + return false; + return Seen.insert(Path).second; + } + + virtual void addFileImpl(StringRef SrcPath) = 0; + + virtual llvm::vfs::directory_iterator + addDirectoryImpl(const llvm::Twine &Dir, + IntrusiveRefCntPtr<vfs::FileSystem> FS, + std::error_code &EC) = 0; + + /// Synchronizes access to internal data structures. + std::mutex Mutex; + + /// Tracks already seen files so they can be skipped. 
+ StringSet<> Seen; +}; + /// Captures file system interaction and generates data to be later replayed /// with the RedirectingFileSystem. /// @@ -38,16 +67,34 @@ class Twine; /// /// In order to preserve the relative topology of files we use their real paths /// as relative paths inside of the Root. -class FileCollector { +class FileCollector : public FileCollectorBase { public: + /// Helper utility that encapsulates the logic for canonicalizing a virtual + /// path and a path to copy from. + class PathCanonicalizer { + public: + struct PathStorage { + SmallString<256> CopyFrom; + SmallString<256> VirtualPath; + }; + + /// Canonicalize a pair of virtual and real paths. + PathStorage canonicalize(StringRef SrcPath); + + private: + /// Replace with a (mostly) real path, or don't modify. Resolves symlinks + /// in the directory, using \a CachedDirs to avoid redundant lookups, but + /// leaves the filename as a possible symlink. + void updateWithRealPath(SmallVectorImpl<char> &Path); + + StringMap<std::string> CachedDirs; + }; + /// \p Root is the directory where collected files are will be stored. /// \p OverlayRoot is VFS mapping root. /// \p Root directory gets created in copyFiles unless it already exists. FileCollector(std::string Root, std::string OverlayRoot); - void addFile(const Twine &file); - void addDirectory(const Twine &Dir); - /// Write the yaml mapping (for the VFS) to the given file. std::error_code writeMapping(StringRef MappingFile); @@ -67,14 +114,6 @@ public: private: friend FileCollectorFileSystem; - bool markAsSeen(StringRef Path) { - if (Path.empty()) - return false; - return Seen.insert(Path).second; - } - - bool getRealPath(StringRef SrcPath, SmallVectorImpl<char> &Result); - void addFileToMapping(StringRef VirtualPath, StringRef RealPath) { if (sys::fs::is_directory(VirtualPath)) VFSWriter.addDirectoryMapping(VirtualPath, RealPath); @@ -83,14 +122,12 @@ private: } protected: - void addFileImpl(StringRef SrcPath); + void addFileImpl(StringRef SrcPath) override; llvm::vfs::directory_iterator addDirectoryImpl(const llvm::Twine &Dir, - IntrusiveRefCntPtr<vfs::FileSystem> FS, std::error_code &EC); - - /// Synchronizes access to Seen, VFSWriter and SymlinkMap. - std::mutex Mutex; + IntrusiveRefCntPtr<vfs::FileSystem> FS, + std::error_code &EC) override; /// The directory where collected files are copied to in copyFiles(). const std::string Root; @@ -98,14 +135,11 @@ protected: /// The root directory where the VFS overlay lives. const std::string OverlayRoot; - /// Tracks already seen files so they can be skipped. - StringSet<> Seen; - /// The yaml mapping writer. vfs::YAMLVFSWriter VFSWriter; - /// Caches RealPath calls when resolving symlinks. - StringMap<std::string> SymlinkMap; + /// Helper utility for canonicalizing paths. 
+ PathCanonicalizer Canonicalizer; }; } // end namespace llvm diff --git a/llvm/include/llvm/Support/FileSystem.h b/llvm/include/llvm/Support/FileSystem.h index a29a9d787947..2483aae046f5 100644 --- a/llvm/include/llvm/Support/FileSystem.h +++ b/llvm/include/llvm/Support/FileSystem.h @@ -34,6 +34,7 @@ #include "llvm/Support/Error.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ErrorOr.h" +#include "llvm/Support/FileSystem/UniqueID.h" #include "llvm/Support/MD5.h" #include <cassert> #include <cstdint> @@ -42,7 +43,6 @@ #include <stack> #include <string> #include <system_error> -#include <tuple> #include <vector> #ifdef HAVE_SYS_STAT_H @@ -131,26 +131,6 @@ inline perms operator~(perms x) { static_cast<unsigned short>(~static_cast<unsigned short>(x))); } -class UniqueID { - uint64_t Device; - uint64_t File; - -public: - UniqueID() = default; - UniqueID(uint64_t Device, uint64_t File) : Device(Device), File(File) {} - - bool operator==(const UniqueID &Other) const { - return Device == Other.Device && File == Other.File; - } - bool operator!=(const UniqueID &Other) const { return !(*this == Other); } - bool operator<(const UniqueID &Other) const { - return std::tie(Device, File) < std::tie(Other.Device, Other.File); - } - - uint64_t getDevice() const { return Device; } - uint64_t getFile() const { return File; } -}; - /// Represents the result of a call to directory_iterator::status(). This is a /// subset of the information returned by a regular sys::fs::status() call, and /// represents the information provided by Windows FileFirstFile/FindNextFile. @@ -1131,6 +1111,43 @@ Expected<file_t> openNativeFileForRead(const Twine &Name, OpenFlags Flags = OF_None, SmallVectorImpl<char> *RealPath = nullptr); +/// Try to lock the file during the specified time. +/// +/// This function implements advisory locking on the entire file. If it returns +/// <em>errc::success</em>, the file is locked by the calling process. Until the +/// process unlocks the file by calling \a unlockFile, all attempts to lock the +/// same file will fail/block. The process that locked the file may assume that +/// no other processes read or write this file, provided that all processes +/// lock the file prior to accessing its content. +/// +/// @param FD The descriptor representing the file to lock. +/// @param Timeout Time in milliseconds that the process should wait before +/// reporting lock failure. Zero value means try to get lock only +/// once. +/// @returns errc::success if lock is successfully obtained, +/// errc::no_lock_available if the file cannot be locked, or platform-specific +/// error_code otherwise. +/// +/// @note Care should be taken when using this function in a multithreaded +/// context, as it may not prevent other threads in the same process from +/// obtaining a lock on the same file, even if they are using a different file +/// descriptor. +std::error_code +tryLockFile(int FD, + std::chrono::milliseconds Timeout = std::chrono::milliseconds(0)); + +/// Lock the file. +/// +/// This function acts as @ref tryLockFile but it waits indefinitely. +std::error_code lockFile(int FD); + +/// Unlock the file. +/// +/// @param FD The descriptor representing the file to unlock. +/// @returns errc::success if lock is successfully released or platform-specific +/// error_code otherwise. +std::error_code unlockFile(int FD); + /// @brief Close the file object. This should be used instead of ::close for /// portability.
On error, the caller should assume the file is closed, as is /// the case for Process::SafelyCloseFileDescriptor @@ -1142,6 +1159,35 @@ openNativeFileForRead(const Twine &Name, OpenFlags Flags = OF_None, /// means that the filesystem may have failed to perform some buffered writes. std::error_code closeFile(file_t &F); +/// RAII class that facilitates file locking. +class FileLocker { + int FD; ///< Locked file handle. + FileLocker(int FD) : FD(FD) {} + friend class llvm::raw_fd_ostream; + +public: + FileLocker(const FileLocker &L) = delete; + FileLocker(FileLocker &&L) : FD(L.FD) { L.FD = -1; } + ~FileLocker() { + if (FD != -1) + unlockFile(FD); + } + FileLocker &operator=(FileLocker &&L) { + FD = L.FD; + L.FD = -1; + return *this; + } + FileLocker &operator=(const FileLocker &L) = delete; + std::error_code unlock() { + if (FD != -1) { + std::error_code Result = unlockFile(FD); + FD = -1; + return Result; + } + return std::error_code(); + } +}; + std::error_code getUniqueID(const Twine Path, UniqueID &Result); /// Get disk space usage information. diff --git a/llvm/include/llvm/Support/FileSystem/UniqueID.h b/llvm/include/llvm/Support/FileSystem/UniqueID.h new file mode 100644 index 000000000000..229410c8292e --- /dev/null +++ b/llvm/include/llvm/Support/FileSystem/UniqueID.h @@ -0,0 +1,52 @@ +//===- llvm/Support/FileSystem/UniqueID.h - UniqueID for files --*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is cut out of llvm/Support/FileSystem.h to allow UniqueID to be +// reused without bloating the includes. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SUPPORT_FILESYSTEM_UNIQUEID_H +#define LLVM_SUPPORT_FILESYSTEM_UNIQUEID_H + +#include <cstdint> + +namespace llvm { +namespace sys { +namespace fs { + +class UniqueID { + uint64_t Device; + uint64_t File; + +public: + UniqueID() = default; + UniqueID(uint64_t Device, uint64_t File) : Device(Device), File(File) {} + + bool operator==(const UniqueID &Other) const { + return Device == Other.Device && File == Other.File; + } + bool operator!=(const UniqueID &Other) const { return !(*this == Other); } + bool operator<(const UniqueID &Other) const { + /// Don't use std::tie since it bloats the compile time of this header. + if (Device < Other.Device) + return true; + if (Other.Device < Device) + return false; + return File < Other.File; + } + + uint64_t getDevice() const { return Device; } + uint64_t getFile() const { return File; } +}; + +} // end namespace fs +} // end namespace sys +} // end namespace llvm + +#endif // LLVM_SUPPORT_FILESYSTEM_UNIQUEID_H diff --git a/llvm/include/llvm/Support/FormatVariadic.h b/llvm/include/llvm/Support/FormatVariadic.h index dfafc3ccb44e..094b054f773f 100644 --- a/llvm/include/llvm/Support/FormatVariadic.h +++ b/llvm/include/llvm/Support/FormatVariadic.h @@ -205,10 +205,10 @@ public: // // The characters '{' and '}' are reserved and cannot appear anywhere within a // replacement sequence. Outside of a replacement sequence, in order to print -// a literal '{' or '}' it must be doubled -- "{{" to print a literal '{' and -// "}}" to print a literal '}'. +// a literal '{' it must be doubled as "{{". 
// // ===Parameter Indexing=== +// // `index` specifies the index of the parameter in the parameter pack to format // into the output. Note that it is possible to refer to the same parameter // index multiple times in a given format string. This makes it possible to diff --git a/llvm/include/llvm/Support/GenericDomTree.h b/llvm/include/llvm/Support/GenericDomTree.h index 10e591a69d36..18e08dbcd175 100644 --- a/llvm/include/llvm/Support/GenericDomTree.h +++ b/llvm/include/llvm/Support/GenericDomTree.h @@ -28,6 +28,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/Support/CFGDiff.h" #include "llvm/Support/CFGUpdate.h" #include "llvm/Support/raw_ostream.h" #include <algorithm> @@ -37,7 +38,6 @@ #include <memory> #include <type_traits> #include <utility> -#include <vector> namespace llvm { @@ -60,7 +60,7 @@ template <class NodeT> class DomTreeNodeBase { NodeT *TheBB; DomTreeNodeBase *IDom; unsigned Level; - std::vector<DomTreeNodeBase *> Children; + SmallVector<DomTreeNodeBase *, 4> Children; mutable unsigned DFSNumIn = ~0; mutable unsigned DFSNumOut = ~0; @@ -68,9 +68,9 @@ template <class NodeT> class DomTreeNodeBase { DomTreeNodeBase(NodeT *BB, DomTreeNodeBase *iDom) : TheBB(BB), IDom(iDom), Level(IDom ? IDom->Level + 1 : 0) {} - using iterator = typename std::vector<DomTreeNodeBase *>::iterator; + using iterator = typename SmallVector<DomTreeNodeBase *, 4>::iterator; using const_iterator = - typename std::vector<DomTreeNodeBase *>::const_iterator; + typename SmallVector<DomTreeNodeBase *, 4>::const_iterator; iterator begin() { return Children.begin(); } iterator end() { return Children.end(); } @@ -211,7 +211,10 @@ void DeleteEdge(DomTreeT &DT, typename DomTreeT::NodePtr From, template <typename DomTreeT> void ApplyUpdates(DomTreeT &DT, - ArrayRef<typename DomTreeT::UpdateType> Updates); + GraphDiff<typename DomTreeT::NodePtr, + DomTreeT::IsPostDominator> &PreViewCFG, + GraphDiff<typename DomTreeT::NodePtr, + DomTreeT::IsPostDominator> *PostViewCFG); template <typename DomTreeT> bool Verify(const DomTreeT &DT, typename DomTreeT::VerificationLevel VL); @@ -460,8 +463,8 @@ protected: return this->Roots[0]; } - /// findNearestCommonDominator - Find nearest common dominator basic block - /// for basic block A and B. If there is no such block then return nullptr. + /// Find nearest common dominator basic block for basic block A and B. A and B + /// must have tree nodes. NodeT *findNearestCommonDominator(NodeT *A, NodeT *B) const { assert(A && B && "Pointers are not valid"); assert(A->getParent() == B->getParent() && @@ -477,18 +480,18 @@ protected: DomTreeNodeBase<NodeT> *NodeA = getNode(A); DomTreeNodeBase<NodeT> *NodeB = getNode(B); - - if (!NodeA || !NodeB) return nullptr; + assert(NodeA && "A must be in the tree"); + assert(NodeB && "B must be in the tree"); // Use level information to go up the tree until the levels match. Then // continue going up til we arrive at the same node. - while (NodeA && NodeA != NodeB) { + while (NodeA != NodeB) { if (NodeA->getLevel() < NodeB->getLevel()) std::swap(NodeA, NodeB); NodeA = NodeA->IDom; } - return NodeA ? NodeA->getBlock() : nullptr; + return NodeA->getBlock(); } const NodeT *findNearestCommonDominator(const NodeT *A, @@ -535,10 +538,39 @@ protected: /// The type of updates is the same for DomTreeBase<T> and PostDomTreeBase<T> /// with the same template parameter T. /// - /// \param Updates An unordered sequence of updates to perform. 
+ /// \param Updates An unordered sequence of updates to perform. The current + /// CFG and the reverse of these updates provides the pre-view of the CFG. /// void applyUpdates(ArrayRef<UpdateType> Updates) { - DomTreeBuilder::ApplyUpdates(*this, Updates); + GraphDiff<NodePtr, IsPostDominator> PreViewCFG( + Updates, /*ReverseApplyUpdates=*/true); + DomTreeBuilder::ApplyUpdates(*this, PreViewCFG, nullptr); + } + + /// \param Updates An unordered sequence of updates to perform. The current + /// CFG and the reverse of these updates provides the pre-view of the CFG. + /// \param PostViewUpdates An unordered sequence of update to perform in order + /// to obtain a post-view of the CFG. The DT will be updated assuming the + /// obtained PostViewCFG is the desired end state. + void applyUpdates(ArrayRef<UpdateType> Updates, + ArrayRef<UpdateType> PostViewUpdates) { + if (Updates.empty()) { + GraphDiff<NodePtr, IsPostDom> PostViewCFG(PostViewUpdates); + DomTreeBuilder::ApplyUpdates(*this, PostViewCFG, &PostViewCFG); + } else { + // PreViewCFG needs to merge Updates and PostViewCFG. The updates in + // Updates need to be reversed, and match the direction in PostViewCFG. + // The PostViewCFG is created with updates reversed (equivalent to changes + // made to the CFG), so the PreViewCFG needs all the updates reverse + // applied. + SmallVector<UpdateType> AllUpdates(Updates.begin(), Updates.end()); + for (auto &Update : PostViewUpdates) + AllUpdates.push_back(Update); + GraphDiff<NodePtr, IsPostDom> PreViewCFG(AllUpdates, + /*ReverseApplyUpdates=*/true); + GraphDiff<NodePtr, IsPostDom> PostViewCFG(PostViewUpdates); + DomTreeBuilder::ApplyUpdates(*this, PreViewCFG, &PostViewCFG); + } } /// Inform the dominator tree about a CFG edge insertion and update the tree. @@ -807,9 +839,7 @@ protected: "NewBB should have a single successor!"); NodeRef NewBBSucc = *GraphT::child_begin(NewBB); - std::vector<NodeRef> PredBlocks; - for (auto Pred : children<Inverse<N>>(NewBB)) - PredBlocks.push_back(Pred); + SmallVector<NodeRef, 4> PredBlocks(children<Inverse<N>>(NewBB)); assert(!PredBlocks.empty() && "No predblocks?"); diff --git a/llvm/include/llvm/Support/GenericDomTreeConstruction.h b/llvm/include/llvm/Support/GenericDomTreeConstruction.h index 464de4e2b3ba..4b59ad1f017f 100644 --- a/llvm/include/llvm/Support/GenericDomTreeConstruction.h +++ b/llvm/include/llvm/Support/GenericDomTreeConstruction.h @@ -58,6 +58,7 @@ struct SemiNCAInfo { using TreeNodePtr = DomTreeNodeBase<NodeT> *; using RootsT = decltype(DomTreeT::Roots); static constexpr bool IsPostDom = DomTreeT::IsPostDominator; + using GraphDiffT = GraphDiff<NodePtr, IsPostDom>; // Information record used by Semi-NCA during tree construction. struct InfoRec { @@ -77,21 +78,17 @@ struct SemiNCAInfo { using UpdateT = typename DomTreeT::UpdateType; using UpdateKind = typename DomTreeT::UpdateKind; struct BatchUpdateInfo { - SmallVector<UpdateT, 4> Updates; - using NodePtrAndKind = PointerIntPair<NodePtr, 1, UpdateKind>; - - // In order to be able to walk a CFG that is out of sync with the CFG - // DominatorTree last knew about, use the list of updates to reconstruct - // previous CFG versions of the current CFG. For each node, we store a set - // of its virtually added/deleted future successors and predecessors. - // Note that these children are from the future relative to what the - // DominatorTree knows about -- using them to gets us some snapshot of the - // CFG from the past (relative to the state of the CFG). 
- DenseMap<NodePtr, SmallVector<NodePtrAndKind, 4>> FutureSuccessors; - DenseMap<NodePtr, SmallVector<NodePtrAndKind, 4>> FuturePredecessors; + // Note: Updates inside PreViewCFG are already legalized. + BatchUpdateInfo(GraphDiffT &PreViewCFG, GraphDiffT *PostViewCFG = nullptr) + : PreViewCFG(PreViewCFG), PostViewCFG(PostViewCFG), + NumLegalized(PreViewCFG.getNumLegalizedUpdates()) {} + // Remembers if the whole tree was recalculated at some point during the // current batch update. bool IsRecalculated = false; + GraphDiffT &PreViewCFG; + GraphDiffT *PostViewCFG; + const size_t NumLegalized; }; BatchUpdateInfo *BatchUpdates; @@ -107,66 +104,24 @@ struct SemiNCAInfo { // in progress, we need this information to continue it. } - template <bool Inverse> - struct ChildrenGetter { - using ResultTy = SmallVector<NodePtr, 8>; - - static ResultTy Get(NodePtr N, std::integral_constant<bool, false>) { - auto RChildren = reverse(children<NodePtr>(N)); - return ResultTy(RChildren.begin(), RChildren.end()); - } - - static ResultTy Get(NodePtr N, std::integral_constant<bool, true>) { - auto IChildren = inverse_children<NodePtr>(N); - return ResultTy(IChildren.begin(), IChildren.end()); - } + template <bool Inversed> + static SmallVector<NodePtr, 8> getChildren(NodePtr N, BatchUpdatePtr BUI) { + if (BUI) + return BUI->PreViewCFG.template getChildren<Inversed>(N); + return getChildren<Inversed>(N); + } - using Tag = std::integral_constant<bool, Inverse>; - - // The function below is the core part of the batch updater. It allows the - // Depth Based Search algorithm to perform incremental updates in lockstep - // with updates to the CFG. We emulated lockstep CFG updates by getting its - // next snapshots by reverse-applying future updates. - static ResultTy Get(NodePtr N, BatchUpdatePtr BUI) { - ResultTy Res = Get(N, Tag()); - // If there's no batch update in progress, simply return node's children. - if (!BUI) return Res; - - // CFG children are actually its *most current* children, and we have to - // reverse-apply the future updates to get the node's children at the - // point in time the update was performed. - auto &FutureChildren = (Inverse != IsPostDom) ? BUI->FuturePredecessors - : BUI->FutureSuccessors; - auto FCIt = FutureChildren.find(N); - if (FCIt == FutureChildren.end()) return Res; - - for (auto ChildAndKind : FCIt->second) { - const NodePtr Child = ChildAndKind.getPointer(); - const UpdateKind UK = ChildAndKind.getInt(); - - // Reverse-apply the future update. - if (UK == UpdateKind::Insert) { - // If there's an insertion in the future, it means that the edge must - // exist in the current CFG, but was not present in it before. - assert(llvm::find(Res, Child) != Res.end() - && "Expected child not found in the CFG"); - Res.erase(std::remove(Res.begin(), Res.end(), Child), Res.end()); - LLVM_DEBUG(dbgs() << "\tHiding edge " << BlockNamePrinter(N) << " -> " - << BlockNamePrinter(Child) << "\n"); - } else { - // If there's an deletion in the future, it means that the edge cannot - // exist in the current CFG, but existed in it before. 
- assert(llvm::find(Res, Child) == Res.end() && - "Unexpected child found in the CFG"); - LLVM_DEBUG(dbgs() << "\tShowing virtual edge " << BlockNamePrinter(N) - << " -> " << BlockNamePrinter(Child) << "\n"); - Res.push_back(Child); - } - } + template <bool Inversed> + static SmallVector<NodePtr, 8> getChildren(NodePtr N) { + using DirectedNodeT = + std::conditional_t<Inversed, Inverse<NodePtr>, NodePtr>; + auto R = children<DirectedNodeT>(N); + SmallVector<NodePtr, 8> Res(detail::reverse_if<!Inversed>(R)); - return Res; - } - }; + // Remove nullptr children for clang. + llvm::erase_value(Res, nullptr); + return Res; + } NodePtr getIDom(NodePtr BB) const { auto InfoIt = NodeToInfo.find(BB); @@ -208,6 +163,8 @@ struct SemiNCAInfo { } }; + using NodeOrderMap = DenseMap<NodePtr, unsigned>; + // Custom DFS implementation which can skip nodes based on a provided // predicate. It also collects ReverseChildren so that we don't have to spend // time getting predecessors in SemiNCA. @@ -215,9 +172,13 @@ struct SemiNCAInfo { // If IsReverse is set to true, the DFS walk will be performed backwards // relative to IsPostDom -- using reverse edges for dominators and forward // edges for postdominators. + // + // If SuccOrder is specified then in this order the DFS traverses the children + // otherwise the order is implied by the results of getChildren(). template <bool IsReverse = false, typename DescendCondition> unsigned runDFS(NodePtr V, unsigned LastNum, DescendCondition Condition, - unsigned AttachToNum) { + unsigned AttachToNum, + const NodeOrderMap *SuccOrder = nullptr) { assert(V); SmallVector<NodePtr, 64> WorkList = {V}; if (NodeToInfo.count(V) != 0) NodeToInfo[V].Parent = AttachToNum; @@ -233,8 +194,14 @@ struct SemiNCAInfo { NumToNode.push_back(BB); constexpr bool Direction = IsReverse != IsPostDom; // XOR. - for (const NodePtr Succ : - ChildrenGetter<Direction>::Get(BB, BatchUpdates)) { + auto Successors = getChildren<Direction>(BB, BatchUpdates); + if (SuccOrder && Successors.size() > 1) + llvm::sort( + Successors.begin(), Successors.end(), [=](NodePtr A, NodePtr B) { + return SuccOrder->find(A)->second < SuccOrder->find(B)->second; + }); + + for (const NodePtr Succ : Successors) { const auto SIT = NodeToInfo.find(Succ); // Don't visit nodes more than once but remember to collect // ReverseChildren. @@ -369,7 +336,7 @@ struct SemiNCAInfo { // to CFG nodes within infinite loops. static bool HasForwardSuccessors(const NodePtr N, BatchUpdatePtr BUI) { assert(N && "N must be a valid node"); - return !ChildrenGetter<false>::Get(N, BUI).empty(); + return !getChildren<false>(N, BUI).empty(); } static NodePtr GetEntryNode(const DomTreeT &DT) { @@ -430,6 +397,32 @@ struct SemiNCAInfo { // nodes. if (Total + 1 != Num) { HasNonTrivialRoots = true; + + // SuccOrder is the order of blocks in the function. It is needed to make + // the calculation of the FurthestAway node and the whole PostDomTree + // immune to swap successors transformation (e.g. canonicalizing branch + // predicates). SuccOrder is initialized lazily only for successors of + // reverse unreachable nodes. + Optional<NodeOrderMap> SuccOrder; + auto InitSuccOrderOnce = [&]() { + SuccOrder = NodeOrderMap(); + for (const auto Node : nodes(DT.Parent)) + if (SNCA.NodeToInfo.count(Node) == 0) + for (const auto Succ : getChildren<false>(Node, SNCA.BatchUpdates)) + SuccOrder->try_emplace(Succ, 0); + + // Add mapping for all entries of SuccOrder. 
+ unsigned NodeNum = 0; + for (const auto Node : nodes(DT.Parent)) { + ++NodeNum; + auto Order = SuccOrder->find(Node); + if (Order != SuccOrder->end()) { + assert(Order->second == 0); + Order->second = NodeNum; + } + } + }; + // Make another DFS pass over all other nodes to find the // reverse-unreachable blocks, and find the furthest paths we'll be able // to make. @@ -454,7 +447,12 @@ struct SemiNCAInfo { // expensive and does not always lead to a minimal set of roots. LLVM_DEBUG(dbgs() << "\t\t\tRunning forward DFS\n"); - const unsigned NewNum = SNCA.runDFS<true>(I, Num, AlwaysDescend, Num); + if (!SuccOrder) + InitSuccOrderOnce(); + assert(SuccOrder); + + const unsigned NewNum = + SNCA.runDFS<true>(I, Num, AlwaysDescend, Num, &*SuccOrder); const NodePtr FurthestAway = SNCA.NumToNode[NewNum]; LLVM_DEBUG(dbgs() << "\t\t\tFound a new furthest away node " << "(non-trivial root): " @@ -530,7 +528,7 @@ struct SemiNCAInfo { // If we wound another root in a (forward) DFS walk, remove the current // root from the set of roots, as it is reverse-reachable from the other // one. - if (llvm::find(Roots, N) != Roots.end()) { + if (llvm::is_contained(Roots, N)) { LLVM_DEBUG(dbgs() << "\tForward DFS walk found another root " << BlockNamePrinter(N) << "\n\tRemoving root " << BlockNamePrinter(Root) << "\n"); @@ -563,12 +561,21 @@ struct SemiNCAInfo { auto *Parent = DT.Parent; DT.reset(); DT.Parent = Parent; - SemiNCAInfo SNCA(nullptr); // Since we are rebuilding the whole tree, - // there's no point doing it incrementally. + // If the update is using the actual CFG, BUI is null. If it's using a view, + // BUI is non-null and the PreCFGView is used. When calculating from + // scratch, make the PreViewCFG equal to the PostCFGView, so Post is used. + BatchUpdatePtr PostViewBUI = nullptr; + if (BUI && BUI->PostViewCFG) { + BUI->PreViewCFG = *BUI->PostViewCFG; + PostViewBUI = BUI; + } + // This is rebuilding the whole tree, not incrementally, but PostViewBUI is + // used in case the caller needs a DT update with a CFGView. + SemiNCAInfo SNCA(PostViewBUI); // Step #0: Number blocks in depth-first order and initialize variables used // in later stages of the algorithm. - DT.Roots = FindRoots(DT, nullptr); + DT.Roots = FindRoots(DT, PostViewBUI); SNCA.doFullDFSWalk(DT, AlwaysDescend); SNCA.runSemiNCA(DT); @@ -679,8 +686,7 @@ struct SemiNCAInfo { // root. if (!DT.isVirtualRoot(To->getIDom())) return false; - auto RIt = llvm::find(DT.Roots, To->getBlock()); - if (RIt == DT.Roots.end()) + if (!llvm::is_contained(DT.Roots, To->getBlock())) return false; // To is not a root, nothing to update. LLVM_DEBUG(dbgs() << "\t\tAfter the insertion, " << BlockNamePrinter(To) @@ -787,8 +793,7 @@ struct SemiNCAInfo { // // Invariant: there is an optimal path from `To` to TN with the minimum // depth being CurrentLevel. - for (const NodePtr Succ : - ChildrenGetter<IsPostDom>::Get(TN->getBlock(), BUI)) { + for (const NodePtr Succ : getChildren<IsPostDom>(TN->getBlock(), BUI)) { const TreeNodePtr SuccTN = DT.getNode(Succ); assert(SuccTN && "Unreachable successor found at reachable insertion"); @@ -918,8 +923,8 @@ struct SemiNCAInfo { // the DomTree about it. // The check is O(N), so run it only in debug configuration. 
auto IsSuccessor = [BUI](const NodePtr SuccCandidate, const NodePtr Of) { - auto Successors = ChildrenGetter<IsPostDom>::Get(Of, BUI); - return llvm::find(Successors, SuccCandidate) != Successors.end(); + auto Successors = getChildren<IsPostDom>(Of, BUI); + return llvm::is_contained(Successors, SuccCandidate); }; (void)IsSuccessor; assert(!IsSuccessor(To, From) && "Deleted edge still exists in the CFG!"); @@ -1005,15 +1010,14 @@ struct SemiNCAInfo { const TreeNodePtr TN) { LLVM_DEBUG(dbgs() << "IsReachableFromIDom " << BlockNamePrinter(TN) << "\n"); - for (const NodePtr Pred : - ChildrenGetter<!IsPostDom>::Get(TN->getBlock(), BUI)) { + auto TNB = TN->getBlock(); + for (const NodePtr Pred : getChildren<!IsPostDom>(TNB, BUI)) { LLVM_DEBUG(dbgs() << "\tPred " << BlockNamePrinter(Pred) << "\n"); if (!DT.getNode(Pred)) continue; - const NodePtr Support = - DT.findNearestCommonDominator(TN->getBlock(), Pred); + const NodePtr Support = DT.findNearestCommonDominator(TNB, Pred); LLVM_DEBUG(dbgs() << "\tSupport " << BlockNamePrinter(Support) << "\n"); - if (Support != TN->getBlock()) { + if (Support != TNB) { LLVM_DEBUG(dbgs() << "\t" << BlockNamePrinter(TN) << " is reachable from support " << BlockNamePrinter(Support) << "\n"); @@ -1054,7 +1058,7 @@ struct SemiNCAInfo { const TreeNodePtr TN = DT.getNode(To); assert(TN); if (TN->getLevel() > Level) return true; - if (llvm::find(AffectedQueue, To) == AffectedQueue.end()) + if (!llvm::is_contained(AffectedQueue, To)) AffectedQueue.push_back(To); return false; @@ -1144,53 +1148,34 @@ struct SemiNCAInfo { //===--------------------- DomTree Batch Updater --------------------------=== //~~ - static void ApplyUpdates(DomTreeT &DT, ArrayRef<UpdateT> Updates) { - const size_t NumUpdates = Updates.size(); + static void ApplyUpdates(DomTreeT &DT, GraphDiffT &PreViewCFG, + GraphDiffT *PostViewCFG) { + // Note: the PostViewCFG is only used when computing from scratch. Its data + // should already be included in the PreViewCFG for incremental updates. + const size_t NumUpdates = PreViewCFG.getNumLegalizedUpdates(); if (NumUpdates == 0) return; // Take the fast path for a single update and avoid running the batch update // machinery. if (NumUpdates == 1) { - const auto &Update = Updates.front(); - if (Update.getKind() == UpdateKind::Insert) - DT.insertEdge(Update.getFrom(), Update.getTo()); - else - DT.deleteEdge(Update.getFrom(), Update.getTo()); - + UpdateT Update = PreViewCFG.popUpdateForIncrementalUpdates(); + if (!PostViewCFG) { + if (Update.getKind() == UpdateKind::Insert) + InsertEdge(DT, /*BUI=*/nullptr, Update.getFrom(), Update.getTo()); + else + DeleteEdge(DT, /*BUI=*/nullptr, Update.getFrom(), Update.getTo()); + } else { + BatchUpdateInfo BUI(*PostViewCFG, PostViewCFG); + if (Update.getKind() == UpdateKind::Insert) + InsertEdge(DT, &BUI, Update.getFrom(), Update.getTo()); + else + DeleteEdge(DT, &BUI, Update.getFrom(), Update.getTo()); + } return; } - BatchUpdateInfo BUI; - LLVM_DEBUG(dbgs() << "Legalizing " << BUI.Updates.size() << " updates\n"); - cfg::LegalizeUpdates<NodePtr>(Updates, BUI.Updates, IsPostDom); - - const size_t NumLegalized = BUI.Updates.size(); - BUI.FutureSuccessors.reserve(NumLegalized); - BUI.FuturePredecessors.reserve(NumLegalized); - - // Use the legalized future updates to initialize future successors and - // predecessors. Note that these sets will only decrease size over time, as - // the next CFG snapshots slowly approach the actual (current) CFG. 
- for (UpdateT &U : BUI.Updates) { - BUI.FutureSuccessors[U.getFrom()].push_back({U.getTo(), U.getKind()}); - BUI.FuturePredecessors[U.getTo()].push_back({U.getFrom(), U.getKind()}); - } - -#if 0 - // FIXME: The LLVM_DEBUG macro only plays well with a modular - // build of LLVM when the header is marked as textual, but doing - // so causes redefinition errors. - LLVM_DEBUG(dbgs() << "About to apply " << NumLegalized << " updates\n"); - LLVM_DEBUG(if (NumLegalized < 32) for (const auto &U - : reverse(BUI.Updates)) { - dbgs() << "\t"; - U.dump(); - dbgs() << "\n"; - }); - LLVM_DEBUG(dbgs() << "\n"); -#endif - + BatchUpdateInfo BUI(PreViewCFG, PostViewCFG); // Recalculate the DominatorTree when the number of updates // exceeds a threshold, which usually makes direct updating slower than // recalculation. We select this threshold proportional to the @@ -1200,21 +1185,21 @@ struct SemiNCAInfo { // Make unittests of the incremental algorithm work if (DT.DomTreeNodes.size() <= 100) { - if (NumLegalized > DT.DomTreeNodes.size()) + if (BUI.NumLegalized > DT.DomTreeNodes.size()) CalculateFromScratch(DT, &BUI); - } else if (NumLegalized > DT.DomTreeNodes.size() / 40) + } else if (BUI.NumLegalized > DT.DomTreeNodes.size() / 40) CalculateFromScratch(DT, &BUI); // If the DominatorTree was recalculated at some point, stop the batch // updates. Full recalculations ignore batch updates and look at the actual // CFG. - for (size_t i = 0; i < NumLegalized && !BUI.IsRecalculated; ++i) + for (size_t i = 0; i < BUI.NumLegalized && !BUI.IsRecalculated; ++i) ApplyNextUpdate(DT, BUI); } static void ApplyNextUpdate(DomTreeT &DT, BatchUpdateInfo &BUI) { - assert(!BUI.Updates.empty() && "No updates to apply!"); - UpdateT CurrentUpdate = BUI.Updates.pop_back_val(); + // Popping the next update, will move the PreViewCFG to the next snapshot. + UpdateT CurrentUpdate = BUI.PreViewCFG.popUpdateForIncrementalUpdates(); #if 0 // FIXME: The LLVM_DEBUG macro only plays well with a modular // build of LLVM when the header is marked as textual, but doing @@ -1223,21 +1208,6 @@ struct SemiNCAInfo { LLVM_DEBUG(CurrentUpdate.dump(); dbgs() << "\n"); #endif - // Move to the next snapshot of the CFG by removing the reverse-applied - // current update. Since updates are performed in the same order they are - // legalized it's sufficient to pop the last item here. - auto &FS = BUI.FutureSuccessors[CurrentUpdate.getFrom()]; - assert(FS.back().getPointer() == CurrentUpdate.getTo() && - FS.back().getInt() == CurrentUpdate.getKind()); - FS.pop_back(); - if (FS.empty()) BUI.FutureSuccessors.erase(CurrentUpdate.getFrom()); - - auto &FP = BUI.FuturePredecessors[CurrentUpdate.getTo()]; - assert(FP.back().getPointer() == CurrentUpdate.getFrom() && - FP.back().getInt() == CurrentUpdate.getKind()); - FP.pop_back(); - if (FP.empty()) BUI.FuturePredecessors.erase(CurrentUpdate.getTo()); - if (CurrentUpdate.getKind() == UpdateKind::Insert) InsertEdge(DT, &BUI, CurrentUpdate.getFrom(), CurrentUpdate.getTo()); else @@ -1596,19 +1566,11 @@ void Calculate(DomTreeT &DT) { template <typename DomTreeT> void CalculateWithUpdates(DomTreeT &DT, ArrayRef<typename DomTreeT::UpdateType> Updates) { - // TODO: Move BUI creation in common method, reuse in ApplyUpdates. 
- typename SemiNCAInfo<DomTreeT>::BatchUpdateInfo BUI; - LLVM_DEBUG(dbgs() << "Legalizing " << BUI.Updates.size() << " updates\n"); - cfg::LegalizeUpdates<typename DomTreeT::NodePtr>(Updates, BUI.Updates, - DomTreeT::IsPostDominator); - const size_t NumLegalized = BUI.Updates.size(); - BUI.FutureSuccessors.reserve(NumLegalized); - BUI.FuturePredecessors.reserve(NumLegalized); - for (auto &U : BUI.Updates) { - BUI.FutureSuccessors[U.getFrom()].push_back({U.getTo(), U.getKind()}); - BUI.FuturePredecessors[U.getTo()].push_back({U.getFrom(), U.getKind()}); - } - + // FIXME: Updated to use the PreViewCFG and behave the same as until now. + // This behavior is however incorrect; this actually needs the PostViewCFG. + GraphDiff<typename DomTreeT::NodePtr, DomTreeT::IsPostDominator> PreViewCFG( + Updates, /*ReverseApplyUpdates=*/true); + typename SemiNCAInfo<DomTreeT>::BatchUpdateInfo BUI(PreViewCFG); SemiNCAInfo<DomTreeT>::CalculateFromScratch(DT, &BUI); } @@ -1628,8 +1590,11 @@ void DeleteEdge(DomTreeT &DT, typename DomTreeT::NodePtr From, template <class DomTreeT> void ApplyUpdates(DomTreeT &DT, - ArrayRef<typename DomTreeT::UpdateType> Updates) { - SemiNCAInfo<DomTreeT>::ApplyUpdates(DT, Updates); + GraphDiff<typename DomTreeT::NodePtr, + DomTreeT::IsPostDominator> &PreViewCFG, + GraphDiff<typename DomTreeT::NodePtr, + DomTreeT::IsPostDominator> *PostViewCFG) { + SemiNCAInfo<DomTreeT>::ApplyUpdates(DT, PreViewCFG, PostViewCFG); } template <class DomTreeT> diff --git a/llvm/include/llvm/Support/GlobPattern.h b/llvm/include/llvm/Support/GlobPattern.h index 3e5989d02500..b79de6f41c49 100644 --- a/llvm/include/llvm/Support/GlobPattern.h +++ b/llvm/include/llvm/Support/GlobPattern.h @@ -31,6 +31,16 @@ public: static Expected<GlobPattern> create(StringRef Pat); bool match(StringRef S) const; + // Returns true for glob pattern "*". Can be used to avoid expensive + // preparation/acquisition of the input for match(). 
+ bool isTrivialMatchAll() const { + if (Prefix && Prefix->empty()) { + assert(!Suffix); + return true; + } + return false; + } + private: bool matchOne(ArrayRef<BitVector> Pat, StringRef S) const; diff --git a/llvm/include/llvm/Support/GraphWriter.h b/llvm/include/llvm/Support/GraphWriter.h index f9241b1e8081..1f60fbc35126 100644 --- a/llvm/include/llvm/Support/GraphWriter.h +++ b/llvm/include/llvm/Support/GraphWriter.h @@ -158,9 +158,7 @@ public: writeNode(Node); } - bool isNodeHidden(NodeRef Node) { - return DTraits.isNodeHidden(Node); - } + bool isNodeHidden(NodeRef Node) { return DTraits.isNodeHidden(Node, G); } void writeNode(NodeRef Node) { std::string NodeAttributes = DTraits.getNodeAttributes(Node, G); @@ -228,10 +226,10 @@ public: child_iterator EI = GTraits::child_begin(Node); child_iterator EE = GTraits::child_end(Node); for (unsigned i = 0; EI != EE && i != 64; ++EI, ++i) - if (!DTraits.isNodeHidden(*EI)) + if (!DTraits.isNodeHidden(*EI, G)) writeEdge(Node, i, EI); for (; EI != EE; ++EI) - if (!DTraits.isNodeHidden(*EI)) + if (!DTraits.isNodeHidden(*EI, G)) writeEdge(Node, 64, EI); } diff --git a/llvm/include/llvm/Support/InitLLVM.h b/llvm/include/llvm/Support/InitLLVM.h index 3be8d6b6d2e0..879dc1514d10 100644 --- a/llvm/include/llvm/Support/InitLLVM.h +++ b/llvm/include/llvm/Support/InitLLVM.h @@ -9,6 +9,7 @@ #ifndef LLVM_SUPPORT_LLVM_H #define LLVM_SUPPORT_LLVM_H +#include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/PrettyStackTrace.h" @@ -44,7 +45,7 @@ public: private: BumpPtrAllocator Alloc; SmallVector<const char *, 0> Args; - PrettyStackTraceProgram StackPrinter; + Optional<PrettyStackTraceProgram> StackPrinter; }; } // namespace llvm diff --git a/llvm/include/llvm/Support/InstructionCost.h b/llvm/include/llvm/Support/InstructionCost.h new file mode 100644 index 000000000000..fbc898b878bb --- /dev/null +++ b/llvm/include/llvm/Support/InstructionCost.h @@ -0,0 +1,238 @@ +//===- InstructionCost.h ----------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// This file defines an InstructionCost class that is used when calculating +/// the cost of an instruction, or a group of instructions. In addition to a +/// numeric value representing the cost the class also contains a state that +/// can be used to encode particular properties, i.e. a cost being invalid or +/// unknown. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SUPPORT_INSTRUCTIONCOST_H +#define LLVM_SUPPORT_INSTRUCTIONCOST_H + +#include "llvm/ADT/Optional.h" + +namespace llvm { + +class raw_ostream; + +class InstructionCost { +public: + using CostType = int; + + /// These states can currently be used to indicate whether a cost is valid or + /// invalid. Examples of an invalid cost might be where the cost is + /// prohibitively expensive and the user wants to prevent certain + /// optimizations being performed. Or perhaps the cost is simply unknown + /// because the operation makes no sense in certain circumstances. These + /// states can be expanded in future to support other cases if necessary. 
+ enum CostState { Valid, Invalid }; + +private: + CostType Value; + CostState State; + + void propagateState(const InstructionCost &RHS) { + if (RHS.State == Invalid) + State = Invalid; + } + +public: + InstructionCost() = default; + + InstructionCost(CostState) = delete; + InstructionCost(CostType Val) : Value(Val), State(Valid) {} + + static InstructionCost getInvalid(CostType Val = 0) { + InstructionCost Tmp(Val); + Tmp.setInvalid(); + return Tmp; + } + + bool isValid() const { return State == Valid; } + void setValid() { State = Valid; } + void setInvalid() { State = Invalid; } + CostState getState() const { return State; } + + /// This function is intended to be used as sparingly as possible, since the + /// class provides the full range of operator support required for arithmetic + /// and comparisons. + Optional<CostType> getValue() const { + if (isValid()) + return Value; + return None; + } + + /// For all of the arithmetic operators provided here any invalid state is + /// perpetuated and cannot be removed. Once a cost becomes invalid it stays + /// invalid, and it also inherits any invalid state from the RHS. Regardless + /// of the state, arithmetic and comparisons work on the actual values in the + /// same way as they would on a basic type, such as integer. + + InstructionCost &operator+=(const InstructionCost &RHS) { + propagateState(RHS); + Value += RHS.Value; + return *this; + } + + InstructionCost &operator+=(const CostType RHS) { + InstructionCost RHS2(RHS); + *this += RHS2; + return *this; + } + + InstructionCost &operator-=(const InstructionCost &RHS) { + propagateState(RHS); + Value -= RHS.Value; + return *this; + } + + InstructionCost &operator-=(const CostType RHS) { + InstructionCost RHS2(RHS); + *this -= RHS2; + return *this; + } + + InstructionCost &operator*=(const InstructionCost &RHS) { + propagateState(RHS); + Value *= RHS.Value; + return *this; + } + + InstructionCost &operator*=(const CostType RHS) { + InstructionCost RHS2(RHS); + *this *= RHS2; + return *this; + } + + InstructionCost &operator/=(const InstructionCost &RHS) { + propagateState(RHS); + Value /= RHS.Value; + return *this; + } + + InstructionCost &operator/=(const CostType RHS) { + InstructionCost RHS2(RHS); + *this /= RHS2; + return *this; + } + + InstructionCost &operator++() { + *this += 1; + return *this; + } + + InstructionCost operator++(int) { + InstructionCost Copy = *this; + ++*this; + return Copy; + } + + InstructionCost &operator--() { + *this -= 1; + return *this; + } + + InstructionCost operator--(int) { + InstructionCost Copy = *this; + --*this; + return Copy; + } + + bool operator==(const InstructionCost &RHS) const { + return State == RHS.State && Value == RHS.Value; + } + + bool operator!=(const InstructionCost &RHS) const { return !(*this == RHS); } + + bool operator==(const CostType RHS) const { + return State == Valid && Value == RHS; + } + + bool operator!=(const CostType RHS) const { return !(*this == RHS); } + + /// For the comparison operators we have chosen to use total ordering with + /// the following rules: + /// 1. If either of the states != Valid then a lexicographical order is + /// applied based upon the state. + /// 2. If both states are valid then order based upon value. + /// This avoids having to add asserts the comparison operators that the states + /// are valid and users can test for validity of the cost explicitly. 
+ bool operator<(const InstructionCost &RHS) const { + if (State != Valid || RHS.State != Valid) + return State < RHS.State; + return Value < RHS.Value; + } + + bool operator>(const InstructionCost &RHS) const { return RHS < *this; } + + bool operator<=(const InstructionCost &RHS) const { return !(RHS < *this); } + + bool operator>=(const InstructionCost &RHS) const { return !(*this < RHS); } + + bool operator<(const CostType RHS) const { + InstructionCost RHS2(RHS); + return *this < RHS2; + } + + bool operator>(const CostType RHS) const { + InstructionCost RHS2(RHS); + return *this > RHS2; + } + + bool operator<=(const CostType RHS) const { + InstructionCost RHS2(RHS); + return *this <= RHS2; + } + + bool operator>=(const CostType RHS) const { + InstructionCost RHS2(RHS); + return *this >= RHS2; + } + + void print(raw_ostream &OS) const; +}; + +inline InstructionCost operator+(const InstructionCost &LHS, + const InstructionCost &RHS) { + InstructionCost LHS2(LHS); + LHS2 += RHS; + return LHS2; +} + +inline InstructionCost operator-(const InstructionCost &LHS, + const InstructionCost &RHS) { + InstructionCost LHS2(LHS); + LHS2 -= RHS; + return LHS2; +} + +inline InstructionCost operator*(const InstructionCost &LHS, + const InstructionCost &RHS) { + InstructionCost LHS2(LHS); + LHS2 *= RHS; + return LHS2; +} + +inline InstructionCost operator/(const InstructionCost &LHS, + const InstructionCost &RHS) { + InstructionCost LHS2(LHS); + LHS2 /= RHS; + return LHS2; +} + +inline raw_ostream &operator<<(raw_ostream &OS, const InstructionCost &V) { + V.print(OS); + return OS; +} + +} // namespace llvm + +#endif diff --git a/llvm/include/llvm/Support/JSON.h b/llvm/include/llvm/Support/JSON.h index 8b1c66234fe8..c753cee60ec1 100644 --- a/llvm/include/llvm/Support/JSON.h +++ b/llvm/include/llvm/Support/JSON.h @@ -253,7 +253,14 @@ inline bool operator!=(const Array &L, const Array &R) { return !(L == R); } /// === Converting JSON values to C++ types === /// /// The convention is to have a deserializer function findable via ADL: -/// fromJSON(const json::Value&, T&)->bool +/// fromJSON(const json::Value&, T&, Path) -> bool +/// +/// The return value indicates overall success, and Path is used for precise +/// error reporting. (The Path::Root passed in at the top level fromJSON call +/// captures any nested error and can render it in context). +/// If conversion fails, fromJSON calls Path::report() and immediately returns. +/// This ensures that the first fatal error survives. +/// /// Deserializers are provided for: /// - bool /// - int and int64_t @@ -449,12 +456,12 @@ private: friend class Object; template <typename T, typename... U> void create(U &&... V) { - new (reinterpret_cast<T *>(Union.buffer)) T(std::forward<U>(V)...); + new (reinterpret_cast<T *>(&Union)) T(std::forward<U>(V)...); } template <typename T> T &as() const { // Using this two-step static_cast via void * instead of reinterpret_cast // silences a -Wstrict-aliasing false positive from GCC6 and earlier. - void *Storage = static_cast<void *>(Union.buffer); + void *Storage = static_cast<void *>(&Union); return *static_cast<T *>(Storage); } @@ -557,81 +564,169 @@ inline bool Object::erase(StringRef K) { return M.erase(ObjectKey(K)); } +/// A "cursor" marking a position within a Value. +/// The Value is a tree, and this is the path from the root to the current node. +/// This is used to associate errors with particular subobjects. +class Path { +public: + class Root; + + /// Records that the value at the current path is invalid. 
+ /// Message is e.g. "expected number" and becomes part of the final error. + /// This overwrites any previously written error message in the root. + void report(llvm::StringLiteral Message); + + /// The root may be treated as a Path. + Path(Root &R) : Parent(nullptr), Seg(&R) {} + /// Derives a path for an array element: this[Index] + Path index(unsigned Index) const { return Path(this, Segment(Index)); } + /// Derives a path for an object field: this.Field + Path field(StringRef Field) const { return Path(this, Segment(Field)); } + +private: + /// One element in a JSON path: an object field (.foo) or array index [27]. + /// Exception: the root Path encodes a pointer to the Path::Root. + class Segment { + uintptr_t Pointer; + unsigned Offset; + + public: + Segment() = default; + Segment(Root *R) : Pointer(reinterpret_cast<uintptr_t>(R)) {} + Segment(llvm::StringRef Field) + : Pointer(reinterpret_cast<uintptr_t>(Field.data())), + Offset(static_cast<unsigned>(Field.size())) {} + Segment(unsigned Index) : Pointer(0), Offset(Index) {} + + bool isField() const { return Pointer != 0; } + StringRef field() const { + return StringRef(reinterpret_cast<const char *>(Pointer), Offset); + } + unsigned index() const { return Offset; } + Root *root() const { return reinterpret_cast<Root *>(Pointer); } + }; + + const Path *Parent; + Segment Seg; + + Path(const Path *Parent, Segment S) : Parent(Parent), Seg(S) {} +}; + +/// The root is the trivial Path to the root value. +/// It also stores the latest reported error and the path where it occurred. +class Path::Root { + llvm::StringRef Name; + llvm::StringLiteral ErrorMessage; + std::vector<Path::Segment> ErrorPath; // Only valid in error state. Reversed. + + friend void Path::report(llvm::StringLiteral Message); + +public: + Root(llvm::StringRef Name = "") : Name(Name), ErrorMessage("") {} + // No copy/move allowed as there are incoming pointers. + Root(Root &&) = delete; + Root &operator=(Root &&) = delete; + Root(const Root &) = delete; + Root &operator=(const Root &) = delete; + + /// Returns the last error reported, or else a generic error. + Error getError() const; + /// Print the root value with the error shown inline as a comment. + /// Unrelated parts of the value are elided for brevity, e.g. + /// { + /// "id": 42, + /// "name": /* expected string */ null, + /// "properties": { ... } + /// } + void printErrorContext(const Value &, llvm::raw_ostream &) const; +}; + // Standard deserializers are provided for primitive types. // See comments on Value. 
-inline bool fromJSON(const Value &E, std::string &Out) { +inline bool fromJSON(const Value &E, std::string &Out, Path P) { if (auto S = E.getAsString()) { Out = std::string(*S); return true; } + P.report("expected string"); return false; } -inline bool fromJSON(const Value &E, int &Out) { +inline bool fromJSON(const Value &E, int &Out, Path P) { if (auto S = E.getAsInteger()) { Out = *S; return true; } + P.report("expected integer"); return false; } -inline bool fromJSON(const Value &E, int64_t &Out) { +inline bool fromJSON(const Value &E, int64_t &Out, Path P) { if (auto S = E.getAsInteger()) { Out = *S; return true; } + P.report("expected integer"); return false; } -inline bool fromJSON(const Value &E, double &Out) { +inline bool fromJSON(const Value &E, double &Out, Path P) { if (auto S = E.getAsNumber()) { Out = *S; return true; } + P.report("expected number"); return false; } -inline bool fromJSON(const Value &E, bool &Out) { +inline bool fromJSON(const Value &E, bool &Out, Path P) { if (auto S = E.getAsBoolean()) { Out = *S; return true; } + P.report("expected boolean"); return false; } -inline bool fromJSON(const Value &E, std::nullptr_t &Out) { +inline bool fromJSON(const Value &E, std::nullptr_t &Out, Path P) { if (auto S = E.getAsNull()) { Out = *S; return true; } + P.report("expected null"); return false; } -template <typename T> bool fromJSON(const Value &E, llvm::Optional<T> &Out) { +template <typename T> +bool fromJSON(const Value &E, llvm::Optional<T> &Out, Path P) { if (E.getAsNull()) { Out = llvm::None; return true; } T Result; - if (!fromJSON(E, Result)) + if (!fromJSON(E, Result, P)) return false; Out = std::move(Result); return true; } -template <typename T> bool fromJSON(const Value &E, std::vector<T> &Out) { +template <typename T> +bool fromJSON(const Value &E, std::vector<T> &Out, Path P) { if (auto *A = E.getAsArray()) { Out.clear(); Out.resize(A->size()); for (size_t I = 0; I < A->size(); ++I) - if (!fromJSON((*A)[I], Out[I])) + if (!fromJSON((*A)[I], Out[I], P.index(I))) return false; return true; } + P.report("expected array"); return false; } template <typename T> -bool fromJSON(const Value &E, std::map<std::string, T> &Out) { +bool fromJSON(const Value &E, std::map<std::string, T> &Out, Path P) { if (auto *O = E.getAsObject()) { Out.clear(); for (const auto &KV : *O) - if (!fromJSON(KV.second, Out[std::string(llvm::StringRef(KV.first))])) + if (!fromJSON(KV.second, Out[std::string(llvm::StringRef(KV.first))], + P.field(KV.first))) return false; return true; } + P.report("expected object"); return false; } @@ -644,42 +739,59 @@ template <typename T> Value toJSON(const llvm::Optional<T> &Opt) { /// /// Example: /// \code -/// bool fromJSON(const Value &E, MyStruct &R) { -/// ObjectMapper O(E); -/// if (!O || !O.map("mandatory_field", R.MandatoryField)) -/// return false; -/// O.map("optional_field", R.OptionalField); -/// return true; +/// bool fromJSON(const Value &E, MyStruct &R, Path P) { +/// ObjectMapper O(E, P); +/// // When returning false, error details were already reported. +/// return O && O.map("mandatory_field", R.MandatoryField) && +/// O.mapOptional("optional_field", R.OptionalField); /// } /// \endcode class ObjectMapper { public: - ObjectMapper(const Value &E) : O(E.getAsObject()) {} + /// If O is not an object, this mapper is invalid and an error is reported. + ObjectMapper(const Value &E, Path P) : O(E.getAsObject()), P(P) { + if (!O) + P.report("expected object"); + } /// True if the expression is an object. 
/// Must be checked before calling map(). - operator bool() { return O; } + operator bool() const { return O; } - /// Maps a property to a field, if it exists. - template <typename T> bool map(StringRef Prop, T &Out) { + /// Maps a property to a field. + /// If the property is missing or invalid, reports an error. + template <typename T> bool map(StringLiteral Prop, T &Out) { assert(*this && "Must check this is an object before calling map()"); if (const Value *E = O->get(Prop)) - return fromJSON(*E, Out); + return fromJSON(*E, Out, P.field(Prop)); + P.field(Prop).report("missing value"); return false; } /// Maps a property to a field, if it exists. + /// If the property exists and is invalid, reports an error. /// (Optional requires special handling, because missing keys are OK). - template <typename T> bool map(StringRef Prop, llvm::Optional<T> &Out) { + template <typename T> bool map(StringLiteral Prop, llvm::Optional<T> &Out) { assert(*this && "Must check this is an object before calling map()"); if (const Value *E = O->get(Prop)) - return fromJSON(*E, Out); + return fromJSON(*E, Out, P.field(Prop)); Out = llvm::None; return true; } + /// Maps a property to a field, if it exists. + /// If the property exists and is invalid, reports an error. + /// If the property does not exist, Out is unchanged. + template <typename T> bool mapOptional(StringLiteral Prop, T &Out) { + assert(*this && "Must check this is an object before calling map()"); + if (const Value *E = O->get(Prop)) + return fromJSON(*E, Out, P.field(Prop)); + return true; + } + private: const Object *O; + Path P; }; /// Parses the provided JSON source, or returns a ParseError. @@ -703,9 +815,24 @@ public: } }; +/// Version of parse() that converts the parsed value to the type T. +/// RootName describes the root object and is used in error messages. +template <typename T> +Expected<T> parse(const llvm::StringRef &JSON, const char *RootName = "") { + auto V = parse(JSON); + if (!V) + return V.takeError(); + Path::Root R(RootName); + T Result; + if (fromJSON(*V, Result, R)) + return std::move(Result); + return R.getError(); +} + /// json::OStream allows writing well-formed JSON without materializing /// all structures as json::Value ahead of time. /// It's faster, lower-level, and less safe than OS << json::Value. +/// It also allows emitting more constructs, such as comments. /// /// Only one "top-level" object can be written to a stream. /// Simplest usage involves passing lambdas (Blocks) to fill in containers: @@ -791,6 +918,21 @@ class OStream { Contents(); objectEnd(); } + /// Emit an externally-serialized value. + /// The caller must write exactly one valid JSON value to the provided stream. + /// No validation or formatting of this value occurs. + void rawValue(llvm::function_ref<void(raw_ostream &)> Contents) { + rawValueBegin(); + Contents(OS); + rawValueEnd(); + } + void rawValue(llvm::StringRef Contents) { + rawValue([&](raw_ostream &OS) { OS << Contents; }); + } + /// Emit a JavaScript comment associated with the next printed value. + /// The string must be valid until the next attribute or value is emitted. + /// Comments are not part of standard JSON, and many parsers reject them! + void comment(llvm::StringRef); // High level functions to output object attributes. // Valid only within an object (any number of times). 
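For orientation, here is a minimal sketch of how the reworked JSON deserialization API above fits together. The struct, field names, and JSON text are illustrative only; ObjectMapper, Path, and the templated parse() are taken from the declarations shown in this diff.

#include "llvm/Support/JSON.h"
using namespace llvm;

struct ServerConfig {          // hypothetical example type
  std::string Host;
  int64_t Port = 8080;
};

// Deserializer found via ADL, following the fromJSON(Value, T, Path) convention.
// ObjectMapper reports "expected object", "missing value", and per-field errors
// through the Path it is given.
bool fromJSON(const json::Value &E, ServerConfig &R, json::Path P) {
  json::ObjectMapper O(E, P);
  return O && O.map("host", R.Host) && O.mapOptional("port", R.Port);
}

Error loadConfig(StringRef Text) {
  // parse<T>() builds a Path::Root internally; on failure it returns
  // Root::getError(), which carries the path of the offending field.
  Expected<ServerConfig> C = json::parse<ServerConfig>(Text, "config");
  if (!C)
    return C.takeError();
  // ... use C->Host and C->Port ...
  return Error::success();
}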
@@ -817,8 +959,10 @@ class OStream { void objectEnd(); void attributeBegin(llvm::StringRef Key); void attributeEnd(); + raw_ostream &rawValueBegin(); + void rawValueEnd(); - private: +private: void attributeImpl(llvm::StringRef Key, Block Contents) { attributeBegin(Key); Contents(); @@ -826,18 +970,21 @@ class OStream { } void valueBegin(); + void flushComment(); void newline(); enum Context { Singleton, // Top level, or object attribute. Array, Object, + RawValue, // External code writing a value to OS directly. }; struct State { Context Ctx = Singleton; bool HasValue = false; }; llvm::SmallVector<State, 16> Stack; // Never empty. + llvm::StringRef PendingComment; llvm::raw_ostream &OS; unsigned IndentSize; unsigned Indent = 0; diff --git a/llvm/include/llvm/Support/KnownBits.h b/llvm/include/llvm/Support/KnownBits.h index 69040cd23f03..d854aadbd430 100644 --- a/llvm/include/llvm/Support/KnownBits.h +++ b/llvm/include/llvm/Support/KnownBits.h @@ -15,6 +15,7 @@ #define LLVM_SUPPORT_KNOWNBITS_H #include "llvm/ADT/APInt.h" +#include "llvm/ADT/Optional.h" namespace llvm { @@ -97,6 +98,9 @@ public: /// Returns true if this value is known to be non-negative. bool isNonNegative() const { return Zero.isSignBitSet(); } + /// Returns true if this value is known to be non-zero. + bool isNonZero() const { return !One.isNullValue(); } + /// Returns true if this value is known to be positive. bool isStrictlyPositive() const { return Zero.isSignBitSet() && !One.isNullValue(); } @@ -110,18 +114,38 @@ public: Zero.setSignBit(); } - /// Return the minimal value possible given these KnownBits. + /// Return the minimal unsigned value possible given these KnownBits. APInt getMinValue() const { // Assume that all bits that aren't known-ones are zeros. return One; } - /// Return the maximal value possible given these KnownBits. + /// Return the minimal signed value possible given these KnownBits. + APInt getSignedMinValue() const { + // Assume that all bits that aren't known-ones are zeros. + APInt Min = One; + // Sign bit is unknown. + if (Zero.isSignBitClear()) + Min.setSignBit(); + return Min; + } + + /// Return the maximal unsigned value possible given these KnownBits. APInt getMaxValue() const { // Assume that all bits that aren't known-zeros are ones. return ~Zero; } + /// Return the maximal signed value possible given these KnownBits. + APInt getSignedMaxValue() const { + // Assume that all bits that aren't known-zeros are ones. + APInt Max = ~Zero; + // Sign bit is unknown. + if (One.isSignBitClear()) + Max.clearSignBit(); + return Max; + } + /// Return known bits for a truncation of the value we're tracking. KnownBits trunc(unsigned BitWidth) const { return KnownBits(Zero.trunc(BitWidth), One.trunc(BitWidth)); @@ -166,6 +190,20 @@ public: return *this; } + /// Return known bits for a sign extension or truncation of the value we're + /// tracking. + KnownBits sextOrTrunc(unsigned BitWidth) const { + if (BitWidth > getBitWidth()) + return sext(BitWidth); + if (BitWidth < getBitWidth()) + return trunc(BitWidth); + return *this; + } + + /// Return known bits for a in-register sign extension of the value we're + /// tracking. + KnownBits sextInReg(unsigned SrcBitWidth) const; + /// Return a KnownBits with the extracted bits /// [bitPosition,bitPosition+numBits). 
KnownBits extractBits(unsigned NumBits, unsigned BitPosition) const { @@ -173,6 +211,10 @@ public: One.extractBits(NumBits, BitPosition)); } + /// Return KnownBits based on this, but updated given that the underlying + /// value is known to be greater than or equal to Val. + KnownBits makeGE(const APInt &Val) const; + /// Returns the minimum number of trailing zero bits. unsigned countMinTrailingZeros() const { return Zero.countTrailingOnes(); @@ -233,6 +275,16 @@ public: return getBitWidth() - Zero.countPopulation(); } + /// Create known bits from a known constant. + static KnownBits makeConstant(const APInt &C) { + return KnownBits(~C, C); + } + + /// Compute known bits common to LHS and RHS. + static KnownBits commonBits(const KnownBits &LHS, const KnownBits &RHS) { + return KnownBits(LHS.Zero & RHS.Zero, LHS.One & RHS.One); + } + /// Compute known bits resulting from adding LHS, RHS and a 1-bit Carry. static KnownBits computeForAddCarry( const KnownBits &LHS, const KnownBits &RHS, const KnownBits &Carry); @@ -241,6 +293,84 @@ public: static KnownBits computeForAddSub(bool Add, bool NSW, const KnownBits &LHS, KnownBits RHS); + /// Compute known bits resulting from multiplying LHS and RHS. + static KnownBits computeForMul(const KnownBits &LHS, const KnownBits &RHS); + + /// Compute known bits for udiv(LHS, RHS). + static KnownBits udiv(const KnownBits &LHS, const KnownBits &RHS); + + /// Compute known bits for urem(LHS, RHS). + static KnownBits urem(const KnownBits &LHS, const KnownBits &RHS); + + /// Compute known bits for srem(LHS, RHS). + static KnownBits srem(const KnownBits &LHS, const KnownBits &RHS); + + /// Compute known bits for umax(LHS, RHS). + static KnownBits umax(const KnownBits &LHS, const KnownBits &RHS); + + /// Compute known bits for umin(LHS, RHS). + static KnownBits umin(const KnownBits &LHS, const KnownBits &RHS); + + /// Compute known bits for smax(LHS, RHS). + static KnownBits smax(const KnownBits &LHS, const KnownBits &RHS); + + /// Compute known bits for smin(LHS, RHS). + static KnownBits smin(const KnownBits &LHS, const KnownBits &RHS); + + /// Compute known bits for shl(LHS, RHS). + /// NOTE: RHS (shift amount) bitwidth doesn't need to be the same as LHS. + static KnownBits shl(const KnownBits &LHS, const KnownBits &RHS); + + /// Compute known bits for lshr(LHS, RHS). + /// NOTE: RHS (shift amount) bitwidth doesn't need to be the same as LHS. + static KnownBits lshr(const KnownBits &LHS, const KnownBits &RHS); + + /// Compute known bits for ashr(LHS, RHS). + /// NOTE: RHS (shift amount) bitwidth doesn't need to be the same as LHS. + static KnownBits ashr(const KnownBits &LHS, const KnownBits &RHS); + + /// Determine if these known bits always give the same ICMP_EQ result. + static Optional<bool> eq(const KnownBits &LHS, const KnownBits &RHS); + + /// Determine if these known bits always give the same ICMP_NE result. + static Optional<bool> ne(const KnownBits &LHS, const KnownBits &RHS); + + /// Determine if these known bits always give the same ICMP_UGT result. + static Optional<bool> ugt(const KnownBits &LHS, const KnownBits &RHS); + + /// Determine if these known bits always give the same ICMP_UGE result. + static Optional<bool> uge(const KnownBits &LHS, const KnownBits &RHS); + + /// Determine if these known bits always give the same ICMP_ULT result. + static Optional<bool> ult(const KnownBits &LHS, const KnownBits &RHS); + + /// Determine if these known bits always give the same ICMP_ULE result. 
+ static Optional<bool> ule(const KnownBits &LHS, const KnownBits &RHS); + + /// Determine if these known bits always give the same ICMP_SGT result. + static Optional<bool> sgt(const KnownBits &LHS, const KnownBits &RHS); + + /// Determine if these known bits always give the same ICMP_SGE result. + static Optional<bool> sge(const KnownBits &LHS, const KnownBits &RHS); + + /// Determine if these known bits always give the same ICMP_SLT result. + static Optional<bool> slt(const KnownBits &LHS, const KnownBits &RHS); + + /// Determine if these known bits always give the same ICMP_SLE result. + static Optional<bool> sle(const KnownBits &LHS, const KnownBits &RHS); + + /// Insert the bits from a smaller known bits starting at bitPosition. + void insertBits(const KnownBits &SubBits, unsigned BitPosition) { + Zero.insertBits(SubBits.Zero, BitPosition); + One.insertBits(SubBits.One, BitPosition); + } + + /// Return a subset of the known bits from [bitPosition,bitPosition+numBits). + KnownBits extractBits(unsigned NumBits, unsigned BitPosition) { + return KnownBits(Zero.extractBits(NumBits, BitPosition), + One.extractBits(NumBits, BitPosition)); + } + /// Update known bits based on ANDing with RHS. KnownBits &operator&=(const KnownBits &RHS); @@ -249,6 +379,17 @@ public: /// Update known bits based on XORing with RHS. KnownBits &operator^=(const KnownBits &RHS); + + /// Compute known bits for the absolute value. + KnownBits abs(bool IntMinIsPoison = false) const; + + KnownBits byteSwap() { + return KnownBits(Zero.byteSwap(), One.byteSwap()); + } + + KnownBits reverseBits() { + return KnownBits(Zero.reverseBits(), One.reverseBits()); + } }; inline KnownBits operator&(KnownBits LHS, const KnownBits &RHS) { diff --git a/llvm/include/llvm/Support/LineIterator.h b/llvm/include/llvm/Support/LineIterator.h index 2a1e47bfe5b7..b391412685c4 100644 --- a/llvm/include/llvm/Support/LineIterator.h +++ b/llvm/include/llvm/Support/LineIterator.h @@ -9,8 +9,10 @@ #ifndef LLVM_SUPPORT_LINEITERATOR_H #define LLVM_SUPPORT_LINEITERATOR_H +#include "llvm/ADT/Optional.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/DataTypes.h" +#include "llvm/Support/MemoryBufferRef.h" #include <iterator> namespace llvm { @@ -30,7 +32,7 @@ class MemoryBuffer; /// Note that this iterator requires the buffer to be nul terminated. class line_iterator : public std::iterator<std::forward_iterator_tag, StringRef> { - const MemoryBuffer *Buffer = nullptr; + Optional<MemoryBufferRef> Buffer; char CommentMarker = '\0'; bool SkipBlanks = true; @@ -41,6 +43,10 @@ public: /// Default construct an "end" iterator. line_iterator() = default; + /// Construct a new iterator around an unowned memory buffer. + explicit line_iterator(const MemoryBufferRef &Buffer, bool SkipBlanks = true, + char CommentMarker = '\0'); + /// Construct a new iterator around some memory buffer. 
explicit line_iterator(const MemoryBuffer &Buffer, bool SkipBlanks = true, char CommentMarker = '\0'); diff --git a/llvm/include/llvm/Support/MachineValueType.h b/llvm/include/llvm/Support/MachineValueType.h index 3bb8220e72e5..1c600d0108b6 100644 --- a/llvm/include/llvm/Support/MachineValueType.h +++ b/llvm/include/llvm/Support/MachineValueType.h @@ -111,113 +111,119 @@ namespace llvm { v8i64 = 61, // 8 x i64 v16i64 = 62, // 16 x i64 v32i64 = 63, // 32 x i64 + v64i64 = 64, // 64 x i64 + v128i64 = 65, // 128 x i64 + v256i64 = 66, // 256 x i64 - v1i128 = 64, // 1 x i128 + v1i128 = 67, // 1 x i128 FIRST_INTEGER_FIXEDLEN_VECTOR_VALUETYPE = v1i1, LAST_INTEGER_FIXEDLEN_VECTOR_VALUETYPE = v1i128, - v2f16 = 65, // 2 x f16 - v3f16 = 66, // 3 x f16 - v4f16 = 67, // 4 x f16 - v8f16 = 68, // 8 x f16 - v16f16 = 69, // 16 x f16 - v32f16 = 70, // 32 x f16 - v64f16 = 71, // 64 x f16 - v128f16 = 72, // 128 x f16 - v2bf16 = 73, // 2 x bf16 - v3bf16 = 74, // 3 x bf16 - v4bf16 = 75, // 4 x bf16 - v8bf16 = 76, // 8 x bf16 - v16bf16 = 77, // 16 x bf16 - v32bf16 = 78, // 32 x bf16 - v64bf16 = 79, // 64 x bf16 - v128bf16 = 80, // 128 x bf16 - v1f32 = 81, // 1 x f32 - v2f32 = 82, // 2 x f32 - v3f32 = 83, // 3 x f32 - v4f32 = 84, // 4 x f32 - v5f32 = 85, // 5 x f32 - v8f32 = 86, // 8 x f32 - v16f32 = 87, // 16 x f32 - v32f32 = 88, // 32 x f32 - v64f32 = 89, // 64 x f32 - v128f32 = 90, // 128 x f32 - v256f32 = 91, // 256 x f32 - v512f32 = 92, // 512 x f32 - v1024f32 = 93, // 1024 x f32 - v2048f32 = 94, // 2048 x f32 - v1f64 = 95, // 1 x f64 - v2f64 = 96, // 2 x f64 - v4f64 = 97, // 4 x f64 - v8f64 = 98, // 8 x f64 - v16f64 = 99, // 16 x f64 - v32f64 = 100, // 32 x f64 + v2f16 = 68, // 2 x f16 + v3f16 = 69, // 3 x f16 + v4f16 = 70, // 4 x f16 + v8f16 = 71, // 8 x f16 + v16f16 = 72, // 16 x f16 + v32f16 = 73, // 32 x f16 + v64f16 = 74, // 64 x f16 + v128f16 = 75, // 128 x f16 + v2bf16 = 76, // 2 x bf16 + v3bf16 = 77, // 3 x bf16 + v4bf16 = 78, // 4 x bf16 + v8bf16 = 79, // 8 x bf16 + v16bf16 = 80, // 16 x bf16 + v32bf16 = 81, // 32 x bf16 + v64bf16 = 82, // 64 x bf16 + v128bf16 = 83, // 128 x bf16 + v1f32 = 84, // 1 x f32 + v2f32 = 85, // 2 x f32 + v3f32 = 86, // 3 x f32 + v4f32 = 87, // 4 x f32 + v5f32 = 88, // 5 x f32 + v8f32 = 89, // 8 x f32 + v16f32 = 90, // 16 x f32 + v32f32 = 91, // 32 x f32 + v64f32 = 92, // 64 x f32 + v128f32 = 93, // 128 x f32 + v256f32 = 94, // 256 x f32 + v512f32 = 95, // 512 x f32 + v1024f32 = 96, // 1024 x f32 + v2048f32 = 97, // 2048 x f32 + v1f64 = 98, // 1 x f64 + v2f64 = 99, // 2 x f64 + v4f64 = 100, // 4 x f64 + v8f64 = 101, // 8 x f64 + v16f64 = 102, // 16 x f64 + v32f64 = 103, // 32 x f64 + v64f64 = 104, // 64 x f64 + v128f64 = 105, // 128 x f64 + v256f64 = 106, // 256 x f64 FIRST_FP_FIXEDLEN_VECTOR_VALUETYPE = v2f16, - LAST_FP_FIXEDLEN_VECTOR_VALUETYPE = v32f64, + LAST_FP_FIXEDLEN_VECTOR_VALUETYPE = v256f64, FIRST_FIXEDLEN_VECTOR_VALUETYPE = v1i1, - LAST_FIXEDLEN_VECTOR_VALUETYPE = v32f64, - - nxv1i1 = 101, // n x 1 x i1 - nxv2i1 = 102, // n x 2 x i1 - nxv4i1 = 103, // n x 4 x i1 - nxv8i1 = 104, // n x 8 x i1 - nxv16i1 = 105, // n x 16 x i1 - nxv32i1 = 106, // n x 32 x i1 - nxv64i1 = 107, // n x 64 x i1 - - nxv1i8 = 108, // n x 1 x i8 - nxv2i8 = 109, // n x 2 x i8 - nxv4i8 = 110, // n x 4 x i8 - nxv8i8 = 111, // n x 8 x i8 - nxv16i8 = 112, // n x 16 x i8 - nxv32i8 = 113, // n x 32 x i8 - nxv64i8 = 114, // n x 64 x i8 - - nxv1i16 = 115, // n x 1 x i16 - nxv2i16 = 116, // n x 2 x i16 - nxv4i16 = 117, // n x 4 x i16 - nxv8i16 = 118, // n x 8 x i16 - nxv16i16 = 119, // n x 16 x i16 - 
nxv32i16 = 120, // n x 32 x i16 - - nxv1i32 = 121, // n x 1 x i32 - nxv2i32 = 122, // n x 2 x i32 - nxv4i32 = 123, // n x 4 x i32 - nxv8i32 = 124, // n x 8 x i32 - nxv16i32 = 125, // n x 16 x i32 - nxv32i32 = 126, // n x 32 x i32 - - nxv1i64 = 127, // n x 1 x i64 - nxv2i64 = 128, // n x 2 x i64 - nxv4i64 = 129, // n x 4 x i64 - nxv8i64 = 130, // n x 8 x i64 - nxv16i64 = 131, // n x 16 x i64 - nxv32i64 = 132, // n x 32 x i64 + LAST_FIXEDLEN_VECTOR_VALUETYPE = v256f64, + + nxv1i1 = 107, // n x 1 x i1 + nxv2i1 = 108, // n x 2 x i1 + nxv4i1 = 109, // n x 4 x i1 + nxv8i1 = 110, // n x 8 x i1 + nxv16i1 = 111, // n x 16 x i1 + nxv32i1 = 112, // n x 32 x i1 + nxv64i1 = 113, // n x 64 x i1 + + nxv1i8 = 114, // n x 1 x i8 + nxv2i8 = 115, // n x 2 x i8 + nxv4i8 = 116, // n x 4 x i8 + nxv8i8 = 117, // n x 8 x i8 + nxv16i8 = 118, // n x 16 x i8 + nxv32i8 = 119, // n x 32 x i8 + nxv64i8 = 120, // n x 64 x i8 + + nxv1i16 = 121, // n x 1 x i16 + nxv2i16 = 122, // n x 2 x i16 + nxv4i16 = 123, // n x 4 x i16 + nxv8i16 = 124, // n x 8 x i16 + nxv16i16 = 125, // n x 16 x i16 + nxv32i16 = 126, // n x 32 x i16 + + nxv1i32 = 127, // n x 1 x i32 + nxv2i32 = 128, // n x 2 x i32 + nxv4i32 = 129, // n x 4 x i32 + nxv8i32 = 130, // n x 8 x i32 + nxv16i32 = 131, // n x 16 x i32 + nxv32i32 = 132, // n x 32 x i32 + + nxv1i64 = 133, // n x 1 x i64 + nxv2i64 = 134, // n x 2 x i64 + nxv4i64 = 135, // n x 4 x i64 + nxv8i64 = 136, // n x 8 x i64 + nxv16i64 = 137, // n x 16 x i64 + nxv32i64 = 138, // n x 32 x i64 FIRST_INTEGER_SCALABLE_VECTOR_VALUETYPE = nxv1i1, LAST_INTEGER_SCALABLE_VECTOR_VALUETYPE = nxv32i64, - nxv1f16 = 133, // n x 1 x f16 - nxv2f16 = 134, // n x 2 x f16 - nxv4f16 = 135, // n x 4 x f16 - nxv8f16 = 136, // n x 8 x f16 - nxv16f16 = 137, // n x 16 x f16 - nxv32f16 = 138, // n x 32 x f16 - nxv2bf16 = 139, // n x 2 x bf16 - nxv4bf16 = 140, // n x 4 x bf16 - nxv8bf16 = 141, // n x 8 x bf16 - nxv1f32 = 142, // n x 1 x f32 - nxv2f32 = 143, // n x 2 x f32 - nxv4f32 = 144, // n x 4 x f32 - nxv8f32 = 145, // n x 8 x f32 - nxv16f32 = 146, // n x 16 x f32 - nxv1f64 = 147, // n x 1 x f64 - nxv2f64 = 148, // n x 2 x f64 - nxv4f64 = 149, // n x 4 x f64 - nxv8f64 = 150, // n x 8 x f64 + nxv1f16 = 139, // n x 1 x f16 + nxv2f16 = 140, // n x 2 x f16 + nxv4f16 = 141, // n x 4 x f16 + nxv8f16 = 142, // n x 8 x f16 + nxv16f16 = 143, // n x 16 x f16 + nxv32f16 = 144, // n x 32 x f16 + nxv2bf16 = 145, // n x 2 x bf16 + nxv4bf16 = 146, // n x 4 x bf16 + nxv8bf16 = 147, // n x 8 x bf16 + nxv1f32 = 148, // n x 1 x f32 + nxv2f32 = 149, // n x 2 x f32 + nxv4f32 = 150, // n x 4 x f32 + nxv8f32 = 151, // n x 8 x f32 + nxv16f32 = 152, // n x 16 x f32 + nxv1f64 = 153, // n x 1 x f64 + nxv2f64 = 154, // n x 2 x f64 + nxv4f64 = 155, // n x 4 x f64 + nxv8f64 = 156, // n x 8 x f64 FIRST_FP_SCALABLE_VECTOR_VALUETYPE = nxv1f16, LAST_FP_SCALABLE_VECTOR_VALUETYPE = nxv8f64, @@ -228,25 +234,27 @@ namespace llvm { FIRST_VECTOR_VALUETYPE = v1i1, LAST_VECTOR_VALUETYPE = nxv8f64, - x86mmx = 151, // This is an X86 MMX value + x86mmx = 157, // This is an X86 MMX value - Glue = 152, // This glues nodes together during pre-RA sched + Glue = 158, // This glues nodes together during pre-RA sched - isVoid = 153, // This has no value + isVoid = 159, // This has no value - Untyped = 154, // This value takes a register, but has + Untyped = 160, // This value takes a register, but has // unspecified type. The register class // will be determined by the opcode. 
- exnref = 155, // WebAssembly's exnref type + funcref = 161, // WebAssembly's funcref type + externref = 162, // WebAssembly's externref type + x86amx = 163, // This is an X86 AMX value FIRST_VALUETYPE = 1, // This is always the beginning of the list. - LAST_VALUETYPE = 156, // This always remains at the end of the list. + LAST_VALUETYPE = 164, // This always remains at the end of the list. // This is the current maximum for LAST_VALUETYPE. // MVT::MAX_ALLOWED_VALUETYPE is used for asserts and to size bit vectors // This value must be a multiple of 32. - MAX_ALLOWED_VALUETYPE = 160, + MAX_ALLOWED_VALUETYPE = 192, // A value of type llvm::TokenTy token = 248, @@ -419,13 +427,43 @@ namespace llvm { SimpleTy == MVT::iPTRAny); } + /// Return a vector with the same number of elements as this vector, but + /// with the element type converted to an integer type with the same + /// bitwidth. + MVT changeVectorElementTypeToInteger() const { + MVT EltTy = getVectorElementType(); + MVT IntTy = MVT::getIntegerVT(EltTy.getSizeInBits()); + MVT VecTy = MVT::getVectorVT(IntTy, getVectorElementCount()); + assert(VecTy.SimpleTy != MVT::INVALID_SIMPLE_VALUE_TYPE && + "Simple vector VT not representable by simple integer vector VT!"); + return VecTy; + } + + /// Return a VT for a vector type whose attributes match ourselves + /// with the exception of the element type that is chosen by the caller. + MVT changeVectorElementType(MVT EltVT) const { + MVT VecTy = MVT::getVectorVT(EltVT, getVectorElementCount()); + assert(VecTy.SimpleTy != MVT::INVALID_SIMPLE_VALUE_TYPE && + "Simple vector VT not representable by simple integer vector VT!"); + return VecTy; + } + + /// Return the type converted to an equivalently sized integer or vector + /// with integer element type. Similar to changeVectorElementTypeToInteger, + /// but also handles scalars. + MVT changeTypeToInteger() { + if (isVector()) + return changeVectorElementTypeToInteger(); + return MVT::getIntegerVT(getSizeInBits()); + } + /// Return a VT for a vector type with the same element type but /// half the number of elements. MVT getHalfNumVectorElementsVT() const { MVT EltVT = getVectorElementType(); auto EltCnt = getVectorElementCount(); - assert(!(EltCnt.Min & 1) && "Splitting vector, but not in half!"); - return getVectorVT(EltVT, EltCnt / 2); + assert(EltCnt.isKnownEven() && "Splitting vector, but not in half!"); + return getVectorVT(EltVT, EltCnt.divideCoefficientBy(2)); } /// Returns true if the given vector is a power of 2. 
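A quick illustration of what the vector-type helpers introduced above produce for a concrete fixed-width type; the results follow directly from the definitions (a sketch, not part of the header):

MVT VT = MVT::v4f32;
MVT IntVT  = VT.changeVectorElementTypeToInteger(); // MVT::v4i32
MVT HalfVT = VT.getHalfNumVectorElementsVT();       // MVT::v2f32 (element count must be even)
MVT F16VT  = VT.changeVectorElementType(MVT::f16);  // MVT::v4f16
MVT Scalar = MVT::f64;
MVT AsInt  = Scalar.changeTypeToInteger();          // MVT::i64 (scalars are handled too)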
@@ -529,6 +567,9 @@ namespace llvm { case v8i64: case v16i64: case v32i64: + case v64i64: + case v128i64: + case v256i64: case nxv1i64: case nxv2i64: case nxv4i64: @@ -586,6 +627,9 @@ namespace llvm { case v8f64: case v16f64: case v32f64: + case v64f64: + case v128f64: + case v256f64: case nxv1f64: case nxv2f64: case nxv4f64: @@ -608,21 +652,27 @@ namespace llvm { case v256i1: case v256i8: case v256i32: - case v256f32: return 256; + case v256i64: + case v256f32: + case v256f64: return 256; case v128i1: case v128i8: case v128i16: case v128i32: + case v128i64: case v128f16: case v128bf16: - case v128f32: return 128; + case v128f32: + case v128f64: return 128; case v64i1: case v64i8: case v64i16: case v64i32: + case v64i64: case v64f16: case v64bf16: case v64f32: + case v64f64: case nxv64i1: case nxv64i8: return 64; case v32i1: @@ -737,12 +787,12 @@ namespace llvm { } ElementCount getVectorElementCount() const { - return { getVectorNumElements(), isScalableVector() }; + return ElementCount::get(getVectorNumElements(), isScalableVector()); } /// Given a vector type, return the minimum number of elements it contains. unsigned getVectorMinNumElements() const { - return getVectorElementCount().Min; + return getVectorElementCount().getKnownMinValue(); } /// Returns the size of the specified MVT in bits. @@ -910,21 +960,35 @@ namespace llvm { case v32f64: return TypeSize::Fixed(2048); case nxv32i64: return TypeSize::Scalable(2048); case v128i32: - case v128f32: return TypeSize::Fixed(4096); + case v64i64: + case v128f32: + case v64f64: return TypeSize::Fixed(4096); case v256i32: - case v256f32: return TypeSize::Fixed(8192); + case v128i64: + case v256f32: + case x86amx: + case v128f64: return TypeSize::Fixed(8192); case v512i32: - case v512f32: return TypeSize::Fixed(16384); + case v256i64: + case v512f32: + case v256f64: return TypeSize::Fixed(16384); case v1024i32: case v1024f32: return TypeSize::Fixed(32768); case v2048i32: case v2048f32: return TypeSize::Fixed(65536); - case exnref: return TypeSize::Fixed(0); // opaque type + case funcref: + case externref: return TypeSize::Fixed(0); // opaque type } } - TypeSize getScalarSizeInBits() const { - return getScalarType().getSizeInBits(); + /// Return the size of the specified fixed width value type in bits. The + /// function will assert if the type is scalable. + uint64_t getFixedSizeInBits() const { + return getSizeInBits().getFixedSize(); + } + + uint64_t getScalarSizeInBits() const { + return getScalarType().getSizeInBits().getFixedSize(); } /// Return the number of bytes overwritten by a store of the specified value @@ -950,28 +1014,56 @@ namespace llvm { /// Returns true if the number of bits for the type is a multiple of an /// 8-bit byte. - bool isByteSized() const { - return getSizeInBits().isByteSized(); + bool isByteSized() const { return getSizeInBits().isKnownMultipleOf(8); } + + /// Return true if we know at compile time this has more bits than VT. + bool knownBitsGT(MVT VT) const { + return TypeSize::isKnownGT(getSizeInBits(), VT.getSizeInBits()); + } + + /// Return true if we know at compile time this has more than or the same + /// bits as VT. + bool knownBitsGE(MVT VT) const { + return TypeSize::isKnownGE(getSizeInBits(), VT.getSizeInBits()); + } + + /// Return true if we know at compile time this has fewer bits than VT. + bool knownBitsLT(MVT VT) const { + return TypeSize::isKnownLT(getSizeInBits(), VT.getSizeInBits()); + } + + /// Return true if we know at compile time this has fewer than or the same + /// bits as VT. 
+ bool knownBitsLE(MVT VT) const { + return TypeSize::isKnownLE(getSizeInBits(), VT.getSizeInBits()); } /// Return true if this has more bits than VT. bool bitsGT(MVT VT) const { - return getSizeInBits() > VT.getSizeInBits(); + assert(isScalableVector() == VT.isScalableVector() && + "Comparison between scalable and fixed types"); + return knownBitsGT(VT); } /// Return true if this has no less bits than VT. bool bitsGE(MVT VT) const { - return getSizeInBits() >= VT.getSizeInBits(); + assert(isScalableVector() == VT.isScalableVector() && + "Comparison between scalable and fixed types"); + return knownBitsGE(VT); } /// Return true if this has less bits than VT. bool bitsLT(MVT VT) const { - return getSizeInBits() < VT.getSizeInBits(); + assert(isScalableVector() == VT.isScalableVector() && + "Comparison between scalable and fixed types"); + return knownBitsLT(VT); } /// Return true if this has no more bits than VT. bool bitsLE(MVT VT) const { - return getSizeInBits() <= VT.getSizeInBits(); + assert(isScalableVector() == VT.isScalableVector() && + "Comparison between scalable and fixed types"); + return knownBitsLE(VT); } static MVT getFloatingPointVT(unsigned BitWidth) { @@ -1072,6 +1164,9 @@ namespace llvm { if (NumElements == 8) return MVT::v8i64; if (NumElements == 16) return MVT::v16i64; if (NumElements == 32) return MVT::v32i64; + if (NumElements == 64) return MVT::v64i64; + if (NumElements == 128) return MVT::v128i64; + if (NumElements == 256) return MVT::v256i64; break; case MVT::i128: if (NumElements == 1) return MVT::v1i128; @@ -1119,6 +1214,9 @@ namespace llvm { if (NumElements == 8) return MVT::v8f64; if (NumElements == 16) return MVT::v16f64; if (NumElements == 32) return MVT::v32f64; + if (NumElements == 64) return MVT::v64f64; + if (NumElements == 128) return MVT::v128f64; + if (NumElements == 256) return MVT::v256f64; break; } return (MVT::SimpleValueType)(MVT::INVALID_SIMPLE_VALUE_TYPE); @@ -1207,9 +1305,9 @@ namespace llvm { } static MVT getVectorVT(MVT VT, ElementCount EC) { - if (EC.Scalable) - return getScalableVectorVT(VT, EC.Min); - return getVectorVT(VT, EC.Min); + if (EC.isScalable()) + return getScalableVectorVT(VT, EC.getKnownMinValue()); + return getVectorVT(VT, EC.getKnownMinValue()); } /// Return the value type corresponding to the specified type. This returns diff --git a/llvm/include/llvm/Support/MathExtras.h b/llvm/include/llvm/Support/MathExtras.h index 16da3046c8ce..33b9065261e8 100644 --- a/llvm/include/llvm/Support/MathExtras.h +++ b/llvm/include/llvm/Support/MathExtras.h @@ -440,7 +440,7 @@ inline uint64_t maxUIntN(uint64_t N) { inline int64_t minIntN(int64_t N) { assert(N > 0 && N <= 64 && "integer width out of range"); - return -(UINT64_C(1)<<(N-1)); + return UINT64_C(1) + ~(UINT64_C(1) << (N - 1)); } /// Gets the maximum value for a N-bit signed integer. diff --git a/llvm/include/llvm/Support/MemoryBuffer.h b/llvm/include/llvm/Support/MemoryBuffer.h index f47a8d2d334b..9e6ee2536c5e 100644 --- a/llvm/include/llvm/Support/MemoryBuffer.h +++ b/llvm/include/llvm/Support/MemoryBuffer.h @@ -19,14 +19,12 @@ #include "llvm/ADT/Twine.h" #include "llvm/Support/CBindingWrapping.h" #include "llvm/Support/ErrorOr.h" +#include "llvm/Support/MemoryBufferRef.h" #include <cstddef> #include <cstdint> #include <memory> namespace llvm { - -class MemoryBufferRef; - namespace sys { namespace fs { // Duplicated from FileSystem.h to avoid a dependency. 
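Stepping back to the MVT size comparisons above: bitsGT/bitsGE/bitsLT/bitsLE now assert that both types are either fixed or scalable, so mixed comparisons have to go through the knownBits* variants. A small sketch of the distinction:

MVT FixedVT    = MVT::v4i32;   // fixed width, 128 bits
MVT ScalableVT = MVT::nxv4i32; // scalable, a multiple of 128 bits
bool A = FixedVT.bitsLT(MVT::v8i32);      // fine: both operands are fixed width
bool B = ScalableVT.knownBitsGE(FixedVT); // fine: mixed kinds are allowed here
// ScalableVT.bitsGE(FixedVT) would trip the new assertion:
// "Comparison between scalable and fixed types"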
@@ -260,26 +258,6 @@ private: using MemoryBuffer::getSTDIN; }; -class MemoryBufferRef { - StringRef Buffer; - StringRef Identifier; - -public: - MemoryBufferRef() = default; - MemoryBufferRef(const MemoryBuffer& Buffer) - : Buffer(Buffer.getBuffer()), Identifier(Buffer.getBufferIdentifier()) {} - MemoryBufferRef(StringRef Buffer, StringRef Identifier) - : Buffer(Buffer), Identifier(Identifier) {} - - StringRef getBuffer() const { return Buffer; } - - StringRef getBufferIdentifier() const { return Identifier; } - - const char *getBufferStart() const { return Buffer.begin(); } - const char *getBufferEnd() const { return Buffer.end(); } - size_t getBufferSize() const { return Buffer.size(); } -}; - // Create wrappers for C Binding types (see CBindingWrapping.h). DEFINE_SIMPLE_CONVERSION_FUNCTIONS(MemoryBuffer, LLVMMemoryBufferRef) diff --git a/llvm/include/llvm/Support/MemoryBufferRef.h b/llvm/include/llvm/Support/MemoryBufferRef.h new file mode 100644 index 000000000000..b38a1f3b6565 --- /dev/null +++ b/llvm/include/llvm/Support/MemoryBufferRef.h @@ -0,0 +1,56 @@ +//===- MemoryBufferRef.h - Memory Buffer Reference --------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the MemoryBuffer interface. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SUPPORT_MEMORYBUFFERREF_H +#define LLVM_SUPPORT_MEMORYBUFFERREF_H + +#include "llvm/ADT/StringRef.h" + +namespace llvm { + +class MemoryBuffer; + +class MemoryBufferRef { + StringRef Buffer; + StringRef Identifier; + +public: + MemoryBufferRef() = default; + MemoryBufferRef(const MemoryBuffer &Buffer); + MemoryBufferRef(StringRef Buffer, StringRef Identifier) + : Buffer(Buffer), Identifier(Identifier) {} + + StringRef getBuffer() const { return Buffer; } + StringRef getBufferIdentifier() const { return Identifier; } + + const char *getBufferStart() const { return Buffer.begin(); } + const char *getBufferEnd() const { return Buffer.end(); } + size_t getBufferSize() const { return Buffer.size(); } + + /// Check pointer identity (not value) of identifier and data. + friend bool operator==(const MemoryBufferRef &LHS, + const MemoryBufferRef &RHS) { + return LHS.Buffer.begin() == RHS.Buffer.begin() && + LHS.Buffer.end() == RHS.Buffer.end() && + LHS.Identifier.begin() == RHS.Identifier.begin() && + LHS.Identifier.end() == RHS.Identifier.end(); + } + + friend bool operator!=(const MemoryBufferRef &LHS, + const MemoryBufferRef &RHS) { + return !(LHS == RHS); + } +}; + +} // namespace llvm + +#endif // LLVM_SUPPORT_MEMORYBUFFERREF_H diff --git a/llvm/include/llvm/Support/Parallel.h b/llvm/include/llvm/Support/Parallel.h index 2c0edfbb1db5..d2f006773836 100644 --- a/llvm/include/llvm/Support/Parallel.h +++ b/llvm/include/llvm/Support/Parallel.h @@ -11,6 +11,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/Config/llvm-config.h" +#include "llvm/Support/Error.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/Threading.h" @@ -120,13 +121,17 @@ void parallel_sort(RandomAccessIterator Start, RandomAccessIterator End, llvm::Log2_64(std::distance(Start, End)) + 1); } +// TaskGroup has a relatively high overhead, so we want to reduce +// the number of spawn() calls. We'll create up to 1024 tasks here. 
+// (Note that 1024 is an arbitrary number. This code probably needs +// improving to take the number of available cores into account.) +enum { MaxTasksPerGroup = 1024 }; + template <class IterTy, class FuncTy> void parallel_for_each(IterTy Begin, IterTy End, FuncTy Fn) { - // TaskGroup has a relatively high overhead, so we want to reduce - // the number of spawn() calls. We'll create up to 1024 tasks here. - // (Note that 1024 is an arbitrary number. This code probably needs - // improving to take the number of available cores into account.) - ptrdiff_t TaskSize = std::distance(Begin, End) / 1024; + // Limit the number of tasks to MaxTasksPerGroup to limit job scheduling + // overhead on large inputs. + ptrdiff_t TaskSize = std::distance(Begin, End) / MaxTasksPerGroup; if (TaskSize == 0) TaskSize = 1; @@ -140,7 +145,9 @@ void parallel_for_each(IterTy Begin, IterTy End, FuncTy Fn) { template <class IndexTy, class FuncTy> void parallel_for_each_n(IndexTy Begin, IndexTy End, FuncTy Fn) { - ptrdiff_t TaskSize = (End - Begin) / 1024; + // Limit the number of tasks to MaxTasksPerGroup to limit job scheduling + // overhead on large inputs. + ptrdiff_t TaskSize = (End - Begin) / MaxTasksPerGroup; if (TaskSize == 0) TaskSize = 1; @@ -156,6 +163,50 @@ void parallel_for_each_n(IndexTy Begin, IndexTy End, FuncTy Fn) { Fn(J); } +template <class IterTy, class ResultTy, class ReduceFuncTy, + class TransformFuncTy> +ResultTy parallel_transform_reduce(IterTy Begin, IterTy End, ResultTy Init, + ReduceFuncTy Reduce, + TransformFuncTy Transform) { + // Limit the number of tasks to MaxTasksPerGroup to limit job scheduling + // overhead on large inputs. + size_t NumInputs = std::distance(Begin, End); + if (NumInputs == 0) + return std::move(Init); + size_t NumTasks = std::min(static_cast<size_t>(MaxTasksPerGroup), NumInputs); + std::vector<ResultTy> Results(NumTasks, Init); + { + // Each task processes either TaskSize or TaskSize+1 inputs. Any inputs + // remaining after dividing them equally amongst tasks are distributed as + // one extra input over the first tasks. + TaskGroup TG; + size_t TaskSize = NumInputs / NumTasks; + size_t RemainingInputs = NumInputs % NumTasks; + IterTy TBegin = Begin; + for (size_t TaskId = 0; TaskId < NumTasks; ++TaskId) { + IterTy TEnd = TBegin + TaskSize + (TaskId < RemainingInputs ? 1 : 0); + TG.spawn([=, &Transform, &Reduce, &Results] { + // Reduce the result of transformation eagerly within each task. + ResultTy R = Init; + for (IterTy It = TBegin; It != TEnd; ++It) + R = Reduce(R, Transform(*It)); + Results[TaskId] = R; + }); + TBegin = TEnd; + } + assert(TBegin == End); + } + + // Do a final reduction. There are at most 1024 tasks, so this only adds + // constant single-threaded overhead for large inputs. Hopefully most + // reductions are cheaper than the transformation. 
+ ResultTy FinalResult = std::move(Results.front()); + for (ResultTy &PartialResult : + makeMutableArrayRef(Results.data() + 1, Results.size() - 1)) + FinalResult = Reduce(FinalResult, std::move(PartialResult)); + return std::move(FinalResult); +} + #endif } // namespace detail @@ -198,6 +249,22 @@ void parallelForEachN(size_t Begin, size_t End, FuncTy Fn) { Fn(I); } +template <class IterTy, class ResultTy, class ReduceFuncTy, + class TransformFuncTy> +ResultTy parallelTransformReduce(IterTy Begin, IterTy End, ResultTy Init, + ReduceFuncTy Reduce, + TransformFuncTy Transform) { +#if LLVM_ENABLE_THREADS + if (parallel::strategy.ThreadsRequested != 1) { + return parallel::detail::parallel_transform_reduce(Begin, End, Init, Reduce, + Transform); + } +#endif + for (IterTy I = Begin; I != End; ++I) + Init = Reduce(std::move(Init), Transform(*I)); + return std::move(Init); +} + // Range wrappers. template <class RangeTy, class Comparator = std::less<decltype(*std::begin(RangeTy()))>> @@ -210,6 +277,31 @@ void parallelForEach(RangeTy &&R, FuncTy Fn) { parallelForEach(std::begin(R), std::end(R), Fn); } +template <class RangeTy, class ResultTy, class ReduceFuncTy, + class TransformFuncTy> +ResultTy parallelTransformReduce(RangeTy &&R, ResultTy Init, + ReduceFuncTy Reduce, + TransformFuncTy Transform) { + return parallelTransformReduce(std::begin(R), std::end(R), Init, Reduce, + Transform); +} + +// Parallel for-each, but with error handling. +template <class RangeTy, class FuncTy> +Error parallelForEachError(RangeTy &&R, FuncTy Fn) { + // The transform_reduce algorithm requires that the initial value be copyable. + // Error objects are uncopyable. We only need to copy initial success values, + // so work around this mismatch via the C API. The C API represents success + // values with a null pointer. The joinErrors discards null values and joins + // multiple errors into an ErrorList. + return unwrap(parallelTransformReduce( + std::begin(R), std::end(R), wrap(Error::success()), + [](LLVMErrorRef Lhs, LLVMErrorRef Rhs) { + return wrap(joinErrors(unwrap(Lhs), unwrap(Rhs))); + }, + [&Fn](auto &&V) { return wrap(Fn(V)); })); +} + } // namespace llvm #endif // LLVM_SUPPORT_PARALLEL_H diff --git a/llvm/include/llvm/Support/Path.h b/llvm/include/llvm/Support/Path.h index 83bca5b70bc2..af70e086a1b6 100644 --- a/llvm/include/llvm/Support/Path.h +++ b/llvm/include/llvm/Support/Path.h @@ -451,10 +451,48 @@ bool has_extension(const Twine &path, Style style = Style::native); /// Is path absolute? /// +/// According to cppreference.com, C++17 states: "An absolute path is a path +/// that unambiguously identifies the location of a file without reference to +/// an additional starting location." +/// +/// In other words, the rules are: +/// 1) POSIX style paths with nonempty root directory are absolute. +/// 2) Windows style paths with nonempty root name and root directory are +/// absolute. +/// 3) No other paths are absolute. +/// +/// \see has_root_name +/// \see has_root_directory +/// /// @param path Input path. /// @result True if the path is absolute, false if it is not. bool is_absolute(const Twine &path, Style style = Style::native); +/// Is path absolute using GNU rules? +/// +/// GNU rules are: +/// 1) Paths starting with a path separator are absolute. +/// 2) Windows style paths are also absolute if they start with a character +/// followed by ':'. +/// 3) No other paths are absolute. +/// +/// On Windows style the path "C:\Users\Default" has "C:" as root name and "\" +/// as root directory. 
+/// +/// Hence "C:" on Windows is absolute under GNU rules and not absolute under +/// C++17 because it has no root directory. Likewise "/" and "\" on Windows are +/// absolute under GNU and are not absolute under C++17 due to empty root name. +/// +/// \see has_root_name +/// \see has_root_directory +/// +/// @param path Input path. +/// @param style The style of \p path (e.g. Windows or POSIX). "native" style +/// means to derive the style from the host. +/// @result True if the path is absolute following GNU rules, false if it is +/// not. +bool is_absolute_gnu(const Twine &path, Style style = Style::native); + /// Is path relative? /// /// @param path Input path. diff --git a/llvm/include/llvm/Support/PluginLoader.h b/llvm/include/llvm/Support/PluginLoader.h index c0c516bdae03..95c087f03d9b 100644 --- a/llvm/include/llvm/Support/PluginLoader.h +++ b/llvm/include/llvm/Support/PluginLoader.h @@ -16,7 +16,11 @@ #ifndef LLVM_SUPPORT_PLUGINLOADER_H #define LLVM_SUPPORT_PLUGINLOADER_H +#ifndef DONT_GET_PLUGIN_LOADER_OPTION #include "llvm/Support/CommandLine.h" +#endif + +#include <string> namespace llvm { struct PluginLoader { diff --git a/llvm/include/llvm/Support/Process.h b/llvm/include/llvm/Support/Process.h index 0ba6d58ba287..729917bb41f4 100644 --- a/llvm/include/llvm/Support/Process.h +++ b/llvm/include/llvm/Support/Process.h @@ -29,6 +29,7 @@ #include "llvm/Support/Chrono.h" #include "llvm/Support/DataTypes.h" #include "llvm/Support/Error.h" +#include "llvm/Support/Program.h" #include <system_error> namespace llvm { @@ -107,10 +108,12 @@ public: /// considered. static Optional<std::string> FindInEnvPath(StringRef EnvName, StringRef FileName, - ArrayRef<std::string> IgnoreList); + ArrayRef<std::string> IgnoreList, + char Separator = EnvPathSeparator); static Optional<std::string> FindInEnvPath(StringRef EnvName, - StringRef FileName); + StringRef FileName, + char Separator = EnvPathSeparator); // This functions ensures that the standard file descriptors (input, output, // and error) are properly mapped to a file descriptor before we use any of @@ -210,8 +213,9 @@ public: /// Equivalent to ::exit(), except when running inside a CrashRecoveryContext. /// In that case, the control flow will resume after RunSafely(), like for a /// crash, rather than exiting the current process. + /// Use \arg NoCleanup for calling _exit() instead of exit(). LLVM_ATTRIBUTE_NORETURN - static void Exit(int RetCode); + static void Exit(int RetCode, bool NoCleanup = false); }; } diff --git a/llvm/include/llvm/Support/Program.h b/llvm/include/llvm/Support/Program.h index dbda064cda05..bfd271958788 100644 --- a/llvm/include/llvm/Support/Program.h +++ b/llvm/include/llvm/Support/Program.h @@ -14,6 +14,7 @@ #define LLVM_SUPPORT_PROGRAM_H #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/BitVector.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/StringRef.h" #include "llvm/Config/llvm-config.h" @@ -36,7 +37,7 @@ namespace sys { typedef unsigned long procid_t; // Must match the type of DWORD on Windows. typedef void *process_t; // Must match the type of HANDLE on Windows. #else - typedef pid_t procid_t; + typedef ::pid_t procid_t; typedef procid_t process_t; #endif @@ -125,9 +126,11 @@ namespace sys { ///< string is non-empty upon return an error occurred while invoking the ///< program. bool *ExecutionFailed = nullptr, - Optional<ProcessStatistics> *ProcStat = nullptr ///< If non-zero, provides - /// a pointer to a structure in which process execution statistics will be - /// stored. 
+ Optional<ProcessStatistics> *ProcStat = nullptr, ///< If non-zero, + /// provides a pointer to a structure in which process execution + /// statistics will be stored. + BitVector *AffinityMask = nullptr ///< CPUs or processors the new + /// program shall run on. ); /// Similar to ExecuteAndWait, but returns immediately. @@ -140,7 +143,8 @@ namespace sys { ArrayRef<Optional<StringRef>> Redirects = {}, unsigned MemoryLimit = 0, std::string *ErrMsg = nullptr, - bool *ExecutionFailed = nullptr); + bool *ExecutionFailed = nullptr, + BitVector *AffinityMask = nullptr); /// Return true if the given arguments fit within system-specific /// argument length limits. @@ -218,7 +222,7 @@ namespace sys { /// to build a single flat command line appropriate for calling CreateProcess /// on /// Windows. - std::string flattenWindowsCommandLine(ArrayRef<StringRef> Args); + ErrorOr<std::wstring> flattenWindowsCommandLine(ArrayRef<StringRef> Args); #endif } } diff --git a/llvm/include/llvm/Support/RISCVTargetParser.def b/llvm/include/llvm/Support/RISCVTargetParser.def new file mode 100644 index 000000000000..6a06f9258105 --- /dev/null +++ b/llvm/include/llvm/Support/RISCVTargetParser.def @@ -0,0 +1,27 @@ +#ifndef PROC_ALIAS +#define PROC_ALIAS(NAME, RV32, RV64) +#endif + +PROC_ALIAS("generic", "generic-rv32", "generic-rv64") +PROC_ALIAS("rocket", "rocket-rv32", "rocket-rv64") +PROC_ALIAS("sifive-7-series", "sifive-7-rv32", "sifive-7-rv64") + +#undef PROC_ALIAS + +#ifndef PROC +#define PROC(ENUM, NAME, FEATURES, DEFAULT_MARCH) +#endif + +PROC(INVALID, {"invalid"}, FK_INVALID, {""}) +PROC(GENERIC_RV32, {"generic-rv32"}, FK_NONE, {""}) +PROC(GENERIC_RV64, {"generic-rv64"}, FK_64BIT, {""}) +PROC(ROCKET_RV32, {"rocket-rv32"}, FK_NONE, {""}) +PROC(ROCKET_RV64, {"rocket-rv64"}, FK_64BIT, {""}) +PROC(SIFIVE_732, {"sifive-7-rv32"}, FK_NONE, {""}) +PROC(SIFIVE_764, {"sifive-7-rv64"}, FK_64BIT, {""}) +PROC(SIFIVE_E31, {"sifive-e31"}, FK_NONE, {"rv32imac"}) +PROC(SIFIVE_U54, {"sifive-u54"}, FK_64BIT, {"rv64gc"}) +PROC(SIFIVE_E76, {"sifive-e76"}, FK_NONE, {"rv32imafc"}) +PROC(SIFIVE_U74, {"sifive-u74"}, FK_64BIT, {"rv64gc"}) + +#undef PROC diff --git a/llvm/include/llvm/Support/Signals.h b/llvm/include/llvm/Support/Signals.h index e0a18e72f2a7..44f5a750ff5c 100644 --- a/llvm/include/llvm/Support/Signals.h +++ b/llvm/include/llvm/Support/Signals.h @@ -50,7 +50,9 @@ namespace sys { void DisableSystemDialogsOnCrash(); /// Print the stack trace using the given \c raw_ostream object. - void PrintStackTrace(raw_ostream &OS); + /// \param Depth refers to the number of stackframes to print. If not + /// specified, the entire frame is printed. + void PrintStackTrace(raw_ostream &OS, int Depth = 0); // Run all registered signal handlers. void RunSignalHandlers(); @@ -115,6 +117,8 @@ namespace sys { /// Context is a system-specific failure context: it is the signal type on /// Unix; the ExceptionContext on Windows. void CleanupOnSignal(uintptr_t Context); + + void unregisterHandlers(); } // End sys namespace } // End llvm namespace diff --git a/llvm/include/llvm/Support/Signposts.h b/llvm/include/llvm/Support/Signposts.h index b5a8c3d61e3e..8036b1f53663 100644 --- a/llvm/include/llvm/Support/Signposts.h +++ b/llvm/include/llvm/Support/Signposts.h @@ -17,9 +17,10 @@ #ifndef LLVM_SUPPORT_SIGNPOSTS_H #define LLVM_SUPPORT_SIGNPOSTS_H +#include "llvm/ADT/StringRef.h" + namespace llvm { class SignpostEmitterImpl; -class Timer; /// Manages the emission of signposts into the recording method supported by /// the OS. 
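RISCVTargetParser.def above is an X-macro file: a client defines PROC (and/or PROC_ALIAS) before including it and lets the entries expand in place. A typical consumer, sketched here with an illustrative enum name rather than quoting the real one from TargetParser.h:

enum CPUKind : unsigned {
#define PROC(ENUM, NAME, FEATURES, DEFAULT_MARCH) CK_##ENUM,
#include "llvm/Support/RISCVTargetParser.def"
};
// Expands to CK_INVALID, CK_GENERIC_RV32, ..., CK_SIFIVE_U74; the .def file
// supplies an empty PROC_ALIAS fallback and #undefs PROC after expansion.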
@@ -32,10 +33,10 @@ public: bool isEnabled() const; - /// Begin a signposted interval for the given timer. - void startTimerInterval(Timer *T); - /// End a signposted interval for the given timer. - void endTimerInterval(Timer *T); + /// Begin a signposted interval for a given object. + void startInterval(const void *O, StringRef Name); + /// End a signposted interval for a given object. + void endInterval(const void *O, StringRef Name); }; } // end namespace llvm diff --git a/llvm/include/llvm/Support/SourceMgr.h b/llvm/include/llvm/Support/SourceMgr.h index a0bd3ca2e0c1..28716b42f4ab 100644 --- a/llvm/include/llvm/Support/SourceMgr.h +++ b/llvm/include/llvm/Support/SourceMgr.h @@ -172,6 +172,11 @@ public: std::pair<unsigned, unsigned> getLineAndColumn(SMLoc Loc, unsigned BufferID = 0) const; + /// Get a string with the \p SMLoc filename and line number + /// formatted in the standard style. + std::string getFormattedLocationNoOffset(SMLoc Loc, + bool IncludePath = false) const; + /// Given a line and column number in a mapped buffer, turn it into an SMLoc. /// This will return a null SMLoc if the line/column location is invalid. SMLoc FindLocForLineAndColumn(unsigned BufferID, unsigned LineNo, diff --git a/llvm/include/llvm/Support/SuffixTree.h b/llvm/include/llvm/Support/SuffixTree.h index 67d513d032ce..352fba511937 100644 --- a/llvm/include/llvm/Support/SuffixTree.h +++ b/llvm/include/llvm/Support/SuffixTree.h @@ -322,10 +322,10 @@ public: return It; } - bool operator==(const RepeatedSubstringIterator &Other) { + bool operator==(const RepeatedSubstringIterator &Other) const { return N == Other.N; } - bool operator!=(const RepeatedSubstringIterator &Other) { + bool operator!=(const RepeatedSubstringIterator &Other) const { return !(*this == Other); } diff --git a/llvm/include/llvm/Support/SwapByteOrder.h b/llvm/include/llvm/Support/SwapByteOrder.h index 0e544fc7e71e..e8612ba6654b 100644 --- a/llvm/include/llvm/Support/SwapByteOrder.h +++ b/llvm/include/llvm/Support/SwapByteOrder.h @@ -22,7 +22,7 @@ #endif #if defined(__linux__) || defined(__GNU__) || defined(__HAIKU__) || \ - defined(__EMSCRIPTEN__) + defined(__Fuchsia__) || defined(__EMSCRIPTEN__) #include <endian.h> #elif defined(_AIX) #include <sys/machine.h> diff --git a/llvm/include/llvm/Support/SymbolRemappingReader.h b/llvm/include/llvm/Support/SymbolRemappingReader.h index 2b9ab570eb8b..820cf9e02192 100644 --- a/llvm/include/llvm/Support/SymbolRemappingReader.h +++ b/llvm/include/llvm/Support/SymbolRemappingReader.h @@ -68,7 +68,7 @@ namespace llvm { class SymbolRemappingParseError : public ErrorInfo<SymbolRemappingParseError> { public: - SymbolRemappingParseError(StringRef File, int64_t Line, Twine Message) + SymbolRemappingParseError(StringRef File, int64_t Line, const Twine &Message) : File(File), Line(Line), Message(Message.str()) {} void log(llvm::raw_ostream &OS) const override { diff --git a/llvm/include/llvm/Support/TargetOpcodes.def b/llvm/include/llvm/Support/TargetOpcodes.def index c069f5d22ba8..a63d40484089 100644 --- a/llvm/include/llvm/Support/TargetOpcodes.def +++ b/llvm/include/llvm/Support/TargetOpcodes.def @@ -77,6 +77,10 @@ HANDLE_TARGET_OPCODE(SUBREG_TO_REG) /// DBG_VALUE - a mapping of the llvm.dbg.value intrinsic HANDLE_TARGET_OPCODE(DBG_VALUE) +/// DBG_INSTR_REF - A mapping of llvm.dbg.value referring to the instruction +/// that defines the value, rather than a virtual register. 
+HANDLE_TARGET_OPCODE(DBG_INSTR_REF) + /// DBG_LABEL - a mapping of the llvm.dbg.label intrinsic HANDLE_TARGET_OPCODE(DBG_LABEL) @@ -106,6 +110,9 @@ HANDLE_TARGET_OPCODE(BUNDLE) HANDLE_TARGET_OPCODE(LIFETIME_START) HANDLE_TARGET_OPCODE(LIFETIME_END) +/// Pseudo probe +HANDLE_TARGET_OPCODE(PSEUDO_PROBE) + /// A Stackmap instruction captures the location of live variables at its /// position in the instruction stream. It is followed by a shadow of bytes /// that must lie within the function and not contain another stackmap. @@ -294,6 +301,12 @@ HANDLE_TARGET_OPCODE(G_INTRINSIC_TRUNC) /// INTRINSIC round intrinsic. HANDLE_TARGET_OPCODE(G_INTRINSIC_ROUND) +/// INTRINSIC round to integer intrinsic. +HANDLE_TARGET_OPCODE(G_INTRINSIC_LRINT) + +/// INTRINSIC roundeven intrinsic. +HANDLE_TARGET_OPCODE(G_INTRINSIC_ROUNDEVEN) + /// INTRINSIC readcyclecounter HANDLE_TARGET_OPCODE(G_READCYCLECOUNTER) @@ -469,6 +482,36 @@ HANDLE_TARGET_OPCODE(G_USUBSAT) /// Generic saturating signed subtraction. HANDLE_TARGET_OPCODE(G_SSUBSAT) +/// Generic saturating unsigned left shift. +HANDLE_TARGET_OPCODE(G_USHLSAT) + +/// Generic saturating signed left shift. +HANDLE_TARGET_OPCODE(G_SSHLSAT) + +// Perform signed fixed point multiplication +HANDLE_TARGET_OPCODE(G_SMULFIX) + +// Perform unsigned fixed point multiplication +HANDLE_TARGET_OPCODE(G_UMULFIX) + +// Perform signed, saturating fixed point multiplication +HANDLE_TARGET_OPCODE(G_SMULFIXSAT) + +// Perform unsigned, saturating fixed point multiplication +HANDLE_TARGET_OPCODE(G_UMULFIXSAT) + +// Perform signed fixed point division +HANDLE_TARGET_OPCODE(G_SDIVFIX) + +// Perform unsigned fixed point division +HANDLE_TARGET_OPCODE(G_UDIVFIX) + +// Perform signed, saturating fixed point division +HANDLE_TARGET_OPCODE(G_SDIVFIXSAT) + +// Perform unsigned, saturating fixed point division +HANDLE_TARGET_OPCODE(G_UDIVFIXSAT) + /// Generic FP addition. HANDLE_TARGET_OPCODE(G_FADD) @@ -493,6 +536,9 @@ HANDLE_TARGET_OPCODE(G_FREM) /// Generic FP exponentiation. HANDLE_TARGET_OPCODE(G_FPOW) +/// Generic FP exponentiation, with an integer exponent. +HANDLE_TARGET_OPCODE(G_FPOWI) + /// Generic base-e exponential of a value. HANDLE_TARGET_OPCODE(G_FEXP) @@ -571,6 +617,9 @@ HANDLE_TARGET_OPCODE(G_UMIN) /// Generic unsigned integer maximum. HANDLE_TARGET_OPCODE(G_UMAX) +/// Generic integer absolute value. +HANDLE_TARGET_OPCODE(G_ABS) + /// Generic BRANCH instruction. This is an unconditional branch. HANDLE_TARGET_OPCODE(G_BR) @@ -655,10 +704,36 @@ HANDLE_TARGET_OPCODE(G_READ_REGISTER) /// write_register intrinsic HANDLE_TARGET_OPCODE(G_WRITE_REGISTER) +/// llvm.memcpy intrinsic +HANDLE_TARGET_OPCODE(G_MEMCPY) + +/// llvm.memmove intrinsic +HANDLE_TARGET_OPCODE(G_MEMMOVE) + +/// llvm.memset intrinsic +HANDLE_TARGET_OPCODE(G_MEMSET) + +/// Vector reductions +HANDLE_TARGET_OPCODE(G_VECREDUCE_SEQ_FADD) +HANDLE_TARGET_OPCODE(G_VECREDUCE_SEQ_FMUL) +HANDLE_TARGET_OPCODE(G_VECREDUCE_FADD) +HANDLE_TARGET_OPCODE(G_VECREDUCE_FMUL) +HANDLE_TARGET_OPCODE(G_VECREDUCE_FMAX) +HANDLE_TARGET_OPCODE(G_VECREDUCE_FMIN) +HANDLE_TARGET_OPCODE(G_VECREDUCE_ADD) +HANDLE_TARGET_OPCODE(G_VECREDUCE_MUL) +HANDLE_TARGET_OPCODE(G_VECREDUCE_AND) +HANDLE_TARGET_OPCODE(G_VECREDUCE_OR) +HANDLE_TARGET_OPCODE(G_VECREDUCE_XOR) +HANDLE_TARGET_OPCODE(G_VECREDUCE_SMAX) +HANDLE_TARGET_OPCODE(G_VECREDUCE_SMIN) +HANDLE_TARGET_OPCODE(G_VECREDUCE_UMAX) +HANDLE_TARGET_OPCODE(G_VECREDUCE_UMIN) + /// Marker for the end of the generic opcode. /// This is used to check if an opcode is in the range of the /// generic opcodes. 
-HANDLE_TARGET_OPCODE_MARKER(PRE_ISEL_GENERIC_OPCODE_END, G_WRITE_REGISTER) +HANDLE_TARGET_OPCODE_MARKER(PRE_ISEL_GENERIC_OPCODE_END, G_VECREDUCE_UMIN) /// BUILTIN_OP_END - This must be the last enum value in this list. /// The target-specific post-isel opcode values start here. diff --git a/llvm/include/llvm/Support/TargetParser.h b/llvm/include/llvm/Support/TargetParser.h index a0bd88c153b6..450e713f27f2 100644 --- a/llvm/include/llvm/Support/TargetParser.h +++ b/llvm/include/llvm/Support/TargetParser.h @@ -62,17 +62,20 @@ enum GPUKind : uint32_t { // AMDGCN-based processors. GK_GFX600 = 32, GK_GFX601 = 33, + GK_GFX602 = 34, GK_GFX700 = 40, GK_GFX701 = 41, GK_GFX702 = 42, GK_GFX703 = 43, GK_GFX704 = 44, + GK_GFX705 = 45, GK_GFX801 = 50, GK_GFX802 = 51, GK_GFX803 = 52, - GK_GFX810 = 53, + GK_GFX805 = 53, + GK_GFX810 = 54, GK_GFX900 = 60, GK_GFX902 = 61, @@ -80,14 +83,18 @@ enum GPUKind : uint32_t { GK_GFX906 = 63, GK_GFX908 = 64, GK_GFX909 = 65, + GK_GFX90C = 66, GK_GFX1010 = 71, GK_GFX1011 = 72, GK_GFX1012 = 73, GK_GFX1030 = 75, + GK_GFX1031 = 76, + GK_GFX1032 = 77, + GK_GFX1033 = 78, GK_AMDGCN_FIRST = GK_GFX600, - GK_AMDGCN_LAST = GK_GFX1030, + GK_AMDGCN_LAST = GK_GFX1033, }; /// Instruction set architecture version. @@ -112,12 +119,18 @@ enum ArchFeatureKind : uint32_t { FEATURE_FAST_DENORMAL_F32 = 1 << 5, // Wavefront 32 is available. - FEATURE_WAVE32 = 1 << 6 + FEATURE_WAVE32 = 1 << 6, + + // Xnack is available. + FEATURE_XNACK = 1 << 7, + + // Sram-ecc is available. + FEATURE_SRAMECC = 1 << 8, }; StringRef getArchNameAMDGCN(GPUKind AK); StringRef getArchNameR600(GPUKind AK); -StringRef getCanonicalArchName(StringRef Arch); +StringRef getCanonicalArchName(const Triple &T, StringRef Arch); GPUKind parseArchAMDGCN(StringRef CPU); GPUKind parseArchR600(StringRef CPU); unsigned getArchAttrAMDGCN(GPUKind AK); @@ -130,6 +143,36 @@ IsaVersion getIsaVersion(StringRef GPU); } // namespace AMDGPU +namespace RISCV { + +enum CPUKind : unsigned { +#define PROC(ENUM, NAME, FEATURES, DEFAULT_MARCH) CK_##ENUM, +#include "RISCVTargetParser.def" +}; + +enum FeatureKind : unsigned { + FK_INVALID = 0, + FK_NONE = 1, + FK_STDEXTM = 1 << 2, + FK_STDEXTA = 1 << 3, + FK_STDEXTF = 1 << 4, + FK_STDEXTD = 1 << 5, + FK_STDEXTC = 1 << 6, + FK_64BIT = 1 << 7, +}; + +bool checkCPUKind(CPUKind Kind, bool IsRV64); +bool checkTuneCPUKind(CPUKind Kind, bool IsRV64); +CPUKind parseCPUKind(StringRef CPU); +CPUKind parseTuneCPUKind(StringRef CPU, bool IsRV64); +StringRef getMArchFromMcpu(StringRef CPU); +void fillValidCPUArchList(SmallVectorImpl<StringRef> &Values, bool IsRV64); +void fillValidTuneCPUArchList(SmallVectorImpl<StringRef> &Values, bool IsRV64); +bool getCPUFeaturesExceptStdExt(CPUKind Kind, std::vector<StringRef> &Features); +StringRef resolveTuneCPUAlias(StringRef TuneCPU, bool IsRV64); + +} // namespace RISCV + } // namespace llvm #endif diff --git a/llvm/include/llvm/Support/TargetRegistry.h b/llvm/include/llvm/Support/TargetRegistry.h index d91eabae8235..2c65eb60f910 100644 --- a/llvm/include/llvm/Support/TargetRegistry.h +++ b/llvm/include/llvm/Support/TargetRegistry.h @@ -510,6 +510,8 @@ public: S = createWasmStreamer(Ctx, std::move(TAB), std::move(OW), std::move(Emitter), RelaxAll); break; + case Triple::GOFF: + report_fatal_error("GOFF MCObjectStreamer not implemented yet"); case Triple::XCOFF: S = createXCOFFStreamer(Ctx, std::move(TAB), std::move(OW), std::move(Emitter), RelaxAll); diff --git a/llvm/include/llvm/Support/TaskQueue.h b/llvm/include/llvm/Support/TaskQueue.h index 
4ceb056391af..6901a550b62f 100644 --- a/llvm/include/llvm/Support/TaskQueue.h +++ b/llvm/include/llvm/Support/TaskQueue.h @@ -98,7 +98,7 @@ public: IsTaskInFlight = true; } } - return std::move(F); + return F; } private: diff --git a/llvm/include/llvm/Support/Threading.h b/llvm/include/llvm/Support/Threading.h index 13000575f270..46cf82524e57 100644 --- a/llvm/include/llvm/Support/Threading.h +++ b/llvm/include/llvm/Support/Threading.h @@ -210,7 +210,7 @@ void llvm_execute_on_thread_async( return heavyweight_hardware_concurrency(); } - /// Returns a default thread strategy where all available hardware ressources + /// Returns a default thread strategy where all available hardware resources /// are to be used, except for those initially excluded by an affinity mask. /// This function takes affinity into consideration. Returns 1 when LLVM is /// configured with LLVM_ENABLE_THREADS=OFF. @@ -220,6 +220,16 @@ void llvm_execute_on_thread_async( return S; } + /// Returns an optimal thread strategy to execute specified amount of tasks. + /// This strategy should prevent us from creating too many threads if we + /// occasionaly have an unexpectedly small amount of tasks. + inline ThreadPoolStrategy optimal_concurrency(unsigned TaskCount = 0) { + ThreadPoolStrategy S; + S.Limit = true; + S.ThreadsRequested = TaskCount; + return S; + } + /// Return the current thread id, as used in various OS system calls. /// Note that not all platforms guarantee that the value returned will be /// unique across the entire system, so portable code should not assume diff --git a/llvm/include/llvm/Support/ToolOutputFile.h b/llvm/include/llvm/Support/ToolOutputFile.h index cf01b9ecefc5..ec1d6ae52268 100644 --- a/llvm/include/llvm/Support/ToolOutputFile.h +++ b/llvm/include/llvm/Support/ToolOutputFile.h @@ -35,6 +35,7 @@ class ToolOutputFile { /// The flag which indicates whether we should not delete the file. bool Keep; + StringRef getFilename() { return Filename; } explicit CleanupInstaller(StringRef Filename); ~CleanupInstaller(); } Installer; @@ -57,6 +58,9 @@ public: /// Return the contained raw_fd_ostream. raw_fd_ostream &os() { return *OS; } + /// Return the filename initialized with. + StringRef getFilename() { return Installer.getFilename(); } + /// Indicate that the tool's job wrt this output file has been successful and /// the file should not be deleted. void keep() { Installer.Keep = true; } diff --git a/llvm/include/llvm/Support/TrigramIndex.h b/llvm/include/llvm/Support/TrigramIndex.h index d635694eb5fd..0be6a1012718 100644 --- a/llvm/include/llvm/Support/TrigramIndex.h +++ b/llvm/include/llvm/Support/TrigramIndex.h @@ -27,7 +27,7 @@ #define LLVM_SUPPORT_TRIGRAMINDEX_H #include "llvm/ADT/SmallVector.h" - +#include "llvm/ADT/StringRef.h" #include <string> #include <unordered_map> #include <vector> @@ -38,7 +38,7 @@ class StringRef; class TrigramIndex { public: /// Inserts a new Regex into the index. - void insert(std::string Regex); + void insert(const std::string &Regex); /// Returns true, if special case list definitely does not have a line /// that matches the query. Returns false, if it's not sure. 
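The optimal_concurrency() strategy added to Threading.h above caps the requested worker count at the task count, so an occasional small batch does not spin up a full-sized pool. A possible usage sketch, assuming the LLVM 12-era llvm::ThreadPool API (a constructor taking ThreadPoolStrategy, async() to queue a callable, wait() to drain); the function and its use of ArrayRef are illustrative, not taken from an actual caller:

#include "llvm/ADT/ArrayRef.h"
#include "llvm/Support/ThreadPool.h"
#include "llvm/Support/Threading.h"

// Process each item on a pool sized for the task count: with, say, 3 items on
// a 64-hardware-thread machine, only 3 workers are created.
static void processAll(llvm::ArrayRef<int> Items) {
  llvm::ThreadPool Pool(llvm::optimal_concurrency(Items.size()));
  for (int Item : Items)
    Pool.async([Item] { /* per-item work */ });
  Pool.wait();
}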
diff --git a/llvm/include/llvm/Support/TypeSize.h b/llvm/include/llvm/Support/TypeSize.h index 76564c401e8e..d277affdbb23 100644 --- a/llvm/include/llvm/Support/TypeSize.h +++ b/llvm/include/llvm/Support/TypeSize.h @@ -15,152 +15,417 @@ #ifndef LLVM_SUPPORT_TYPESIZE_H #define LLVM_SUPPORT_TYPESIZE_H +#include "llvm/ADT/ArrayRef.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/WithColor.h" -#include <cstdint> +#include <algorithm> +#include <array> #include <cassert> +#include <cstdint> +#include <type_traits> namespace llvm { -template <typename T> struct DenseMapInfo; +template <typename LeafTy> struct LinearPolyBaseTypeTraits {}; + +//===----------------------------------------------------------------------===// +// LinearPolyBase - a base class for linear polynomials with multiple +// dimensions. This can e.g. be used to describe offsets that are have both a +// fixed and scalable component. +//===----------------------------------------------------------------------===// + +/// LinearPolyBase describes a linear polynomial: +/// c0 * scale0 + c1 * scale1 + ... + cK * scaleK +/// where the scale is implicit, so only the coefficients are encoded. +template <typename LeafTy> +class LinearPolyBase { +public: + using ScalarTy = typename LinearPolyBaseTypeTraits<LeafTy>::ScalarTy; + static constexpr auto Dimensions = LinearPolyBaseTypeTraits<LeafTy>::Dimensions; + static_assert(Dimensions != std::numeric_limits<unsigned>::max(), + "Dimensions out of range"); + +private: + std::array<ScalarTy, Dimensions> Coefficients; + +protected: + LinearPolyBase(ArrayRef<ScalarTy> Values) { + std::copy(Values.begin(), Values.end(), Coefficients.begin()); + } -class ElementCount { public: - unsigned Min; // Minimum number of vector elements. - bool Scalable; // If true, NumElements is a multiple of 'Min' determined - // at runtime rather than compile time. 
+ friend LeafTy &operator+=(LeafTy &LHS, const LeafTy &RHS) { + for (unsigned I=0; I<Dimensions; ++I) + LHS.Coefficients[I] += RHS.Coefficients[I]; + return LHS; + } - ElementCount() = default; + friend LeafTy &operator-=(LeafTy &LHS, const LeafTy &RHS) { + for (unsigned I=0; I<Dimensions; ++I) + LHS.Coefficients[I] -= RHS.Coefficients[I]; + return LHS; + } - ElementCount(unsigned Min, bool Scalable) - : Min(Min), Scalable(Scalable) {} + friend LeafTy &operator*=(LeafTy &LHS, ScalarTy RHS) { + for (auto &C : LHS.Coefficients) + C *= RHS; + return LHS; + } - ElementCount operator*(unsigned RHS) { - return { Min * RHS, Scalable }; + friend LeafTy operator+(const LeafTy &LHS, const LeafTy &RHS) { + LeafTy Copy = LHS; + return Copy += RHS; } - ElementCount operator/(unsigned RHS) { - assert(Min % RHS == 0 && "Min is not a multiple of RHS."); - return { Min / RHS, Scalable }; + + friend LeafTy operator-(const LeafTy &LHS, const LeafTy &RHS) { + LeafTy Copy = LHS; + return Copy -= RHS; } - bool operator==(const ElementCount& RHS) const { - return Min == RHS.Min && Scalable == RHS.Scalable; + friend LeafTy operator*(const LeafTy &LHS, ScalarTy RHS) { + LeafTy Copy = LHS; + return Copy *= RHS; } - bool operator!=(const ElementCount& RHS) const { + + template <typename U = ScalarTy> + friend typename std::enable_if_t<std::is_signed<U>::value, LeafTy> + operator-(const LeafTy &LHS) { + LeafTy Copy = LHS; + return Copy *= -1; + } + + bool operator==(const LinearPolyBase &RHS) const { + return std::equal(Coefficients.begin(), Coefficients.end(), + RHS.Coefficients.begin()); + } + + bool operator!=(const LinearPolyBase &RHS) const { return !(*this == RHS); } - bool operator==(unsigned RHS) const { return Min == RHS && !Scalable; } - bool operator!=(unsigned RHS) const { return !(*this == RHS); } - ElementCount NextPowerOf2() const { - return ElementCount(llvm::NextPowerOf2(Min), Scalable); + bool isZero() const { + return all_of(Coefficients, [](const ScalarTy &C) { return C == 0; }); } + bool isNonZero() const { return !isZero(); } + explicit operator bool() const { return isNonZero(); } + + ScalarTy getValue(unsigned Dim) const { return Coefficients[Dim]; } }; -// This class is used to represent the size of types. If the type is of fixed -// size, it will represent the exact size. If the type is a scalable vector, -// it will represent the known minimum size. -class TypeSize { - uint64_t MinSize; // The known minimum size. - bool IsScalable; // If true, then the runtime size is an integer multiple - // of MinSize. +//===----------------------------------------------------------------------===// +// StackOffset - Represent an offset with named fixed and scalable components. +//===----------------------------------------------------------------------===// + +class StackOffset; +template <> struct LinearPolyBaseTypeTraits<StackOffset> { + using ScalarTy = int64_t; + static constexpr unsigned Dimensions = 2; +}; + +/// StackOffset is a class to represent an offset with 2 dimensions, +/// named fixed and scalable, respectively. This class allows a value for both +/// dimensions to depict e.g. "8 bytes and 16 scalable bytes", which is needed +/// to represent stack offsets. 
+class StackOffset : public LinearPolyBase<StackOffset> { +protected: + StackOffset(ScalarTy Fixed, ScalarTy Scalable) + : LinearPolyBase<StackOffset>({Fixed, Scalable}) {} + +public: + StackOffset() : StackOffset({0, 0}) {} + StackOffset(const LinearPolyBase<StackOffset> &Other) + : LinearPolyBase<StackOffset>(Other) {} + static StackOffset getFixed(ScalarTy Fixed) { return {Fixed, 0}; } + static StackOffset getScalable(ScalarTy Scalable) { return {0, Scalable}; } + static StackOffset get(ScalarTy Fixed, ScalarTy Scalable) { + return {Fixed, Scalable}; + } + + ScalarTy getFixed() const { return this->getValue(0); } + ScalarTy getScalable() const { return this->getValue(1); } +}; + +//===----------------------------------------------------------------------===// +// UnivariateLinearPolyBase - a base class for linear polynomials with multiple +// dimensions, but where only one dimension can be set at any time. +// This can e.g. be used to describe sizes that are either fixed or scalable. +//===----------------------------------------------------------------------===// +/// UnivariateLinearPolyBase is a base class for ElementCount and TypeSize. +/// Like LinearPolyBase it tries to represent a linear polynomial +/// where only one dimension can be set at any time, e.g. +/// 0 * scale0 + 0 * scale1 + ... + cJ * scaleJ + ... + 0 * scaleK +/// The dimension that is set is the univariate dimension. +template <typename LeafTy> +class UnivariateLinearPolyBase { public: - constexpr TypeSize(uint64_t MinSize, bool Scalable) - : MinSize(MinSize), IsScalable(Scalable) {} + using ScalarTy = typename LinearPolyBaseTypeTraits<LeafTy>::ScalarTy; + static constexpr auto Dimensions = LinearPolyBaseTypeTraits<LeafTy>::Dimensions; + static_assert(Dimensions != std::numeric_limits<unsigned>::max(), + "Dimensions out of range"); + +protected: + ScalarTy Value; // The value at the univeriate dimension. + unsigned UnivariateDim; // The univeriate dimension. + + UnivariateLinearPolyBase(ScalarTy Val, unsigned UnivariateDim) + : Value(Val), UnivariateDim(UnivariateDim) { + assert(UnivariateDim < Dimensions && "Dimension out of range"); + } + + friend LeafTy &operator+=(LeafTy &LHS, const LeafTy &RHS) { + assert(LHS.UnivariateDim == RHS.UnivariateDim && "Invalid dimensions"); + LHS.Value += RHS.Value; + return LHS; + } + + friend LeafTy &operator-=(LeafTy &LHS, const LeafTy &RHS) { + assert(LHS.UnivariateDim == RHS.UnivariateDim && "Invalid dimensions"); + LHS.Value -= RHS.Value; + return LHS; + } + + friend LeafTy &operator*=(LeafTy &LHS, ScalarTy RHS) { + LHS.Value *= RHS; + return LHS; + } - static constexpr TypeSize Fixed(uint64_t Size) { - return TypeSize(Size, /*IsScalable=*/false); + friend LeafTy operator+(const LeafTy &LHS, const LeafTy &RHS) { + LeafTy Copy = LHS; + return Copy += RHS; } - static constexpr TypeSize Scalable(uint64_t MinSize) { - return TypeSize(MinSize, /*IsScalable=*/true); + friend LeafTy operator-(const LeafTy &LHS, const LeafTy &RHS) { + LeafTy Copy = LHS; + return Copy -= RHS; } - // Scalable vector types with the same minimum size as a fixed size type are - // not guaranteed to be the same size at runtime, so they are never - // considered to be equal. 
- friend bool operator==(const TypeSize &LHS, const TypeSize &RHS) { - return LHS.MinSize == RHS.MinSize && LHS.IsScalable == RHS.IsScalable; + friend LeafTy operator*(const LeafTy &LHS, ScalarTy RHS) { + LeafTy Copy = LHS; + return Copy *= RHS; } - friend bool operator!=(const TypeSize &LHS, const TypeSize &RHS) { - return !(LHS == RHS); + template <typename U = ScalarTy> + friend typename std::enable_if<std::is_signed<U>::value, LeafTy>::type + operator-(const LeafTy &LHS) { + LeafTy Copy = LHS; + return Copy *= -1; } - // For many cases, size ordering between scalable and fixed size types cannot +public: + bool operator==(const UnivariateLinearPolyBase &RHS) const { + return Value == RHS.Value && UnivariateDim == RHS.UnivariateDim; + } + + bool operator!=(const UnivariateLinearPolyBase &RHS) const { + return !(*this == RHS); + } + + bool isZero() const { return !Value; } + bool isNonZero() const { return !isZero(); } + explicit operator bool() const { return isNonZero(); } + ScalarTy getValue() const { return Value; } + ScalarTy getValue(unsigned Dim) const { + return Dim == UnivariateDim ? Value : 0; + } + + /// Add \p RHS to the value at the univariate dimension. + LeafTy getWithIncrement(ScalarTy RHS) { + return static_cast<LeafTy>( + UnivariateLinearPolyBase(Value + RHS, UnivariateDim)); + } + + /// Subtract \p RHS from the value at the univariate dimension. + LeafTy getWithDecrement(ScalarTy RHS) { + return static_cast<LeafTy>( + UnivariateLinearPolyBase(Value - RHS, UnivariateDim)); + } +}; + + +//===----------------------------------------------------------------------===// +// LinearPolySize - base class for fixed- or scalable sizes. +// ^ ^ +// | | +// | +----- ElementCount - Leaf class to represent an element count +// | (vscale x unsigned) +// | +// +-------- TypeSize - Leaf class to represent a type size +// (vscale x uint64_t) +//===----------------------------------------------------------------------===// + +/// LinearPolySize is a base class to represent sizes. It is either +/// fixed-sized or it is scalable-sized, but it cannot be both. +template <typename LeafTy> +class LinearPolySize : public UnivariateLinearPolyBase<LeafTy> { + // Make the parent class a friend, so that it can access the protected + // conversion/copy-constructor for UnivariatePolyBase<LeafTy> -> + // LinearPolySize<LeafTy>. + friend class UnivariateLinearPolyBase<LeafTy>; + +public: + using ScalarTy = typename UnivariateLinearPolyBase<LeafTy>::ScalarTy; + enum Dims : unsigned { FixedDim = 0, ScalableDim = 1 }; + +protected: + LinearPolySize(ScalarTy MinVal, Dims D) + : UnivariateLinearPolyBase<LeafTy>(MinVal, D) {} + + LinearPolySize(const UnivariateLinearPolyBase<LeafTy> &V) + : UnivariateLinearPolyBase<LeafTy>(V) {} + +public: + + static LeafTy getFixed(ScalarTy MinVal) { + return static_cast<LeafTy>(LinearPolySize(MinVal, FixedDim)); + } + static LeafTy getScalable(ScalarTy MinVal) { + return static_cast<LeafTy>(LinearPolySize(MinVal, ScalableDim)); + } + static LeafTy get(ScalarTy MinVal, bool Scalable) { + return static_cast<LeafTy>( + LinearPolySize(MinVal, Scalable ? ScalableDim : FixedDim)); + } + static LeafTy getNull() { return get(0, false); } + + /// Returns the minimum value this size can represent. + ScalarTy getKnownMinValue() const { return this->getValue(); } + /// Returns whether the size is scaled by a runtime quantity (vscale). 
+ bool isScalable() const { return this->UnivariateDim == ScalableDim; } + /// A return value of true indicates we know at compile time that the number + /// of elements (vscale * Min) is definitely even. However, returning false + /// does not guarantee that the total number of elements is odd. + bool isKnownEven() const { return (getKnownMinValue() & 0x1) == 0; } + /// This function tells the caller whether the element count is known at + /// compile time to be a multiple of the scalar value RHS. + bool isKnownMultipleOf(ScalarTy RHS) const { + return getKnownMinValue() % RHS == 0; + } + + // Return the minimum value with the assumption that the count is exact. + // Use in places where a scalable count doesn't make sense (e.g. non-vector + // types, or vectors in backends which don't support scalable vectors). + ScalarTy getFixedValue() const { + assert(!isScalable() && + "Request for a fixed element count on a scalable object"); + return getKnownMinValue(); + } + + // For some cases, size ordering between scalable and fixed size types cannot // be determined at compile time, so such comparisons aren't allowed. // // e.g. <vscale x 2 x i16> could be bigger than <4 x i32> with a runtime // vscale >= 5, equal sized with a vscale of 4, and smaller with // a vscale <= 3. // - // If the scalable flags match, just perform the requested comparison - // between the minimum sizes. - friend bool operator<(const TypeSize &LHS, const TypeSize &RHS) { - assert(LHS.IsScalable == RHS.IsScalable && - "Ordering comparison of scalable and fixed types"); + // All the functions below make use of the fact vscale is always >= 1, which + // means that <vscale x 4 x i32> is guaranteed to be >= <4 x i32>, etc. - return LHS.MinSize < RHS.MinSize; + static bool isKnownLT(const LinearPolySize &LHS, const LinearPolySize &RHS) { + if (!LHS.isScalable() || RHS.isScalable()) + return LHS.getKnownMinValue() < RHS.getKnownMinValue(); + return false; } - friend bool operator>(const TypeSize &LHS, const TypeSize &RHS) { - return RHS < LHS; + static bool isKnownGT(const LinearPolySize &LHS, const LinearPolySize &RHS) { + if (LHS.isScalable() || !RHS.isScalable()) + return LHS.getKnownMinValue() > RHS.getKnownMinValue(); + return false; } - friend bool operator<=(const TypeSize &LHS, const TypeSize &RHS) { - return !(RHS < LHS); + static bool isKnownLE(const LinearPolySize &LHS, const LinearPolySize &RHS) { + if (!LHS.isScalable() || RHS.isScalable()) + return LHS.getKnownMinValue() <= RHS.getKnownMinValue(); + return false; } - friend bool operator>=(const TypeSize &LHS, const TypeSize& RHS) { - return !(LHS < RHS); + static bool isKnownGE(const LinearPolySize &LHS, const LinearPolySize &RHS) { + if (LHS.isScalable() || !RHS.isScalable()) + return LHS.getKnownMinValue() >= RHS.getKnownMinValue(); + return false; } - // Convenience operators to obtain relative sizes independently of - // the scalable flag. - TypeSize operator*(unsigned RHS) const { - return { MinSize * RHS, IsScalable }; + /// We do not provide the '/' operator here because division for polynomial + /// types does not work in the same way as for normal integer types. We can + /// only divide the minimum value (or coefficient) by RHS, which is not the + /// same as + /// (Min * Vscale) / RHS + /// The caller is recommended to use this function in combination with + /// isKnownMultipleOf(RHS), which lets the caller know if it's possible to + /// perform a lossless divide by RHS. 
+ LeafTy divideCoefficientBy(ScalarTy RHS) const { + return static_cast<LeafTy>( + LinearPolySize::get(getKnownMinValue() / RHS, isScalable())); } - friend TypeSize operator*(const unsigned LHS, const TypeSize &RHS) { - return { LHS * RHS.MinSize, RHS.IsScalable }; + LeafTy coefficientNextPowerOf2() const { + return static_cast<LeafTy>(LinearPolySize::get( + static_cast<ScalarTy>(llvm::NextPowerOf2(getKnownMinValue())), + isScalable())); } - TypeSize operator/(unsigned RHS) const { - return { MinSize / RHS, IsScalable }; + /// Printing function. + void print(raw_ostream &OS) const { + if (isScalable()) + OS << "vscale x "; + OS << getKnownMinValue(); } +}; - // Return the minimum size with the assumption that the size is exact. - // Use in places where a scalable size doesn't make sense (e.g. non-vector - // types, or vectors in backends which don't support scalable vectors). - uint64_t getFixedSize() const { - assert(!IsScalable && "Request for a fixed size on a scalable object"); - return MinSize; - } +class ElementCount; +template <> struct LinearPolyBaseTypeTraits<ElementCount> { + using ScalarTy = unsigned; + static constexpr unsigned Dimensions = 2; +}; - // Return the known minimum size. Use in places where the scalable property - // doesn't matter (e.g. determining alignment) or in conjunction with the - // isScalable method below. - uint64_t getKnownMinSize() const { - return MinSize; - } +class ElementCount : public LinearPolySize<ElementCount> { +public: - // Return whether or not the size is scalable. - bool isScalable() const { - return IsScalable; - } + ElementCount(const LinearPolySize<ElementCount> &V) : LinearPolySize(V) {} - // Returns true if the number of bits is a multiple of an 8-bit byte. - bool isByteSized() const { - return (MinSize & 7) == 0; + /// Counting predicates. + /// + ///@{ Number of elements.. + /// Exactly one element. + bool isScalar() const { return !isScalable() && getKnownMinValue() == 1; } + /// One or more elements. + bool isVector() const { + return (isScalable() && getKnownMinValue() != 0) || getKnownMinValue() > 1; } + ///@} +}; - // Returns true if the type size is non-zero. - bool isNonZero() const { return MinSize != 0; } +// This class is used to represent the size of types. If the type is of fixed +class TypeSize; +template <> struct LinearPolyBaseTypeTraits<TypeSize> { + using ScalarTy = uint64_t; + static constexpr unsigned Dimensions = 2; +}; - // Returns true if the type size is zero. - bool isZero() const { return MinSize == 0; } +// TODO: Most functionality in this class will gradually be phased out +// so it will resemble LinearPolySize as much as possible. +// +// TypeSize is used to represent the size of types. If the type is of fixed +// size, it will represent the exact size. If the type is a scalable vector, +// it will represent the known minimum size. +class TypeSize : public LinearPolySize<TypeSize> { +public: + TypeSize(const LinearPolySize<TypeSize> &V) : LinearPolySize(V) {} + TypeSize(ScalarTy MinVal, bool IsScalable) + : LinearPolySize(LinearPolySize::get(MinVal, IsScalable)) {} + + static TypeSize Fixed(ScalarTy MinVal) { return TypeSize(MinVal, false); } + static TypeSize Scalable(ScalarTy MinVal) { return TypeSize(MinVal, true); } + + ScalarTy getFixedSize() const { return getFixedValue(); } + ScalarTy getKnownMinSize() const { return getKnownMinValue(); } + + // All code for this class below this point is needed because of the + // temporary implicit conversion to uint64_t. 
The operator overloads are + // needed because otherwise the conversion of the parent class + // UnivariateLinearPolyBase -> TypeSize is ambiguous. + // TODO: Remove the implicit conversion. // Casts to a uint64_t if this is a fixed-width size. // @@ -173,68 +438,54 @@ public: // To determine how to upgrade the code: // // if (<algorithm works for both scalable and fixed-width vectors>) - // use getKnownMinSize() + // use getKnownMinValue() // else if (<algorithm works only for fixed-width vectors>) { // if <algorithm can be adapted for both scalable and fixed-width vectors> - // update the algorithm and use getKnownMinSize() + // update the algorithm and use getKnownMinValue() // else - // bail out early for scalable vectors and use getFixedSize() + // bail out early for scalable vectors and use getFixedValue() // } - operator uint64_t() const { + operator ScalarTy() const { #ifdef STRICT_FIXED_SIZE_VECTORS - return getFixedSize(); + return getFixedValue(); #else if (isScalable()) WithColor::warning() << "Compiler has made implicit assumption that " "TypeSize is not scalable. This may or may not " "lead to broken code.\n"; - return getKnownMinSize(); + return getKnownMinValue(); #endif } - // Additional convenience operators needed to avoid ambiguous parses. - // TODO: Make uint64_t the default operator? - TypeSize operator*(uint64_t RHS) const { - return { MinSize * RHS, IsScalable }; + // Additional operators needed to avoid ambiguous parses + // because of the implicit conversion hack. + friend TypeSize operator*(const TypeSize &LHS, const int RHS) { + return LHS * (ScalarTy)RHS; } - - TypeSize operator*(int RHS) const { - return { MinSize * RHS, IsScalable }; + friend TypeSize operator*(const TypeSize &LHS, const unsigned RHS) { + return LHS * (ScalarTy)RHS; } - - TypeSize operator*(int64_t RHS) const { - return { MinSize * RHS, IsScalable }; + friend TypeSize operator*(const TypeSize &LHS, const int64_t RHS) { + return LHS * (ScalarTy)RHS; } - - friend TypeSize operator*(const uint64_t LHS, const TypeSize &RHS) { - return { LHS * RHS.MinSize, RHS.IsScalable }; - } - friend TypeSize operator*(const int LHS, const TypeSize &RHS) { - return { LHS * RHS.MinSize, RHS.IsScalable }; - } - - friend TypeSize operator*(const int64_t LHS, const TypeSize &RHS) { - return { LHS * RHS.MinSize, RHS.IsScalable }; - } - - TypeSize operator/(uint64_t RHS) const { - return { MinSize / RHS, IsScalable }; + return RHS * LHS; } - - TypeSize operator/(int RHS) const { - return { MinSize / RHS, IsScalable }; + friend TypeSize operator*(const unsigned LHS, const TypeSize &RHS) { + return RHS * LHS; } - - TypeSize operator/(int64_t RHS) const { - return { MinSize / RHS, IsScalable }; + friend TypeSize operator*(const int64_t LHS, const TypeSize &RHS) { + return RHS * LHS; } - - TypeSize NextPowerOf2() const { - return TypeSize(llvm::NextPowerOf2(MinSize), IsScalable); + friend TypeSize operator*(const uint64_t LHS, const TypeSize &RHS) { + return RHS * LHS; } }; +//===----------------------------------------------------------------------===// +// Utilities +//===----------------------------------------------------------------------===// + /// Returns a TypeSize with a known minimum size that is the next integer /// (mod 2**64) that is greater than or equal to \p Value and is a multiple /// of \p Align. \p Align must be non-zero. 
@@ -242,21 +493,35 @@ public: /// Similar to the alignTo functions in MathExtras.h inline TypeSize alignTo(TypeSize Size, uint64_t Align) { assert(Align != 0u && "Align must be non-zero"); - return {(Size.getKnownMinSize() + Align - 1) / Align * Align, + return {(Size.getKnownMinValue() + Align - 1) / Align * Align, Size.isScalable()}; } +/// Stream operator function for `LinearPolySize`. +template <typename LeafTy> +inline raw_ostream &operator<<(raw_ostream &OS, + const LinearPolySize<LeafTy> &PS) { + PS.print(OS); + return OS; +} + +template <typename T> struct DenseMapInfo; template <> struct DenseMapInfo<ElementCount> { - static inline ElementCount getEmptyKey() { return {~0U, true}; } - static inline ElementCount getTombstoneKey() { return {~0U - 1, false}; } - static unsigned getHashValue(const ElementCount& EltCnt) { - if (EltCnt.Scalable) - return (EltCnt.Min * 37U) - 1U; + static inline ElementCount getEmptyKey() { + return ElementCount::getScalable(~0U); + } + static inline ElementCount getTombstoneKey() { + return ElementCount::getFixed(~0U - 1); + } + static unsigned getHashValue(const ElementCount &EltCnt) { + unsigned HashVal = EltCnt.getKnownMinValue() * 37U; + if (EltCnt.isScalable()) + return (HashVal - 1U); - return EltCnt.Min * 37U; + return HashVal; } - static bool isEqual(const ElementCount& LHS, const ElementCount& RHS) { + static bool isEqual(const ElementCount &LHS, const ElementCount &RHS) { return LHS == RHS; } }; diff --git a/llvm/include/llvm/Support/VirtualFileSystem.h b/llvm/include/llvm/Support/VirtualFileSystem.h index af09c21085c5..c6ddbf60efdf 100644 --- a/llvm/include/llvm/Support/VirtualFileSystem.h +++ b/llvm/include/llvm/Support/VirtualFileSystem.h @@ -37,6 +37,7 @@ namespace llvm { class MemoryBuffer; +class MemoryBufferRef; class Twine; namespace vfs { @@ -463,7 +464,8 @@ public: /// false if the file or directory already exists in the file system with /// different contents. bool addFileNoOwn(const Twine &Path, time_t ModificationTime, - llvm::MemoryBuffer *Buffer, Optional<uint32_t> User = None, + const llvm::MemoryBufferRef &Buffer, + Optional<uint32_t> User = None, Optional<uint32_t> Group = None, Optional<llvm::sys::fs::file_type> Type = None, Optional<llvm::sys::fs::perms> Perms = None); @@ -498,7 +500,7 @@ llvm::sys::fs::UniqueID getNextVirtualUniqueID(); /// Gets a \p FileSystem for a virtual file system described in YAML /// format. -IntrusiveRefCntPtr<FileSystem> +std::unique_ptr<FileSystem> getVFSFromYAML(std::unique_ptr<llvm::MemoryBuffer> Buffer, llvm::SourceMgr::DiagHandlerTy DiagHandler, StringRef YAMLFilePath, void *DiagContext = nullptr, @@ -649,9 +651,12 @@ private: friend class VFSFromYamlDirIterImpl; friend class RedirectingFileSystemParser; - bool shouldUseExternalFS() const { - return ExternalFSValidWD && IsFallthrough; - } + bool shouldUseExternalFS() const { return IsFallthrough; } + + /// Canonicalize path by removing ".", "..", "./", components. This is + /// a VFS request, do not bother about symlinks in the path components + /// but canonicalize in order to perform the correct entry search. + std::error_code makeCanonical(SmallVectorImpl<char> &Path) const; // In a RedirectingFileSystem, keys can be specified in Posix or Windows // style (or even a mixture of both), so this comparison helper allows @@ -670,9 +675,6 @@ private: /// The current working directory of the file system. std::string WorkingDirectory; - /// Whether the current working directory is valid for the external FS. 
- bool ExternalFSValidWD = false; - /// The file system to use for external references. IntrusiveRefCntPtr<FileSystem> ExternalFS; @@ -720,15 +722,20 @@ private: public: /// Looks up \p Path in \c Roots. - ErrorOr<Entry *> lookupPath(const Twine &Path) const; + ErrorOr<Entry *> lookupPath(StringRef Path) const; /// Parses \p Buffer, which is expected to be in YAML format and /// returns a virtual file system representing its contents. - static RedirectingFileSystem * + static std::unique_ptr<RedirectingFileSystem> create(std::unique_ptr<MemoryBuffer> Buffer, SourceMgr::DiagHandlerTy DiagHandler, StringRef YAMLFilePath, void *DiagContext, IntrusiveRefCntPtr<FileSystem> ExternalFS); + /// Redirect each of the remapped files from first to second. + static std::unique_ptr<RedirectingFileSystem> + create(ArrayRef<std::pair<std::string, std::string>> RemappedFiles, + bool UseExternalNames, FileSystem &ExternalFS); + ErrorOr<Status> status(const Twine &Path) override; ErrorOr<std::unique_ptr<File>> openFileForRead(const Twine &Path) override; @@ -749,6 +756,10 @@ public: StringRef getExternalContentsPrefixDir() const; + void setFallthrough(bool Fallthrough); + + std::vector<llvm::StringRef> getRoots() const; + void dump(raw_ostream &OS) const; void dumpEntry(raw_ostream &OS, Entry *E, int NumSpaces = 0) const; #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) diff --git a/llvm/include/llvm/Support/Win64EH.h b/llvm/include/llvm/Support/Win64EH.h index 8220131e5be9..9359fcb4286a 100644 --- a/llvm/include/llvm/Support/Win64EH.h +++ b/llvm/include/llvm/Support/Win64EH.h @@ -38,12 +38,14 @@ enum UnwindOpcodes { // The following set of unwind opcodes is for ARM64. They are documented at // https://docs.microsoft.com/en-us/cpp/build/arm64-exception-handling UOP_AllocMedium, + UOP_SaveR19R20X, UOP_SaveFPLRX, UOP_SaveFPLR, UOP_SaveReg, UOP_SaveRegX, UOP_SaveRegP, UOP_SaveRegPX, + UOP_SaveLRPair, UOP_SaveFReg, UOP_SaveFRegX, UOP_SaveFRegP, @@ -51,7 +53,11 @@ enum UnwindOpcodes { UOP_SetFP, UOP_AddFP, UOP_Nop, - UOP_End + UOP_End, + UOP_SaveNext, + UOP_TrapFrame, + UOP_Context, + UOP_ClearUnwoundToCall }; /// UnwindCode - This union describes a single operation in a function prolog, diff --git a/llvm/include/llvm/Support/X86TargetParser.def b/llvm/include/llvm/Support/X86TargetParser.def index 697f8c70f962..ec19ce4e7cdd 100644 --- a/llvm/include/llvm/Support/X86TargetParser.def +++ b/llvm/include/llvm/Support/X86TargetParser.def @@ -44,6 +44,7 @@ X86_CPU_TYPE(INTEL_KNM, "knm") X86_CPU_TYPE(INTEL_GOLDMONT, "goldmont") X86_CPU_TYPE(INTEL_GOLDMONT_PLUS, "goldmont-plus") X86_CPU_TYPE(INTEL_TREMONT, "tremont") +X86_CPU_TYPE(AMDFAM19H, "amdfam19h") // Alternate names supported by __builtin_cpu_is and target multiversioning. 
X86_CPU_TYPE_ALIAS(INTEL_BONNELL, "atom") @@ -84,6 +85,9 @@ X86_CPU_SUBTYPE(AMDFAM17H_ZNVER2, "znver2") X86_CPU_SUBTYPE(INTEL_COREI7_CASCADELAKE, "cascadelake") X86_CPU_SUBTYPE(INTEL_COREI7_TIGERLAKE, "tigerlake") X86_CPU_SUBTYPE(INTEL_COREI7_COOPERLAKE, "cooperlake") +X86_CPU_SUBTYPE(INTEL_COREI7_SAPPHIRERAPIDS, "sapphirerapids") +X86_CPU_SUBTYPE(INTEL_COREI7_ALDERLAKE, "alderlake") +X86_CPU_SUBTYPE(AMDFAM19H_ZNVER3, "znver3") #undef X86_CPU_SUBTYPE @@ -153,6 +157,8 @@ X86_FEATURE (F16C, "f16c") X86_FEATURE (FSGSBASE, "fsgsbase") X86_FEATURE (FXSR, "fxsr") X86_FEATURE (INVPCID, "invpcid") +X86_FEATURE (KL, "kl") +X86_FEATURE (WIDEKL, "widekl") X86_FEATURE (LWP, "lwp") X86_FEATURE (LZCNT, "lzcnt") X86_FEATURE (MOVBE, "movbe") @@ -175,6 +181,7 @@ X86_FEATURE (SHA, "sha") X86_FEATURE (SHSTK, "shstk") X86_FEATURE (TBM, "tbm") X86_FEATURE (TSXLDTRK, "tsxldtrk") +X86_FEATURE (UINTR, "uintr") X86_FEATURE (VAES, "vaes") X86_FEATURE (VZEROUPPER, "vzeroupper") X86_FEATURE (WAITPKG, "waitpkg") @@ -184,6 +191,8 @@ X86_FEATURE (XSAVE, "xsave") X86_FEATURE (XSAVEC, "xsavec") X86_FEATURE (XSAVEOPT, "xsaveopt") X86_FEATURE (XSAVES, "xsaves") +X86_FEATURE (HRESET, "hreset") +X86_FEATURE (AVXVNNI, "avxvnni") // These features aren't really CPU features, but the frontend can set them. X86_FEATURE (RETPOLINE_EXTERNAL_THUNK, "retpoline-external-thunk") X86_FEATURE (RETPOLINE_INDIRECT_BRANCHES, "retpoline-indirect-branches") diff --git a/llvm/include/llvm/Support/X86TargetParser.h b/llvm/include/llvm/Support/X86TargetParser.h index 66c474b5c275..2d5083023a11 100644 --- a/llvm/include/llvm/Support/X86TargetParser.h +++ b/llvm/include/llvm/Support/X86TargetParser.h @@ -14,6 +14,7 @@ #define LLVM_SUPPORT_X86TARGETPARSERCOMMON_H #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringMap.h" namespace llvm { class StringRef; @@ -99,6 +100,8 @@ enum CPUKind { CK_IcelakeClient, CK_IcelakeServer, CK_Tigerlake, + CK_SapphireRapids, + CK_Alderlake, CK_KNL, CK_KNM, CK_Lakemont, @@ -118,18 +121,26 @@ enum CPUKind { CK_BDVER4, CK_ZNVER1, CK_ZNVER2, + CK_ZNVER3, CK_x86_64, + CK_x86_64_v2, + CK_x86_64_v3, + CK_x86_64_v4, CK_Geode, }; /// Parse \p CPU string into a CPUKind. Will only accept 64-bit capable CPUs if /// \p Only64Bit is true. CPUKind parseArchX86(StringRef CPU, bool Only64Bit = false); +CPUKind parseTuneCPU(StringRef CPU, bool Only64Bit = false); /// Provide a list of valid CPU names. If \p Only64Bit is true, the list will /// only contain 64-bit capable CPUs. void fillValidCPUArchList(SmallVectorImpl<StringRef> &Values, - bool ArchIs32Bit); + bool Only64Bit = false); +/// Provide a list of valid -mtune names. +void fillValidTuneCPUList(SmallVectorImpl<StringRef> &Values, + bool Only64Bit = false); /// Get the key feature prioritizing target multiversioning. ProcessorFeatures getKeyFeature(CPUKind Kind); @@ -137,10 +148,10 @@ ProcessorFeatures getKeyFeature(CPUKind Kind); /// Fill in the features that \p CPU supports into \p Features. void getFeaturesForCPU(StringRef CPU, SmallVectorImpl<StringRef> &Features); -/// Fill \p Features with the features that are implied to be enabled/disabled +/// Set or clear entries in \p Features that are implied to be enabled/disabled /// by the provided \p Feature. 
-void getImpliedFeatures(StringRef Feature, bool Enabled, - SmallVectorImpl<StringRef> &Features); +void updateImpliedFeatures(StringRef Feature, bool Enabled, + StringMap<bool> &Features); } // namespace X86 } // namespace llvm diff --git a/llvm/include/llvm/Support/YAMLParser.h b/llvm/include/llvm/Support/YAMLParser.h index 53009d7ff4aa..759e11afd447 100644 --- a/llvm/include/llvm/Support/YAMLParser.h +++ b/llvm/include/llvm/Support/YAMLParser.h @@ -40,6 +40,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/SMLoc.h" +#include "llvm/Support/SourceMgr.h" #include <cassert> #include <cstddef> #include <iterator> @@ -51,7 +52,6 @@ namespace llvm { class MemoryBufferRef; -class SourceMgr; class raw_ostream; class Twine; @@ -78,6 +78,9 @@ bool scanTokens(StringRef Input); /// escaped, but emitted verbatim. std::string escape(StringRef Input, bool EscapePrintable = true); +/// Parse \p S as a bool according to https://yaml.org/type/bool.html. +llvm::Optional<bool> parseBool(StringRef S); + /// This class represents a YAML stream potentially containing multiple /// documents. class Stream { @@ -100,7 +103,10 @@ public: return !failed(); } - void printError(Node *N, const Twine &Msg); + void printError(Node *N, const Twine &Msg, + SourceMgr::DiagKind Kind = SourceMgr::DK_Error); + void printError(const SMRange &Range, const Twine &Msg, + SourceMgr::DiagKind Kind = SourceMgr::DK_Error); private: friend class Document; @@ -222,7 +228,7 @@ public: /// Gets the value of this node as a StringRef. /// - /// \param Storage is used to store the content of the returned StringRef iff + /// \param Storage is used to store the content of the returned StringRef if /// it requires any modification from how it appeared in the source. /// This happens with escaped characters and multi-line literals. StringRef getValue(SmallVectorImpl<char> &Storage) const; @@ -509,7 +515,6 @@ public: : Node(NK_Alias, D, StringRef(), StringRef()), Name(Val) {} StringRef getName() const { return Name; } - Node *getTarget(); static bool classof(const Node *N) { return N->getType() == NK_Alias; } diff --git a/llvm/include/llvm/Support/YAMLTraits.h b/llvm/include/llvm/Support/YAMLTraits.h index 44e34a4a09b4..9ac9eb300983 100644 --- a/llvm/include/llvm/Support/YAMLTraits.h +++ b/llvm/include/llvm/Support/YAMLTraits.h @@ -19,7 +19,9 @@ #include "llvm/Support/Allocator.h" #include "llvm/Support/Endian.h" #include "llvm/Support/Regex.h" +#include "llvm/Support/SMLoc.h" #include "llvm/Support/SourceMgr.h" +#include "llvm/Support/VersionTuple.h" #include "llvm/Support/YAMLParser.h" #include "llvm/Support/raw_ostream.h" #include <cassert> @@ -61,7 +63,7 @@ struct MappingTraits { // Must provide: // static void mapping(IO &io, T &fields); // Optionally may provide: - // static StringRef validate(IO &io, T &fields); + // static std::string validate(IO &io, T &fields); // // The optional flow flag will cause generated YAML to use a flow mapping // (e.g. { a: 0, b: 1 }): @@ -83,7 +85,7 @@ template <class T, class Context> struct MappingContextTraits { // Must provide: // static void mapping(IO &io, T &fields, Context &Ctx); // Optionally may provide: - // static StringRef validate(IO &io, T &fields, Context &Ctx); + // static std::string validate(IO &io, T &fields, Context &Ctx); // // The optional flow flag will cause generated YAML to use a flow mapping // (e.g. 
{ a: 0, b: 1 }): @@ -421,7 +423,7 @@ template <class T> struct has_MappingTraits<T, EmptyContext> { // Test if MappingContextTraits<T>::validate() is defined on type T. template <class T, class Context> struct has_MappingValidateTraits { - using Signature_validate = StringRef (*)(class IO &, T &, Context &); + using Signature_validate = std::string (*)(class IO &, T &, Context &); template <typename U> static char test(SameType<Signature_validate, &U::validate>*); @@ -435,7 +437,7 @@ template <class T, class Context> struct has_MappingValidateTraits { // Test if MappingTraits<T>::validate() is defined on type T. template <class T> struct has_MappingValidateTraits<T, EmptyContext> { - using Signature_validate = StringRef (*)(class IO &, T &); + using Signature_validate = std::string (*)(class IO &, T &); template <typename U> static char test(SameType<Signature_validate, &U::validate> *); @@ -637,6 +639,7 @@ inline bool isNull(StringRef S) { } inline bool isBool(StringRef S) { + // FIXME: using parseBool is causing multiple tests to fail. return S.equals("true") || S.equals("True") || S.equals("TRUE") || S.equals("false") || S.equals("False") || S.equals("FALSE"); } @@ -789,6 +792,7 @@ public: virtual NodeKind getNodeKind() = 0; virtual void setError(const Twine &) = 0; + virtual void setAllowUnknownKeys(bool Allow); template <typename T> void enumCase(T &Val, const char* Str, const T ConstVal) { @@ -902,24 +906,7 @@ private: template <typename T, typename Context> void processKeyWithDefault(const char *Key, Optional<T> &Val, const Optional<T> &DefaultValue, bool Required, - Context &Ctx) { - assert(DefaultValue.hasValue() == false && - "Optional<T> shouldn't have a value!"); - void *SaveInfo; - bool UseDefault = true; - const bool sameAsDefault = outputting() && !Val.hasValue(); - if (!outputting() && !Val.hasValue()) - Val = T(); - if (Val.hasValue() && - this->preflightKey(Key, Required, sameAsDefault, UseDefault, - SaveInfo)) { - yamlize(*this, Val.getValue(), Required, Ctx); - this->postflightKey(SaveInfo); - } else { - if (UseDefault) - Val = DefaultValue; - } - } + Context &Ctx); template <typename T, typename Context> void processKeyWithDefault(const char *Key, T &Val, const T &DefaultValue, @@ -1057,7 +1044,7 @@ yamlize(IO &io, T &Val, bool, Context &Ctx) { else io.beginMapping(); if (io.outputting()) { - StringRef Err = MappingTraits<T>::validate(io, Val); + std::string Err = MappingTraits<T>::validate(io, Val); if (!Err.empty()) { errs() << Err << "\n"; assert(Err.empty() && "invalid struct trying to be written as yaml"); @@ -1065,7 +1052,7 @@ yamlize(IO &io, T &Val, bool, Context &Ctx) { } detail::doMapping(io, Val, Ctx); if (!io.outputting()) { - StringRef Err = MappingTraits<T>::validate(io, Val); + std::string Err = MappingTraits<T>::validate(io, Val); if (!Err.empty()) io.setError(Err); } @@ -1487,9 +1474,10 @@ private: static bool classof(const MapHNode *) { return true; } - using NameToNode = StringMap<std::unique_ptr<HNode>>; + using NameToNodeAndLoc = + StringMap<std::pair<std::unique_ptr<HNode>, SMRange>>; - NameToNode Mapping; + NameToNodeAndLoc Mapping; SmallVector<std::string, 6> ValidKeys; }; @@ -1511,6 +1499,11 @@ private: std::unique_ptr<Input::HNode> createHNodes(Node *node); void setError(HNode *hnode, const Twine &message); void setError(Node *node, const Twine &message); + void setError(const SMRange &Range, const Twine &message); + + void reportWarning(HNode *hnode, const Twine &message); + void reportWarning(Node *hnode, const Twine &message); + void 
reportWarning(const SMRange &Range, const Twine &message); public: // These are only used by operator>>. They could be private @@ -1521,6 +1514,8 @@ public: /// Returns the current node that's being parsed by the YAML Parser. const Node *getCurrentNode() const; + void setAllowUnknownKeys(bool Allow) override; + private: SourceMgr SrcMgr; // must be before Strm std::unique_ptr<llvm::yaml::Stream> Strm; @@ -1531,6 +1526,7 @@ private: std::vector<bool> BitValuesUsed; HNode *CurrentNode = nullptr; bool ScalarMatchFound = false; + bool AllowUnknownKeys = false; }; /// @@ -1590,7 +1586,7 @@ public: private: void output(StringRef s); void outputUpToEndOfLine(StringRef s); - void newLineCheck(); + void newLineCheck(bool EmptySequence = false); void outputNewLine(); void paddedKey(StringRef key); void flowKey(StringRef Key); @@ -1625,6 +1621,42 @@ private: StringRef PaddingBeforeContainer; }; +template <typename T, typename Context> +void IO::processKeyWithDefault(const char *Key, Optional<T> &Val, + const Optional<T> &DefaultValue, bool Required, + Context &Ctx) { + assert(DefaultValue.hasValue() == false && + "Optional<T> shouldn't have a value!"); + void *SaveInfo; + bool UseDefault = true; + const bool sameAsDefault = outputting() && !Val.hasValue(); + if (!outputting() && !Val.hasValue()) + Val = T(); + if (Val.hasValue() && + this->preflightKey(Key, Required, sameAsDefault, UseDefault, SaveInfo)) { + + // When reading an Optional<X> key from a YAML description, we allow the + // special "<none>" value, which can be used to specify that no value was + // requested, i.e. the DefaultValue will be assigned. The DefaultValue is + // usually None. + bool IsNone = false; + if (!outputting()) + if (auto *Node = dyn_cast<ScalarNode>(((Input *)this)->getCurrentNode())) + // We use rtrim to ignore possible white spaces that might exist when a + // comment is present on the same line. + IsNone = Node->getRawValue().rtrim(' ') == "<none>"; + + if (IsNone) + Val = DefaultValue; + else + yamlize(*this, Val.getValue(), Required, Ctx); + this->postflightKey(SaveInfo); + } else { + if (UseDefault) + Val = DefaultValue; + } +} + /// YAML I/O does conversion based on types. But often native data types /// are just a typedef of built in intergral types (e.g. int). But the C++ /// type matching system sees through the typedef and all the typedefed types @@ -1685,6 +1717,12 @@ struct ScalarTraits<Hex64> { static QuotingType mustQuote(StringRef) { return QuotingType::None; } }; +template <> struct ScalarTraits<VersionTuple> { + static void output(const VersionTuple &Value, void *, llvm::raw_ostream &Out); + static StringRef input(StringRef, void *, VersionTuple &); + static QuotingType mustQuote(StringRef) { return QuotingType::None; } +}; + // Define non-member operator>> so that Input can stream in a document list. 
template <typename T> inline std::enable_if_t<has_DocumentListTraits<T>::value, Input &> diff --git a/llvm/include/llvm/Support/raw_ostream.h b/llvm/include/llvm/Support/raw_ostream.h index 8d289f7c765f..7d572fe06f6f 100644 --- a/llvm/include/llvm/Support/raw_ostream.h +++ b/llvm/include/llvm/Support/raw_ostream.h @@ -15,7 +15,9 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Support/DataTypes.h" #include <cassert> +#include <chrono> #include <cstddef> #include <cstdint> #include <cstring> @@ -30,12 +32,14 @@ class format_object_base; class FormattedString; class FormattedNumber; class FormattedBytes; +template <class T> class LLVM_NODISCARD Expected; namespace sys { namespace fs { enum FileAccess : unsigned; enum OpenFlags : unsigned; enum CreationDisposition : unsigned; +class FileLocker; } // end namespace fs } // end namespace sys @@ -44,7 +48,16 @@ enum CreationDisposition : unsigned; /// buffered disciplines etc. It is a simple buffer that outputs /// a chunk at a time. class raw_ostream { +public: + // Class kinds to support LLVM-style RTTI. + enum class OStreamKind { + OK_OStream, + OK_FDStream, + }; + private: + OStreamKind Kind; + /// The buffer is handled in such a way that the buffer is /// uninitialized, unbuffered, or out of space when OutBufCur >= /// OutBufEnd. Thus a single comparison suffices to determine if we @@ -102,9 +115,10 @@ public: static constexpr Colors SAVEDCOLOR = Colors::SAVEDCOLOR; static constexpr Colors RESET = Colors::RESET; - explicit raw_ostream(bool unbuffered = false) - : BufferMode(unbuffered ? BufferKind::Unbuffered - : BufferKind::InternalBuffer) { + explicit raw_ostream(bool unbuffered = false, + OStreamKind K = OStreamKind::OK_OStream) + : Kind(K), BufferMode(unbuffered ? BufferKind::Unbuffered + : BufferKind::InternalBuffer) { // Start out ready to flush. OutBufStart = OutBufEnd = OutBufCur = nullptr; } @@ -117,6 +131,8 @@ public: /// tell - Return the current offset with the file. uint64_t tell() const { return current_pos() + GetNumBytesInBuffer(); } + OStreamKind get_kind() const { return Kind; } + //===--------------------------------------------------------------------===// // Configuration Interface //===--------------------------------------------------------------------===// @@ -385,8 +401,9 @@ class raw_pwrite_stream : public raw_ostream { void anchor() override; public: - explicit raw_pwrite_stream(bool Unbuffered = false) - : raw_ostream(Unbuffered) {} + explicit raw_pwrite_stream(bool Unbuffered = false, + OStreamKind K = OStreamKind::OK_OStream) + : raw_ostream(Unbuffered, K) {} void pwrite(const char *Ptr, size_t Size, uint64_t Offset) { #ifndef NDEBUG uint64_t Pos = tell(); @@ -409,6 +426,7 @@ class raw_fd_ostream : public raw_pwrite_stream { int FD; bool ShouldClose; bool SupportsSeeking = false; + mutable Optional<bool> HasColors; #ifdef _WIN32 /// True if this fd refers to a Windows console device. Mintty and other @@ -432,10 +450,17 @@ class raw_fd_ostream : public raw_pwrite_stream { /// Determine an efficient buffer size. size_t preferred_buffer_size() const override; + void anchor() override; + +protected: /// Set the flag indicating that an output error has been encountered. void error_detected(std::error_code EC) { this->EC = EC; } - void anchor() override; + /// Return the file descriptor. + int get_fd() const { return FD; } + + // Update the file position by increasing \p Delta. + void inc_pos(uint64_t Delta) { pos += Delta; } public: /// Open the specified file for writing. 
If an error occurs, information @@ -460,7 +485,8 @@ public: /// FD is the file descriptor that this writes to. If ShouldClose is true, /// this closes the file when the stream is destroyed. If FD is for stdout or /// stderr, it will not be closed. - raw_fd_ostream(int fd, bool shouldClose, bool unbuffered=false); + raw_fd_ostream(int fd, bool shouldClose, bool unbuffered = false, + OStreamKind K = OStreamKind::OK_OStream); ~raw_fd_ostream() override; @@ -468,7 +494,7 @@ public: /// fsync. void close(); - bool supportsSeeking() { return SupportsSeeking; } + bool supportsSeeking() const { return SupportsSeeking; } /// Flushes the stream and repositions the underlying file descriptor position /// to the offset specified from the beginning of the file. @@ -496,6 +522,38 @@ public: /// - from The Zen of Python, by Tim Peters /// void clear_error() { EC = std::error_code(); } + + /// Locks the underlying file. + /// + /// @returns RAII object that releases the lock upon leaving the scope, if the + /// locking was successful. Otherwise returns corresponding + /// error code. + /// + /// The function blocks the current thread until the lock become available or + /// error occurs. + /// + /// Possible use of this function may be as follows: + /// + /// @code{.cpp} + /// if (auto L = stream.lock()) { + /// // ... do action that require file to be locked. + /// } else { + /// handleAllErrors(std::move(L.takeError()), [&](ErrorInfoBase &EIB) { + /// // ... handle lock error. + /// }); + /// } + /// @endcode + LLVM_NODISCARD Expected<sys::fs::FileLocker> lock(); + + /// Tries to lock the underlying file within the specified period. + /// + /// @returns RAII object that releases the lock upon leaving the scope, if the + /// locking was successful. Otherwise returns corresponding + /// error code. + /// + /// It is used as @ref lock. + LLVM_NODISCARD + Expected<sys::fs::FileLocker> tryLockFor(std::chrono::milliseconds Timeout); }; /// This returns a reference to a raw_fd_ostream for standard output. Use it @@ -513,6 +571,34 @@ raw_fd_ostream &errs(); raw_ostream &nulls(); //===----------------------------------------------------------------------===// +// File Streams +//===----------------------------------------------------------------------===// + +/// A raw_ostream of a file for reading/writing/seeking. +/// +class raw_fd_stream : public raw_fd_ostream { +public: + /// Open the specified file for reading/writing/seeking. If an error occurs, + /// information about the error is put into EC, and the stream should be + /// immediately destroyed. + raw_fd_stream(StringRef Filename, std::error_code &EC); + + /// This reads the \p Size bytes into a buffer pointed by \p Ptr. + /// + /// \param Ptr The start of the buffer to hold data to be read. + /// + /// \param Size The number of bytes to be read. + /// + /// On success, the number of bytes read is returned, and the file position is + /// advanced by this number. On error, -1 is returned, use error() to get the + /// error code. + ssize_t read(char *Ptr, size_t Size); + + /// Check if \p OS is a pointer of type raw_fd_stream*. 
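The new lock() and tryLockFor() members return an Expected<sys::fs::FileLocker> RAII handle, mirroring the usage shown in the doc comment above. A hedged sketch of the timed variant; the file name and timeout are illustrative:

#include "llvm/Support/Error.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/raw_ostream.h"
#include <chrono>
#include <system_error>

int main() {
  std::error_code EC;
  llvm::raw_fd_ostream OS("shared.log", EC, llvm::sys::fs::OF_Append);
  if (EC)
    return 1;
  if (auto Lock = OS.tryLockFor(std::chrono::milliseconds(250))) {
    OS << "appended while holding the advisory lock\n";
    // The FileLocker releases the lock when Lock goes out of scope.
  } else {
    llvm::handleAllErrors(Lock.takeError(), [](llvm::ErrorInfoBase &EIB) {
      llvm::errs() << "could not lock file: " << EIB.message() << "\n";
    });
    return 1;
  }
  return 0;
}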
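The raw_fd_stream class introduced just above opens one descriptor for reading, writing, and seeking. A minimal write/rewind/read round trip; the scratch file name is illustrative:

#include "llvm/ADT/StringRef.h"
#include "llvm/Support/raw_ostream.h"
#include <system_error>

int main() {
  std::error_code EC;
  llvm::raw_fd_stream S("scratch.tmp", EC);
  if (EC)
    return 1;
  S << "hello";                         // buffered write via the raw_ostream interface
  S.seek(0);                            // flushes, then rewinds the descriptor
  char Buf[16] = {};
  ssize_t N = S.read(Buf, sizeof(Buf)); // bytes read, or -1 on error
  if (N < 0)
    return 1;
  llvm::outs() << llvm::StringRef(Buf, N) << "\n"; // prints "hello"
  return 0;
}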
+ static bool classof(const raw_ostream *OS); +}; + +//===----------------------------------------------------------------------===// // Output Stream Adaptors //===----------------------------------------------------------------------===// @@ -601,6 +687,18 @@ public: ~buffer_ostream() override { OS << str(); } }; +class buffer_unique_ostream : public raw_svector_ostream { + std::unique_ptr<raw_ostream> OS; + SmallVector<char, 0> Buffer; + + virtual void anchor() override; + +public: + buffer_unique_ostream(std::unique_ptr<raw_ostream> OS) + : raw_svector_ostream(Buffer), OS(std::move(OS)) {} + ~buffer_unique_ostream() override { *OS << str(); } +}; + } // end namespace llvm #endif // LLVM_SUPPORT_RAW_OSTREAM_H diff --git a/llvm/include/llvm/TableGen/DirectiveEmitter.h b/llvm/include/llvm/TableGen/DirectiveEmitter.h new file mode 100644 index 000000000000..27ad0665a0e8 --- /dev/null +++ b/llvm/include/llvm/TableGen/DirectiveEmitter.h @@ -0,0 +1,211 @@ +#ifndef LLVM_TABLEGEN_DIRECTIVEEMITTER_H +#define LLVM_TABLEGEN_DIRECTIVEEMITTER_H + +#include "llvm/ADT/StringExtras.h" +#include "llvm/TableGen/Record.h" + +namespace llvm { + +// Wrapper class that contains DirectiveLanguage's information defined in +// DirectiveBase.td and provides helper methods for accessing it. +class DirectiveLanguage { +public: + explicit DirectiveLanguage(const llvm::RecordKeeper &Records) + : Records(Records) { + const auto &DirectiveLanguages = getDirectiveLanguages(); + Def = DirectiveLanguages[0]; + } + + StringRef getName() const { return Def->getValueAsString("name"); } + + StringRef getCppNamespace() const { + return Def->getValueAsString("cppNamespace"); + } + + StringRef getDirectivePrefix() const { + return Def->getValueAsString("directivePrefix"); + } + + StringRef getClausePrefix() const { + return Def->getValueAsString("clausePrefix"); + } + + StringRef getIncludeHeader() const { + return Def->getValueAsString("includeHeader"); + } + + StringRef getClauseEnumSetClass() const { + return Def->getValueAsString("clauseEnumSetClass"); + } + + StringRef getFlangClauseBaseClass() const { + return Def->getValueAsString("flangClauseBaseClass"); + } + + bool hasMakeEnumAvailableInNamespace() const { + return Def->getValueAsBit("makeEnumAvailableInNamespace"); + } + + bool hasEnableBitmaskEnumInNamespace() const { + return Def->getValueAsBit("enableBitmaskEnumInNamespace"); + } + + const std::vector<Record *> getDirectives() const { + return Records.getAllDerivedDefinitions("Directive"); + } + + const std::vector<Record *> getClauses() const { + return Records.getAllDerivedDefinitions("Clause"); + } + + bool HasValidityErrors() const; + +private: + const llvm::Record *Def; + const llvm::RecordKeeper &Records; + + const std::vector<Record *> getDirectiveLanguages() const { + return Records.getAllDerivedDefinitions("DirectiveLanguage"); + } +}; + +// Base record class used for Directive and Clause class defined in +// DirectiveBase.td. +class BaseRecord { +public: + explicit BaseRecord(const llvm::Record *Def) : Def(Def) {} + + StringRef getName() const { return Def->getValueAsString("name"); } + + StringRef getAlternativeName() const { + return Def->getValueAsString("alternativeName"); + } + + // Returns the name of the directive formatted for output. Whitespace are + // replaced with underscores. 
+ std::string getFormattedName() { + StringRef Name = Def->getValueAsString("name"); + std::string N = Name.str(); + std::replace(N.begin(), N.end(), ' ', '_'); + return N; + } + + bool isDefault() const { return Def->getValueAsBit("isDefault"); } + + // Returns the record name. + const StringRef getRecordName() const { return Def->getName(); } + +protected: + const llvm::Record *Def; +}; + +// Wrapper class that contains a Directive's information defined in +// DirectiveBase.td and provides helper methods for accessing it. +class Directive : public BaseRecord { +public: + explicit Directive(const llvm::Record *Def) : BaseRecord(Def) {} + + std::vector<Record *> getAllowedClauses() const { + return Def->getValueAsListOfDefs("allowedClauses"); + } + + std::vector<Record *> getAllowedOnceClauses() const { + return Def->getValueAsListOfDefs("allowedOnceClauses"); + } + + std::vector<Record *> getAllowedExclusiveClauses() const { + return Def->getValueAsListOfDefs("allowedExclusiveClauses"); + } + + std::vector<Record *> getRequiredClauses() const { + return Def->getValueAsListOfDefs("requiredClauses"); + } +}; + +// Wrapper class that contains Clause's information defined in DirectiveBase.td +// and provides helper methods for accessing it. +class Clause : public BaseRecord { +public: + explicit Clause(const llvm::Record *Def) : BaseRecord(Def) {} + + // Optional field. + StringRef getClangClass() const { + return Def->getValueAsString("clangClass"); + } + + // Optional field. + StringRef getFlangClass() const { + return Def->getValueAsString("flangClass"); + } + + // Get the formatted name for Flang parser class. The generic formatted class + // name is constructed from the name were the first letter of each word is + // captitalized and the underscores are removed. + // ex: async -> Async + // num_threads -> NumThreads + std::string getFormattedParserClassName() { + StringRef Name = Def->getValueAsString("name"); + std::string N = Name.str(); + bool Cap = true; + std::transform(N.begin(), N.end(), N.begin(), [&Cap](unsigned char C) { + if (Cap == true) { + C = llvm::toUpper(C); + Cap = false; + } else if (C == '_') { + Cap = true; + } + return C; + }); + N.erase(std::remove(N.begin(), N.end(), '_'), N.end()); + return N; + } + + // Optional field. + StringRef getEnumName() const { + return Def->getValueAsString("enumClauseValue"); + } + + std::vector<Record *> getClauseVals() const { + return Def->getValueAsListOfDefs("allowedClauseValues"); + } + + bool isValueOptional() const { return Def->getValueAsBit("isValueOptional"); } + + bool isValueList() const { return Def->getValueAsBit("isValueList"); } + + StringRef getDefaultValue() const { + return Def->getValueAsString("defaultValue"); + } + + bool isImplicit() const { return Def->getValueAsBit("isImplicit"); } +}; + +// Wrapper class that contains VersionedClause's information defined in +// DirectiveBase.td and provides helper methods for accessing it. +class VersionedClause { +public: + explicit VersionedClause(const llvm::Record *Def) : Def(Def) {} + + // Return the specific clause record wrapped in the Clause class. 
+ Clause getClause() const { return Clause{Def->getValueAsDef("clause")}; } + + int64_t getMinVersion() const { return Def->getValueAsInt("minVersion"); } + + int64_t getMaxVersion() const { return Def->getValueAsInt("maxVersion"); } + +private: + const llvm::Record *Def; +}; + +class ClauseVal : public BaseRecord { +public: + explicit ClauseVal(const llvm::Record *Def) : BaseRecord(Def) {} + + int getValue() const { return Def->getValueAsInt("value"); } + + bool isUserVisible() const { return Def->getValueAsBit("isUserValue"); } +}; + +} // namespace llvm + +#endif diff --git a/llvm/include/llvm/TableGen/Error.h b/llvm/include/llvm/TableGen/Error.h index cf990427f577..f63b50ad786c 100644 --- a/llvm/include/llvm/TableGen/Error.h +++ b/llvm/include/llvm/TableGen/Error.h @@ -15,23 +15,38 @@ #define LLVM_TABLEGEN_ERROR_H #include "llvm/Support/SourceMgr.h" +#include "llvm/TableGen/Record.h" namespace llvm { void PrintNote(const Twine &Msg); void PrintNote(ArrayRef<SMLoc> NoteLoc, const Twine &Msg); +LLVM_ATTRIBUTE_NORETURN void PrintFatalNote(const Twine &Msg); +LLVM_ATTRIBUTE_NORETURN void PrintFatalNote(ArrayRef<SMLoc> ErrorLoc, + const Twine &Msg); +LLVM_ATTRIBUTE_NORETURN void PrintFatalNote(const Record *Rec, + const Twine &Msg); +LLVM_ATTRIBUTE_NORETURN void PrintFatalNote(const RecordVal *RecVal, + const Twine &Msg); + +void PrintWarning(const Twine &Msg); void PrintWarning(ArrayRef<SMLoc> WarningLoc, const Twine &Msg); void PrintWarning(const char *Loc, const Twine &Msg); -void PrintWarning(const Twine &Msg); +void PrintError(const Twine &Msg); void PrintError(ArrayRef<SMLoc> ErrorLoc, const Twine &Msg); void PrintError(const char *Loc, const Twine &Msg); -void PrintError(const Twine &Msg); +void PrintError(const Record *Rec, const Twine &Msg); +void PrintError(const RecordVal *RecVal, const Twine &Msg); LLVM_ATTRIBUTE_NORETURN void PrintFatalError(const Twine &Msg); LLVM_ATTRIBUTE_NORETURN void PrintFatalError(ArrayRef<SMLoc> ErrorLoc, const Twine &Msg); +LLVM_ATTRIBUTE_NORETURN void PrintFatalError(const Record *Rec, + const Twine &Msg); +LLVM_ATTRIBUTE_NORETURN void PrintFatalError(const RecordVal *RecVal, + const Twine &Msg); extern SourceMgr SrcMgr; extern unsigned ErrorsPrinted; diff --git a/llvm/include/llvm/TableGen/Record.h b/llvm/include/llvm/TableGen/Record.h index a082fe5d74a1..b71aa0a89056 100644 --- a/llvm/include/llvm/TableGen/Record.h +++ b/llvm/include/llvm/TableGen/Record.h @@ -20,10 +20,12 @@ #include "llvm/ADT/FoldingSet.h" #include "llvm/ADT/PointerIntPair.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/SMLoc.h" +#include "llvm/Support/Timer.h" #include "llvm/Support/TrailingObjects.h" #include "llvm/Support/raw_ostream.h" #include <algorithm> @@ -57,7 +59,6 @@ public: enum RecTyKind { BitRecTyKind, BitsRecTyKind, - CodeRecTyKind, IntRecTyKind, StringRecTyKind, ListRecTyKind, @@ -67,6 +68,7 @@ public: private: RecTyKind Kind; + /// ListRecTy of the list that has elements of this type. ListRecTy *ListTy = nullptr; public: @@ -87,7 +89,7 @@ public: /// a bit set is not an int, but they are convertible. virtual bool typeIsA(const RecTy *RHS) const; - /// Returns the type representing list<this>. + /// Returns the type representing list<thistype>. 
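The added PrintError/PrintFatalError overloads taking a Record or RecordVal let TableGen backends point diagnostics at a record's own definition, and RecordKeeper now exposes startTimer()/stopTimer() for the phase-timing feature. A hedged backend sketch that uses them together with the new getValueAsOptionalString(); the class name "MyThing", its fields, and the timer label are invented for illustration:

#include "llvm/Support/CommandLine.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/Main.h"
#include "llvm/TableGen/Record.h"

static bool emitMyThings(llvm::raw_ostream &OS, llvm::RecordKeeper &Records) {
  Records.startTimer("Emit MyThing table"); // no-op unless --time-phases is given
  for (llvm::Record *R : Records.getAllDerivedDefinitions("MyThing")) {
    // Optional string fields can be probed without triggering a fatal error.
    if (!R->getValueAsOptionalString("summary"))
      llvm::PrintWarning(R->getLoc(), "record has no summary");
    if (!R->getValueAsBit("valid"))
      llvm::PrintFatalError(R, "invalid record"); // diagnostic points at R's definition
    OS << R->getName() << "\n";
  }
  Records.stopTimer();
  return false; // false tells TableGenMain that no error occurred
}

int main(int argc, char **argv) {
  llvm::cl::ParseCommandLineOptions(argc, argv);
  return llvm::TableGenMain(argv[0], &emitMyThings);
}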
ListRecTy *getListTy(); }; @@ -136,24 +138,6 @@ public: bool typeIsA(const RecTy *RHS) const override; }; -/// 'code' - Represent a code fragment -class CodeRecTy : public RecTy { - static CodeRecTy Shared; - - CodeRecTy() : RecTy(CodeRecTyKind) {} - -public: - static bool classof(const RecTy *RT) { - return RT->getRecTyKind() == CodeRecTyKind; - } - - static CodeRecTy *get() { return &Shared; } - - std::string getAsString() const override { return "code"; } - - bool typeIsConvertibleTo(const RecTy *RHS) const override; -}; - /// 'int' - Represent an integer value of no particular size class IntRecTy : public RecTy { static IntRecTy Shared; @@ -190,14 +174,14 @@ public: bool typeIsConvertibleTo(const RecTy *RHS) const override; }; -/// 'list<Ty>' - Represent a list of values, all of which must be of -/// the specified type. +/// 'list<Ty>' - Represent a list of element values, all of which must be of +/// the specified type. The type is stored in ElementTy. class ListRecTy : public RecTy { friend ListRecTy *RecTy::getListTy(); - RecTy *Ty; + RecTy *ElementTy; - explicit ListRecTy(RecTy *T) : RecTy(ListRecTyKind), Ty(T) {} + explicit ListRecTy(RecTy *T) : RecTy(ListRecTyKind), ElementTy(T) {} public: static bool classof(const RecTy *RT) { @@ -205,7 +189,7 @@ public: } static ListRecTy *get(RecTy *T) { return T->getListTy(); } - RecTy *getElementType() const { return Ty; } + RecTy *getElementType() const { return ElementTy; } std::string getAsString() const override; @@ -304,7 +288,6 @@ protected: IK_FirstTypedInit, IK_BitInit, IK_BitsInit, - IK_CodeInit, IK_DagInit, IK_DefInit, IK_FieldInit, @@ -337,6 +320,7 @@ private: virtual void anchor(); public: + /// Get the kind (type) of the value. InitKind getKind() const { return Kind; } protected: @@ -347,63 +331,61 @@ public: Init &operator=(const Init &) = delete; virtual ~Init() = default; - /// This virtual method should be overridden by values that may - /// not be completely specified yet. + /// Is this a complete value with no unset (uninitialized) subvalues? virtual bool isComplete() const { return true; } /// Is this a concrete and fully resolved value without any references or /// stuck operations? Unset values are concrete. virtual bool isConcrete() const { return false; } - /// Print out this value. + /// Print this value. void print(raw_ostream &OS) const { OS << getAsString(); } - /// Convert this value to a string form. + /// Convert this value to a literal form. virtual std::string getAsString() const = 0; - /// Convert this value to a string form, - /// without adding quote markers. This primaruly affects - /// StringInits where we will not surround the string value with - /// quotes. + + /// Convert this value to a literal form, + /// without adding quotes around a string. virtual std::string getAsUnquotedString() const { return getAsString(); } - /// Debugging method that may be called through a debugger, just + /// Debugging method that may be called through a debugger; just /// invokes print on stderr. void dump() const; - /// If this initializer is convertible to Ty, return an initializer whose - /// type is-a Ty, generating a !cast operation if required. Otherwise, return - /// nullptr. + /// If this value is convertible to type \p Ty, return a value whose + /// type is \p Ty, generating a !cast operation if required. + /// Otherwise, return null. 
virtual Init *getCastTo(RecTy *Ty) const = 0; - /// Convert to an initializer whose type is-a Ty, or return nullptr if this - /// is not possible (this can happen if the initializer's type is convertible - /// to Ty, but there are unresolved references). + /// Convert to a value whose type is \p Ty, or return null if this + /// is not possible. This can happen if the value's type is convertible + /// to \p Ty, but there are unresolved references. virtual Init *convertInitializerTo(RecTy *Ty) const = 0; - /// This method is used to implement the bitrange - /// selection operator. Given an initializer, it selects the specified bits - /// out, returning them as a new init of bits type. If it is not legal to use - /// the bit subscript operator on this initializer, return null. + /// This function is used to implement the bit range + /// selection operator. Given a value, it selects the specified bits, + /// returning them as a new \p Init of type \p bits. If it is not legal + /// to use the bit selection operator on this value, null is returned. virtual Init *convertInitializerBitRange(ArrayRef<unsigned> Bits) const { return nullptr; } - /// This method is used to implement the list slice - /// selection operator. Given an initializer, it selects the specified list - /// elements, returning them as a new init of list type. If it is not legal - /// to take a slice of this, return null. + /// This function is used to implement the list slice + /// selection operator. Given a value, it selects the specified list + /// elements, returning them as a new \p Init of type \p list. If it + /// is not legal to use the slice operator, null is returned. virtual Init *convertInitListSlice(ArrayRef<unsigned> Elements) const { return nullptr; } - /// This method is used to implement the FieldInit class. - /// Implementors of this method should return the type of the named field if - /// they are of record type. + /// This function is used to implement the FieldInit class. + /// Implementors of this method should return the type of the named + /// field if they are of type record. virtual RecTy *getFieldType(StringInit *FieldName) const { return nullptr; } - /// This method is used by classes that refer to other + /// This function is used by classes that refer to other /// variables which may not be defined at the time the expression is formed. /// If a value is set for the variable later, this method will be called on /// users of the value to allow the value to propagate out. @@ -411,8 +393,7 @@ public: return const_cast<Init *>(this); } - /// This method is used to return the initializer for the specified - /// bit. + /// Get the \p Init value of the specified bit. virtual Init *getBit(unsigned Bit) const = 0; }; @@ -420,14 +401,14 @@ inline raw_ostream &operator<<(raw_ostream &OS, const Init &I) { I.print(OS); return OS; } -/// This is the common super-class of types that have a specific, -/// explicit, type. +/// This is the common superclass of types that have a specific, +/// explicit type, stored in ValueTy. class TypedInit : public Init { - RecTy *Ty; + RecTy *ValueTy; protected: explicit TypedInit(InitKind K, RecTy *T, uint8_t Opc = 0) - : Init(K, Opc), Ty(T) {} + : Init(K, Opc), ValueTy(T) {} public: TypedInit(const TypedInit &) = delete; @@ -438,7 +419,8 @@ public: I->getKind() <= IK_LastTypedInit; } - RecTy *getType() const { return Ty; } + /// Get the type of the Init as a RecTy. 
+ RecTy *getType() const { return ValueTy; } Init *getCastTo(RecTy *Ty) const override; Init *convertInitializerTo(RecTy *Ty) const override; @@ -448,12 +430,11 @@ public: /// This method is used to implement the FieldInit class. /// Implementors of this method should return the type of the named field if - /// they are of record type. - /// + /// they are of type record. RecTy *getFieldType(StringInit *FieldName) const override; }; -/// '?' - Represents an uninitialized value +/// '?' - Represents an uninitialized value. class UnsetInit : public Init { UnsetInit() : Init(IK_UnsetInit) {} @@ -465,6 +446,7 @@ public: return I->getKind() == IK_UnsetInit; } + /// Get the singleton unset Init. static UnsetInit *get(); Init *getCastTo(RecTy *Ty) const override; @@ -474,8 +456,12 @@ public: return const_cast<UnsetInit*>(this); } + /// Is this a complete value with no unset (uninitialized) subvalues? bool isComplete() const override { return false; } + bool isConcrete() const override { return true; } + + /// Get the string representation of the Init. std::string getAsString() const override { return "?"; } }; @@ -592,10 +578,18 @@ public: /// "foo" - Represent an initialization by a string value. class StringInit : public TypedInit { +public: + enum StringFormat { + SF_String, // Format as "text" + SF_Code, // Format as [{text}] + }; + +private: StringRef Value; + StringFormat Format; - explicit StringInit(StringRef V) - : TypedInit(IK_StringInit, StringRecTy::get()), Value(V) {} + explicit StringInit(StringRef V, StringFormat Fmt) + : TypedInit(IK_StringInit, StringRecTy::get()), Value(V), Format(Fmt) {} public: StringInit(const StringInit &) = delete; @@ -605,50 +599,25 @@ public: return I->getKind() == IK_StringInit; } - static StringInit *get(StringRef); - - StringRef getValue() const { return Value; } - - Init *convertInitializerTo(RecTy *Ty) const override; - - bool isConcrete() const override { return true; } - std::string getAsString() const override { return "\"" + Value.str() + "\""; } - - std::string getAsUnquotedString() const override { - return std::string(Value); - } - - Init *getBit(unsigned Bit) const override { - llvm_unreachable("Illegal bit reference off string"); - } -}; - -class CodeInit : public TypedInit { - StringRef Value; - SMLoc Loc; - - explicit CodeInit(StringRef V, const SMLoc &Loc) - : TypedInit(IK_CodeInit, static_cast<RecTy *>(CodeRecTy::get())), - Value(V), Loc(Loc) {} - -public: - CodeInit(const StringInit &) = delete; - CodeInit &operator=(const StringInit &) = delete; + static StringInit *get(StringRef, StringFormat Fmt = SF_String); - static bool classof(const Init *I) { - return I->getKind() == IK_CodeInit; + static StringFormat determineFormat(StringFormat Fmt1, StringFormat Fmt2) { + return (Fmt1 == SF_Code || Fmt2 == SF_Code) ? 
SF_Code : SF_String; } - static CodeInit *get(StringRef, const SMLoc &Loc); - StringRef getValue() const { return Value; } - const SMLoc &getLoc() const { return Loc; } + StringFormat getFormat() const { return Format; } + bool hasCodeFormat() const { return Format == SF_Code; } Init *convertInitializerTo(RecTy *Ty) const override; bool isConcrete() const override { return true; } + std::string getAsString() const override { - return "[{" + Value.str() + "}]"; + if (Format == SF_String) + return "\"" + Value.str() + "\""; + else + return "[{" + Value.str() + "}]"; } std::string getAsUnquotedString() const override { @@ -755,7 +724,7 @@ public: /// class UnOpInit : public OpInit, public FoldingSetNode { public: - enum UnaryOp : uint8_t { CAST, HEAD, TAIL, SIZE, EMPTY, GETOP }; + enum UnaryOp : uint8_t { CAST, NOT, HEAD, TAIL, SIZE, EMPTY, GETDAGOP }; private: Init *LHS; @@ -804,9 +773,9 @@ public: /// !op (X, Y) - Combine two inits. class BinOpInit : public OpInit, public FoldingSetNode { public: - enum BinaryOp : uint8_t { ADD, MUL, AND, OR, SHL, SRA, SRL, LISTCONCAT, - LISTSPLAT, STRCONCAT, CONCAT, EQ, NE, LE, LT, GE, - GT, SETOP }; + enum BinaryOp : uint8_t { ADD, SUB, MUL, AND, OR, XOR, SHL, SRA, SRL, LISTCONCAT, + LISTSPLAT, STRCONCAT, INTERLEAVE, CONCAT, EQ, + NE, LE, LT, GE, GT, SETDAGOP }; private: Init *LHS, *RHS; @@ -826,7 +795,6 @@ public: RecTy *Type); static Init *getStrConcat(Init *lhs, Init *rhs); static Init *getListConcat(TypedInit *lhs, Init *rhs); - static Init *getListSplat(TypedInit *lhs, Init *rhs); void Profile(FoldingSetNodeID &ID) const; @@ -862,7 +830,7 @@ public: /// !op (X, Y, Z) - Combine two inits. class TernOpInit : public OpInit, public FoldingSetNode { public: - enum TernaryOp : uint8_t { SUBST, FOREACH, IF, DAG }; + enum TernaryOp : uint8_t { SUBST, FOREACH, FILTER, IF, DAG, SUBSTR }; private: Init *LHS, *MHS, *RHS; @@ -1397,30 +1365,70 @@ public: // High-Level Classes //===----------------------------------------------------------------------===// +/// This class represents a field in a record, including its name, type, +/// value, and source location. class RecordVal { friend class Record; +public: + enum FieldKind { + FK_Normal, // A normal record field. + FK_NonconcreteOK, // A field that can be nonconcrete ('field' keyword). + FK_TemplateArg, // A template argument. + }; + +private: Init *Name; - PointerIntPair<RecTy *, 1, bool> TyAndPrefix; + SMLoc Loc; // Source location of definition of name. + PointerIntPair<RecTy *, 2, FieldKind> TyAndKind; Init *Value; public: - RecordVal(Init *N, RecTy *T, bool P); + RecordVal(Init *N, RecTy *T, FieldKind K); + RecordVal(Init *N, SMLoc Loc, RecTy *T, FieldKind K); + /// Get the name of the field as a StringRef. StringRef getName() const; + + /// Get the name of the field as an Init. Init *getNameInit() const { return Name; } + /// Get the name of the field as a std::string. std::string getNameInitAsString() const { return getNameInit()->getAsUnquotedString(); } - bool getPrefix() const { return TyAndPrefix.getInt(); } - RecTy *getType() const { return TyAndPrefix.getPointer(); } + /// Get the source location of the point where the field was defined. + const SMLoc &getLoc() const { return Loc; } + + /// Is this a field where nonconcrete values are okay? + bool isNonconcreteOK() const { + return TyAndKind.getInt() == FK_NonconcreteOK; + } + + /// Is this a template argument? + bool isTemplateArg() const { + return TyAndKind.getInt() == FK_TemplateArg; + } + + /// Get the type of the field value as a RecTy. 
+ RecTy *getType() const { return TyAndKind.getPointer(); } + + /// Get the type of the field for printing purposes. + std::string getPrintType() const; + + /// Get the value of the field as an Init. Init *getValue() const { return Value; } + /// Set the value of the field from an Init. bool setValue(Init *V); + /// Set the value and source location of the field. + bool setValue(Init *V, SMLoc NewLoc); + void dump() const; + + /// Print the value to an output stream, possibly with a semicolon. void print(raw_ostream &OS, bool PrintSem = true) const; }; @@ -1438,15 +1446,18 @@ class Record { SmallVector<SMLoc, 4> Locs; SmallVector<Init *, 0> TemplateArgs; SmallVector<RecordVal, 0> Values; + // Vector of [source location, condition Init, message Init]. + SmallVector<std::tuple<SMLoc, Init *, Init *>, 0> Assertions; - // All superclasses in the inheritance forest in reverse preorder (yes, it + // All superclasses in the inheritance forest in post-order (yes, it // must be a forest; diamond-shaped inheritance is not allowed). SmallVector<std::pair<Record *, SMRange>, 0> SuperClasses; // Tracks Record instances. Not owned by Record. RecordKeeper &TrackedRecords; - DefInit *TheInit = nullptr; + // The DefInit corresponding to this record. + DefInit *CorrespondingDefInit = nullptr; // Unique record ID. unsigned ID; @@ -1470,8 +1481,8 @@ public: : Record(StringInit::get(N), locs, records, false, Class) {} // When copy-constructing a Record, we must still guarantee a globally unique - // ID number. Don't copy TheInit either since it's owned by the original - // record. All other fields can be copied normally. + // ID number. Don't copy CorrespondingDefInit either, since it's owned by the + // original record. All other fields can be copied normally. Record(const Record &O) : Name(O.Name), Locs(O.Locs), TemplateArgs(O.TemplateArgs), Values(O.Values), SuperClasses(O.SuperClasses), @@ -1511,11 +1522,18 @@ public: ArrayRef<RecordVal> getValues() const { return Values; } + ArrayRef<std::tuple<SMLoc, Init *, Init *>> getAssertions() const { + return Assertions; + } + ArrayRef<std::pair<Record *, SMRange>> getSuperClasses() const { return SuperClasses; } - /// Append the direct super classes of this record to Classes. + /// Determine whether this record has the specified direct superclass. + bool hasDirectSuperClass(const Record *SuperClass) const; + + /// Append the direct superclasses of this record to Classes. void getDirectSuperClasses(SmallVectorImpl<Record *> &Classes) const; bool isTemplateArg(Init *Name) const { @@ -1565,6 +1583,10 @@ public: removeValue(StringInit::get(Name)); } + void addAssertion(SMLoc Loc, Init *Condition, Init *Message) { + Assertions.push_back(std::make_tuple(Loc, Condition, Message)); + } + bool isSubClassOf(const Record *R) const { for (const auto &SCPair : SuperClasses) if (SCPair.first == R) @@ -1585,7 +1607,8 @@ public: } void addSuperClass(Record *R, SMRange Range) { - assert(!TheInit && "changing type of record after it has been referenced"); + assert(!CorrespondingDefInit && + "changing type of record after it has been referenced"); assert(!isSubClassOf(R) && "Already subclassing record!"); SuperClasses.push_back(std::make_pair(R, Range)); } @@ -1612,13 +1635,15 @@ public: return IsAnonymous; } - void print(raw_ostream &OS) const; void dump() const; //===--------------------------------------------------------------------===// // High-level methods useful to tablegen back-ends // + ///Return the source location for the named field. 
+ SMLoc getFieldLoc(StringRef FieldName) const; + /// Return the initializer for a value with the specified name, /// or throw an exception if the field does not exist. Init *getValueInit(StringRef FieldName) const; @@ -1634,6 +1659,11 @@ public: StringRef getValueAsString(StringRef FieldName) const; /// This method looks up the specified field and returns + /// its value as a string, throwing an exception if the field if the value is + /// not a string and llvm::Optional() if the field does not exist. + llvm::Optional<StringRef> getValueAsOptionalString(StringRef FieldName) const; + + /// This method looks up the specified field and returns /// its value as a BitsInit, throwing an exception if the field does not exist /// or if the value is not the right type. BitsInit *getValueAsBitsInit(StringRef FieldName) const; @@ -1694,26 +1724,50 @@ raw_ostream &operator<<(raw_ostream &OS, const Record &R); class RecordKeeper { friend class RecordRecTy; + using RecordMap = std::map<std::string, std::unique_ptr<Record>, std::less<>>; + using GlobalMap = std::map<std::string, Init *, std::less<>>; + + std::string InputFilename; RecordMap Classes, Defs; + mutable StringMap<std::vector<Record *>> ClassRecordsMap; FoldingSet<RecordRecTy> RecordTypePool; std::map<std::string, Init *, std::less<>> ExtraGlobals; unsigned AnonCounter = 0; + // These members are for the phase timing feature. We need a timer group, + // the last timer started, and a flag to say whether the last timer + // is the special "backend overall timer." + TimerGroup *TimingGroup = nullptr; + Timer *LastTimer = nullptr; + bool BackendTimer = false; + public: + /// Get the main TableGen input file's name. + const std::string getInputFilename() const { return InputFilename; } + + /// Get the map of classes. const RecordMap &getClasses() const { return Classes; } + + /// Get the map of records (defs). const RecordMap &getDefs() const { return Defs; } + /// Get the map of global variables. + const GlobalMap &getGlobals() const { return ExtraGlobals; } + + /// Get the class with the specified name. Record *getClass(StringRef Name) const { auto I = Classes.find(Name); return I == Classes.end() ? nullptr : I->second.get(); } + /// Get the concrete record with the specified name. Record *getDef(StringRef Name) const { auto I = Defs.find(Name); return I == Defs.end() ? nullptr : I->second.get(); } + /// Get the \p Init value of the specified global variable. Init *getGlobal(StringRef Name) const { if (Record *R = getDef(Name)) return R->getDefInit(); @@ -1721,6 +1775,10 @@ public: return It == ExtraGlobals.end() ? nullptr : It->second; } + void saveInputFilename(std::string Filename) { + InputFilename = Filename; + } + void addClass(std::unique_ptr<Record> R) { bool Ins = Classes.insert(std::make_pair(std::string(R->getName()), std::move(R))).second; @@ -1744,14 +1802,42 @@ public: Init *getNewAnonymousName(); + /// Start phase timing; called if the --time-phases option is specified. + void startPhaseTiming() { + TimingGroup = new TimerGroup("TableGen", "TableGen Phase Timing"); + } + + /// Start timing a phase. Automatically stops any previous phase timer. + void startTimer(StringRef Name); + + /// Stop timing a phase. + void stopTimer(); + + /// Start timing the overall backend. If the backend itself starts a timer, + /// then this timer is cleared. + void startBackendTimer(StringRef Name); + + /// Stop timing the overall backend. + void stopBackendTimer(); + + /// Stop phase timing and print the report. 
+ void stopPhaseTiming() { + if (TimingGroup) + delete TimingGroup; + } + //===--------------------------------------------------------------------===// - // High-level helper methods, useful for tablegen backends... + // High-level helper methods, useful for tablegen backends. - /// This method returns all concrete definitions - /// that derive from the specified class name. A class with the specified - /// name must exist. + /// Get all the concrete records that inherit from the one specified + /// class. The class must be defined. std::vector<Record *> getAllDerivedDefinitions(StringRef ClassName) const; + /// Get all the concrete records that inherit from all the specified + /// classes. The classes must be defined. + std::vector<Record *> getAllDerivedDefinitions( + ArrayRef<StringRef> ClassNames) const; + void dump() const; }; @@ -1781,8 +1867,6 @@ struct LessRecordFieldName { }; struct LessRecordRegister { - static bool ascii_isdigit(char x) { return x >= '0' && x <= '9'; } - struct RecordParts { SmallVector<std::pair< bool, StringRef>, 4> Parts; @@ -1793,18 +1877,18 @@ struct LessRecordRegister { size_t Len = 0; const char *Start = Rec.data(); const char *Curr = Start; - bool isDigitPart = ascii_isdigit(Curr[0]); + bool IsDigitPart = isDigit(Curr[0]); for (size_t I = 0, E = Rec.size(); I != E; ++I, ++Len) { - bool isDigit = ascii_isdigit(Curr[I]); - if (isDigit != isDigitPart) { - Parts.push_back(std::make_pair(isDigitPart, StringRef(Start, Len))); + bool IsDigit = isDigit(Curr[I]); + if (IsDigit != IsDigitPart) { + Parts.push_back(std::make_pair(IsDigitPart, StringRef(Start, Len))); Len = 0; Start = &Curr[I]; - isDigitPart = ascii_isdigit(Curr[I]); + IsDigitPart = isDigit(Curr[I]); } } // Push the last part. - Parts.push_back(std::make_pair(isDigitPart, StringRef(Start, Len))); + Parts.push_back(std::make_pair(IsDigitPart, StringRef(Start, Len))); } size_t size() { return Parts.size(); } @@ -1927,25 +2011,6 @@ public: bool keepUnsetBits() const override { return true; } }; -/// Resolve all references to a specific RecordVal. -// -// TODO: This is used for resolving references to template arguments, in a -// rather inefficient way. Change those uses to resolve all template -// arguments simultaneously and get rid of this class. -class RecordValResolver final : public Resolver { - const RecordVal *RV; - -public: - explicit RecordValResolver(Record &R, const RecordVal *RV) - : Resolver(&R), RV(RV) {} - - Init *resolve(Init *VarName) override { - if (VarName == RV->getNameInit()) - return RV->getValue(); - return nullptr; - } -}; - /// Delegate resolving to a sub-resolver, but shadow some variable names. class ShadowResolver final : public Resolver { Resolver &R; @@ -1996,6 +2061,7 @@ public: Init *resolve(Init *VarName) override; }; +void EmitDetailedRecords(RecordKeeper &RK, raw_ostream &OS); void EmitJSON(RecordKeeper &RK, raw_ostream &OS); } // end namespace llvm diff --git a/llvm/include/llvm/TableGen/SearchableTable.td b/llvm/include/llvm/TableGen/SearchableTable.td index 2680c71218ea..61dfa5c70706 100644 --- a/llvm/include/llvm/TableGen/SearchableTable.td +++ b/llvm/include/llvm/TableGen/SearchableTable.td @@ -67,9 +67,13 @@ class GenericTable { // List of the names of fields of collected records that contain the data for // table entries, in the order that is used for initialization in C++. // - // For each field of the table named XXX, TableGen will look for a value - // called TypeOf_XXX and use that as a more detailed description of the - // type of the field if present. 
This is required for fields whose type + // TableGen needs to know the type of the fields so that it can format + // the initializers correctly. It can infer the type of bit, bits, string, + // Intrinsic, and Instruction values. + // + // For each field of the table named xxx, TableGen will look for a field + // named TypeOf_xxx and use that as a more detailed description of the + // type of the field. This is required for fields whose type // cannot be deduced automatically, such as enum fields. For example: // // def MyEnum : GenericEnum { @@ -85,15 +89,15 @@ class GenericTable { // def MyTable : GenericTable { // let FilterClass = "MyTableEntry"; // let Fields = ["V", ...]; - // GenericEnum TypeOf_V = MyEnum; + // string TypeOf_V = "MyEnum"; // } // - // Fields of type bit, bits<N>, string, Intrinsic, and Instruction (or - // derived classes of those) are supported natively. + // If a string field was initialized with a code literal, TableGen will + // emit the code verbatim. However, if a string field was initialized + // in some other way, but should be interpreted as code, then a TypeOf_xxx + // field is necessary, with a value of "code": // - // Additionally, fields of type `code` can appear, where the value is used - // verbatim as an initializer. However, these fields cannot be used as - // search keys. + // string TypeOf_Predicate = "code"; list<string> Fields; // (Optional) List of fields that make up the primary key. @@ -103,7 +107,7 @@ class GenericTable { string PrimaryKeyName; // See SearchIndex.EarlyOut - bit PrimaryKeyEarlyOut = 0; + bit PrimaryKeyEarlyOut = false; } // Define a record derived from this class to generate an additional search @@ -124,7 +128,7 @@ class SearchIndex { // instructions. // // Can only be used when the first field is an integral (non-string) type. - bit EarlyOut = 0; + bit EarlyOut = false; } // Legacy table type with integrated enum. diff --git a/llvm/include/llvm/Target/CGPassBuilderOption.h b/llvm/include/llvm/Target/CGPassBuilderOption.h new file mode 100644 index 000000000000..c3a221e01ceb --- /dev/null +++ b/llvm/include/llvm/Target/CGPassBuilderOption.h @@ -0,0 +1,65 @@ +//===- CGPassBuilderOption.h - Options for pass builder ---------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file declares the CCState and CCValAssign classes, used for lowering +// and implementing calling conventions. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_PASSBUILDER_OPTION_H +#define LLVM_CODEGEN_PASSBUILDER_OPTION_H + +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Target/TargetOptions.h" +#include <vector> + +namespace llvm { +class TargetMachine; + +enum class RunOutliner { TargetDefault, AlwaysOutline, NeverOutline }; +enum class RegAllocType { Default, Basic, Fast, Greedy, PBQP }; +enum class CFLAAType { None, Steensgaard, Andersen, Both }; + +// Not one-on-one but mostly corresponding to commandline options in +// TargetPassConfig.cpp. 
+struct CGPassBuilderOption { + Optional<bool> OptimizeRegAlloc; + Optional<bool> EnableIPRA; + bool DebugPM = false; + bool DisableVerify = false; + bool EnableImplicitNullChecks = false; + bool EnableBlockPlacementStats = false; + bool MISchedPostRA = false; + bool EarlyLiveIntervals = false; + + bool DisableLSR = false; + bool DisableCGP = false; + bool PrintLSR = false; + bool DisableMergeICmps = false; + bool DisablePartialLibcallInlining = false; + bool DisableConstantHoisting = false; + bool PrintISelInput = false; + bool PrintGCInfo = false; + bool RequiresCodeGenSCCOrder = false; + + RunOutliner EnableMachineOutliner = RunOutliner::TargetDefault; + RegAllocType RegAlloc = RegAllocType::Default; + CFLAAType UseCFLAA = CFLAAType::None; + Optional<GlobalISelAbortMode> EnableGlobalISelAbort; + + Optional<bool> VerifyMachineCode; + Optional<bool> EnableFastISelOption; + Optional<bool> EnableGlobalISelOption; +}; + +CGPassBuilderOption getCGPassBuilderOption(); + +} // namespace llvm + +#endif // LLVM_CODEGEN_PASSBUILDER_OPTION_H diff --git a/llvm/include/llvm/Target/GenericOpcodes.td b/llvm/include/llvm/Target/GenericOpcodes.td index 3d8262b2404f..209925969df3 100644 --- a/llvm/include/llvm/Target/GenericOpcodes.td +++ b/llvm/include/llvm/Target/GenericOpcodes.td @@ -16,7 +16,7 @@ //------------------------------------------------------------------------------ class GenericInstruction : StandardPseudoInstruction { - let isPreISelOpcode = 1; + let isPreISelOpcode = true; } // Provide a variant of an instruction with the same operands, but @@ -31,8 +31,8 @@ class ConstrainedIntruction<GenericInstruction baseInst> : // TODO: Do we need a better way to mark reads from FP mode than // hasSideEffects? - let hasSideEffects = 1; - let mayRaiseFPException = 1; + let hasSideEffects = true; + let mayRaiseFPException = true; } // Extend the underlying scalar type of an operation, leaving the high bits @@ -40,7 +40,7 @@ class ConstrainedIntruction<GenericInstruction baseInst> : def G_ANYEXT : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type1:$src); - let hasSideEffects = 0; + let hasSideEffects = false; } // Sign extend the underlying scalar type of an operation, copying the sign bit @@ -48,7 +48,7 @@ def G_ANYEXT : GenericInstruction { def G_SEXT : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type1:$src); - let hasSideEffects = 0; + let hasSideEffects = false; } // Sign extend the a value from an arbitrary bit position, copying the sign bit @@ -62,7 +62,7 @@ def G_SEXT : GenericInstruction { def G_SEXT_INREG : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type0:$src, untyped_imm_0:$sz); - let hasSideEffects = 0; + let hasSideEffects = false; } // Zero extend the underlying scalar type of an operation, putting zero bits @@ -70,7 +70,7 @@ def G_SEXT_INREG : GenericInstruction { def G_ZEXT : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type1:$src); - let hasSideEffects = 0; + let hasSideEffects = false; } @@ -79,150 +79,150 @@ def G_ZEXT : GenericInstruction { def G_TRUNC : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type1:$src); - let hasSideEffects = 0; + let hasSideEffects = false; } def G_IMPLICIT_DEF : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins); - let hasSideEffects = 0; + let hasSideEffects = false; } def G_PHI : GenericInstruction { let 
OutOperandList = (outs type0:$dst); let InOperandList = (ins variable_ops); - let hasSideEffects = 0; + let hasSideEffects = false; } def G_FRAME_INDEX : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins unknown:$src2); - let hasSideEffects = 0; + let hasSideEffects = false; } def G_GLOBAL_VALUE : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins unknown:$src); - let hasSideEffects = 0; + let hasSideEffects = false; } def G_INTTOPTR : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type1:$src); - let hasSideEffects = 0; + let hasSideEffects = false; } def G_PTRTOINT : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type1:$src); - let hasSideEffects = 0; + let hasSideEffects = false; } def G_BITCAST : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type1:$src); - let hasSideEffects = 0; + let hasSideEffects = false; } // Only supports scalar result types def G_CONSTANT : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins unknown:$imm); - let hasSideEffects = 0; + let hasSideEffects = false; } // Only supports scalar result types def G_FCONSTANT : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins unknown:$imm); - let hasSideEffects = 0; + let hasSideEffects = false; } def G_VASTART : GenericInstruction { let OutOperandList = (outs); let InOperandList = (ins type0:$list); - let hasSideEffects = 0; - let mayStore = 1; + let hasSideEffects = false; + let mayStore = true; } def G_VAARG : GenericInstruction { let OutOperandList = (outs type0:$val); let InOperandList = (ins type1:$list, unknown:$align); - let hasSideEffects = 0; - let mayLoad = 1; - let mayStore = 1; + let hasSideEffects = false; + let mayLoad = true; + let mayStore = true; } def G_CTLZ : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type1:$src); - let hasSideEffects = 0; + let hasSideEffects = false; } def G_CTLZ_ZERO_UNDEF : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type1:$src); - let hasSideEffects = 0; + let hasSideEffects = false; } def G_CTTZ : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type1:$src); - let hasSideEffects = 0; + let hasSideEffects = false; } def G_CTTZ_ZERO_UNDEF : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type1:$src); - let hasSideEffects = 0; + let hasSideEffects = false; } def G_CTPOP : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type1:$src); - let hasSideEffects = 0; + let hasSideEffects = false; } def G_BSWAP : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type0:$src); - let hasSideEffects = 0; + let hasSideEffects = false; } def G_BITREVERSE : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type0:$src); - let hasSideEffects = 0; + let hasSideEffects = false; } def G_ADDRSPACE_CAST : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type1:$src); - let hasSideEffects = 0; + let hasSideEffects = false; } def G_BLOCK_ADDR : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins unknown:$ba); - let hasSideEffects = 0; + let hasSideEffects = false; } def G_JUMP_TABLE : GenericInstruction { let 
OutOperandList = (outs type0:$dst); let InOperandList = (ins unknown:$jti); - let hasSideEffects = 0; + let hasSideEffects = false; } def G_DYN_STACKALLOC : GenericInstruction { let OutOperandList = (outs ptype0:$dst); let InOperandList = (ins type1:$size, i32imm:$align); - let hasSideEffects = 1; + let hasSideEffects = true; } def G_FREEZE : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type0:$src); - let hasSideEffects = 0; + let hasSideEffects = false; } //------------------------------------------------------------------------------ @@ -233,101 +233,101 @@ def G_FREEZE : GenericInstruction { def G_ADD : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type0:$src1, type0:$src2); - let hasSideEffects = 0; - let isCommutable = 1; + let hasSideEffects = false; + let isCommutable = true; } // Generic subtraction. def G_SUB : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type0:$src1, type0:$src2); - let hasSideEffects = 0; - let isCommutable = 0; + let hasSideEffects = false; + let isCommutable = false; } // Generic multiplication. def G_MUL : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type0:$src1, type0:$src2); - let hasSideEffects = 0; - let isCommutable = 1; + let hasSideEffects = false; + let isCommutable = true; } // Generic signed division. def G_SDIV : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type0:$src1, type0:$src2); - let hasSideEffects = 0; - let isCommutable = 0; + let hasSideEffects = false; + let isCommutable = false; } // Generic unsigned division. def G_UDIV : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type0:$src1, type0:$src2); - let hasSideEffects = 0; - let isCommutable = 0; + let hasSideEffects = false; + let isCommutable = false; } // Generic signed remainder. def G_SREM : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type0:$src1, type0:$src2); - let hasSideEffects = 0; - let isCommutable = 0; + let hasSideEffects = false; + let isCommutable = false; } // Generic unsigned remainder. def G_UREM : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type0:$src1, type0:$src2); - let hasSideEffects = 0; - let isCommutable = 0; + let hasSideEffects = false; + let isCommutable = false; } // Generic bitwise and. def G_AND : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type0:$src1, type0:$src2); - let hasSideEffects = 0; - let isCommutable = 1; + let hasSideEffects = false; + let isCommutable = true; } // Generic bitwise or. def G_OR : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type0:$src1, type0:$src2); - let hasSideEffects = 0; - let isCommutable = 1; + let hasSideEffects = false; + let isCommutable = true; } // Generic bitwise xor. def G_XOR : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type0:$src1, type0:$src2); - let hasSideEffects = 0; - let isCommutable = 1; + let hasSideEffects = false; + let isCommutable = true; } // Generic left-shift. def G_SHL : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type0:$src1, type1:$src2); - let hasSideEffects = 0; + let hasSideEffects = false; } // Generic logical right-shift. 
def G_LSHR : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type0:$src1, type1:$src2); - let hasSideEffects = 0; + let hasSideEffects = false; } // Generic arithmetic right-shift. def G_ASHR : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type0:$src1, type1:$src2); - let hasSideEffects = 0; + let hasSideEffects = false; } /// Funnel 'double' shifts take 3 operands, 2 inputs and the shift amount. @@ -335,7 +335,7 @@ def G_ASHR : GenericInstruction { def G_FSHL : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type0:$src1, type0:$src2, type1:$src3); - let hasSideEffects = 0; + let hasSideEffects = false; } /// Funnel 'double' shifts take 3 operands, 2 inputs and the shift amount. @@ -343,35 +343,35 @@ def G_FSHL : GenericInstruction { def G_FSHR : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type0:$src1, type0:$src2, type1:$src3); - let hasSideEffects = 0; + let hasSideEffects = false; } // Generic integer comparison. def G_ICMP : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins unknown:$tst, type1:$src1, type1:$src2); - let hasSideEffects = 0; + let hasSideEffects = false; } // Generic floating-point comparison. def G_FCMP : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins unknown:$tst, type1:$src1, type1:$src2); - let hasSideEffects = 0; + let hasSideEffects = false; } // Generic select def G_SELECT : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type1:$tst, type0:$src1, type0:$src2); - let hasSideEffects = 0; + let hasSideEffects = false; } // Generic pointer offset. def G_PTR_ADD : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type0:$src1, type1:$src2); - let hasSideEffects = 0; + let hasSideEffects = false; } // Generic pointer mask. type1 should be an integer with the same @@ -379,39 +379,46 @@ def G_PTR_ADD : GenericInstruction { def G_PTRMASK : GenericInstruction { let OutOperandList = (outs ptype0:$dst); let InOperandList = (ins ptype0:$src, type1:$bits); - let hasSideEffects = 0; + let hasSideEffects = false; } // Generic signed integer minimum. def G_SMIN : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type0:$src1, type0:$src2); - let hasSideEffects = 0; - let isCommutable = 1; + let hasSideEffects = false; + let isCommutable = true; } // Generic signed integer maximum. def G_SMAX : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type0:$src1, type0:$src2); - let hasSideEffects = 0; - let isCommutable = 1; + let hasSideEffects = false; + let isCommutable = true; } // Generic unsigned integer minimum. def G_UMIN : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type0:$src1, type0:$src2); - let hasSideEffects = 0; - let isCommutable = 1; + let hasSideEffects = false; + let isCommutable = true; } // Generic unsigned integer maximum. def G_UMAX : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type0:$src1, type0:$src2); - let hasSideEffects = 0; - let isCommutable = 1; + let hasSideEffects = false; + let isCommutable = true; +} + +// Generic integer absolute value. 
+def G_ABS : GenericInstruction { + let OutOperandList = (outs type0:$dst); + let InOperandList = (ins type0:$src); + let hasSideEffects = false; } //------------------------------------------------------------------------------ @@ -422,73 +429,73 @@ def G_UMAX : GenericInstruction { def G_UADDO : GenericInstruction { let OutOperandList = (outs type0:$dst, type1:$carry_out); let InOperandList = (ins type0:$src1, type0:$src2); - let hasSideEffects = 0; - let isCommutable = 1; + let hasSideEffects = false; + let isCommutable = true; } // Generic unsigned addition consuming and producing a carry flag. def G_UADDE : GenericInstruction { let OutOperandList = (outs type0:$dst, type1:$carry_out); let InOperandList = (ins type0:$src1, type0:$src2, type1:$carry_in); - let hasSideEffects = 0; + let hasSideEffects = false; } // Generic signed addition producing a carry flag. def G_SADDO : GenericInstruction { let OutOperandList = (outs type0:$dst, type1:$carry_out); let InOperandList = (ins type0:$src1, type0:$src2); - let hasSideEffects = 0; - let isCommutable = 1; + let hasSideEffects = false; + let isCommutable = true; } // Generic signed addition consuming and producing a carry flag. def G_SADDE : GenericInstruction { let OutOperandList = (outs type0:$dst, type1:$carry_out); let InOperandList = (ins type0:$src1, type0:$src2, type1:$carry_in); - let hasSideEffects = 0; + let hasSideEffects = false; } // Generic unsigned subtraction producing a carry flag. def G_USUBO : GenericInstruction { let OutOperandList = (outs type0:$dst, type1:$carry_out); let InOperandList = (ins type0:$src1, type0:$src2); - let hasSideEffects = 0; + let hasSideEffects = false; } // Generic unsigned subtraction consuming and producing a carry flag. def G_USUBE : GenericInstruction { let OutOperandList = (outs type0:$dst, type1:$carry_out); let InOperandList = (ins type0:$src1, type0:$src2, type1:$carry_in); - let hasSideEffects = 0; + let hasSideEffects = false; } // Generic signed subtraction producing a carry flag. def G_SSUBO : GenericInstruction { let OutOperandList = (outs type0:$dst, type1:$carry_out); let InOperandList = (ins type0:$src1, type0:$src2); - let hasSideEffects = 0; + let hasSideEffects = false; } // Generic signed subtraction consuming and producing a carry flag. def G_SSUBE : GenericInstruction { let OutOperandList = (outs type0:$dst, type1:$carry_out); let InOperandList = (ins type0:$src1, type0:$src2, type1:$carry_in); - let hasSideEffects = 0; + let hasSideEffects = false; } // Generic unsigned multiplication producing a carry flag. def G_UMULO : GenericInstruction { let OutOperandList = (outs type0:$dst, type1:$carry_out); let InOperandList = (ins type0:$src1, type0:$src2); - let hasSideEffects = 0; - let isCommutable = 1; + let hasSideEffects = false; + let isCommutable = true; } // Generic signed multiplication producing a carry flag. 
def G_SMULO : GenericInstruction { let OutOperandList = (outs type0:$dst, type1:$carry_out); let InOperandList = (ins type0:$src1, type0:$src2); - let hasSideEffects = 0; - let isCommutable = 1; + let hasSideEffects = false; + let isCommutable = true; } // Multiply two numbers at twice the incoming bit width (unsigned) and return @@ -496,8 +503,8 @@ def G_SMULO : GenericInstruction { def G_UMULH : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type0:$src1, type0:$src2); - let hasSideEffects = 0; - let isCommutable = 1; + let hasSideEffects = false; + let isCommutable = true; } // Multiply two numbers at twice the incoming bit width (signed) and return @@ -505,8 +512,8 @@ def G_UMULH : GenericInstruction { def G_SMULH : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type0:$src1, type0:$src2); - let hasSideEffects = 0; - let isCommutable = 1; + let hasSideEffects = false; + let isCommutable = true; } //------------------------------------------------------------------------------ @@ -517,32 +524,119 @@ def G_SMULH : GenericInstruction { def G_UADDSAT : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type0:$src1, type0:$src2); - let hasSideEffects = 0; - let isCommutable = 1; + let hasSideEffects = false; + let isCommutable = true; } // Generic saturating signed addition. def G_SADDSAT : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type0:$src1, type0:$src2); - let hasSideEffects = 0; - let isCommutable = 1; + let hasSideEffects = false; + let isCommutable = true; } // Generic saturating unsigned subtraction. def G_USUBSAT : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type0:$src1, type0:$src2); - let hasSideEffects = 0; - let isCommutable = 0; + let hasSideEffects = false; + let isCommutable = false; } // Generic saturating signed subtraction. def G_SSUBSAT : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type0:$src1, type0:$src2); - let hasSideEffects = 0; - let isCommutable = 0; + let hasSideEffects = false; + let isCommutable = false; +} + +// Generic saturating unsigned left shift. +def G_USHLSAT : GenericInstruction { + let OutOperandList = (outs type0:$dst); + let InOperandList = (ins type0:$src1, type1:$src2); + let hasSideEffects = false; + let isCommutable = false; +} + +// Generic saturating signed left shift. +def G_SSHLSAT : GenericInstruction { + let OutOperandList = (outs type0:$dst); + let InOperandList = (ins type0:$src1, type1:$src2); + let hasSideEffects = false; + let isCommutable = false; +} + +/// RESULT = [US]MULFIX(LHS, RHS, SCALE) - Perform fixed point +/// multiplication on 2 integers with the same width and scale. SCALE +/// represents the scale of both operands as fixed point numbers. This +/// SCALE parameter must be a constant integer. A scale of zero is +/// effectively performing multiplication on 2 integers. 
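For readers less familiar with the encoding these fixed-point opcodes assume, here is a plain C++ sketch of the scalar arithmetic that G_SMULFIX models, for 32-bit operands with a 64-bit intermediate product. It is illustrative only: the shift-based handling of the fraction bits below is one straightforward reading, not a statement of the opcode's exact rounding behaviour.

#include <cstdint>
#include <cstdio>

// Multiply two fixed-point values that both carry Scale fraction bits.
static int32_t smulfix(int32_t A, int32_t B, unsigned Scale) {
  int64_t Prod = static_cast<int64_t>(A) * static_cast<int64_t>(B);
  return static_cast<int32_t>(Prod >> Scale); // discard the doubled fraction bits
}

int main() {
  const unsigned Scale = 16;        // Q16.16: value = encoded / 2^16
  int32_t A = 2.5 * (1 << Scale);   // 2.5  -> 163840
  int32_t B = 1.5 * (1 << Scale);   // 1.5  -> 98304
  int32_t R = smulfix(A, B, Scale); // 3.75 -> 245760
  std::printf("%g\n", R / 65536.0); // prints 3.75
  return 0;
}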
+def G_SMULFIX : GenericInstruction { + let OutOperandList = (outs type0:$dst); + let InOperandList = (ins type0:$src0, type0:$src1, untyped_imm_0:$scale); + let hasSideEffects = false; + let isCommutable = true; +} + +def G_UMULFIX : GenericInstruction { + let OutOperandList = (outs type0:$dst); + let InOperandList = (ins type0:$src0, type0:$src1, untyped_imm_0:$scale); + let hasSideEffects = false; + let isCommutable = true; +} + +/// Same as the corresponding unsaturated fixed point instructions, but the +/// result is clamped between the min and max values representable by the +/// bits of the first 2 operands. +def G_SMULFIXSAT : GenericInstruction { + let OutOperandList = (outs type0:$dst); + let InOperandList = (ins type0:$src0, type0:$src1, untyped_imm_0:$scale); + let hasSideEffects = false; + let isCommutable = true; +} + +def G_UMULFIXSAT : GenericInstruction { + let OutOperandList = (outs type0:$dst); + let InOperandList = (ins type0:$src0, type0:$src1, untyped_imm_0:$scale); + let hasSideEffects = false; + let isCommutable = true; +} + +/// RESULT = [US]DIVFIX(LHS, RHS, SCALE) - Perform fixed point division on +/// 2 integers with the same width and scale. SCALE represents the scale +/// of both operands as fixed point numbers. This SCALE parameter must be a +/// constant integer. +def G_SDIVFIX : GenericInstruction { + let OutOperandList = (outs type0:$dst); + let InOperandList = (ins type0:$src0, type0:$src1, untyped_imm_0:$scale); + let hasSideEffects = false; + let isCommutable = false; +} + +def G_UDIVFIX : GenericInstruction { + let OutOperandList = (outs type0:$dst); + let InOperandList = (ins type0:$src0, type0:$src1, untyped_imm_0:$scale); + let hasSideEffects = false; + let isCommutable = false; +} + +/// Same as the corresponding unsaturated fixed point instructions, +/// but the result is clamped between the min and max values +/// representable by the bits of the first 2 operands. 
+def G_SDIVFIXSAT : GenericInstruction { + let OutOperandList = (outs type0:$dst); + let InOperandList = (ins type0:$src0, type0:$src1, untyped_imm_0:$scale); + let hasSideEffects = false; + let isCommutable = false; +} + +def G_UDIVFIXSAT : GenericInstruction { + let OutOperandList = (outs type0:$dst); + let InOperandList = (ins type0:$src0, type0:$src1, untyped_imm_0:$scale); + let hasSideEffects = false; + let isCommutable = false; } //------------------------------------------------------------------------------ @@ -552,61 +646,61 @@ def G_SSUBSAT : GenericInstruction { def G_FNEG : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type0:$src); - let hasSideEffects = 0; + let hasSideEffects = false; } def G_FPEXT : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type1:$src); - let hasSideEffects = 0; + let hasSideEffects = false; } def G_FPTRUNC : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type1:$src); - let hasSideEffects = 0; + let hasSideEffects = false; } def G_FPTOSI : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type1:$src); - let hasSideEffects = 0; + let hasSideEffects = false; } def G_FPTOUI : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type1:$src); - let hasSideEffects = 0; + let hasSideEffects = false; } def G_SITOFP : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type1:$src); - let hasSideEffects = 0; + let hasSideEffects = false; } def G_UITOFP : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type1:$src); - let hasSideEffects = 0; + let hasSideEffects = false; } def G_FABS : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type0:$src); - let hasSideEffects = 0; + let hasSideEffects = false; } def G_FCOPYSIGN : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type0:$src0, type1:$src1); - let hasSideEffects = 0; + let hasSideEffects = false; } def G_FCANONICALIZE : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type0:$src); - let hasSideEffects = 0; + let hasSideEffects = false; } // FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two @@ -619,15 +713,15 @@ def G_FCANONICALIZE : GenericInstruction { def G_FMINNUM : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type0:$src1, type0:$src2); - let hasSideEffects = 0; - let isCommutable = 1; + let hasSideEffects = false; + let isCommutable = true; } def G_FMAXNUM : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type0:$src1, type0:$src2); - let hasSideEffects = 0; - let isCommutable = 1; + let hasSideEffects = false; + let isCommutable = true; } // FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimum or maximum on @@ -637,15 +731,15 @@ def G_FMAXNUM : GenericInstruction { def G_FMINNUM_IEEE : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type0:$src1, type0:$src2); - let hasSideEffects = 0; - let isCommutable = 1; + let hasSideEffects = false; + let isCommutable = true; } def G_FMAXNUM_IEEE : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type0:$src1, type0:$src2); - let hasSideEffects = 0; - let isCommutable = 1; + let hasSideEffects = false; + let isCommutable = true; } 
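The [US]MULFIX comment a little above defines the result as the full-width product shifted right by the scale, with the SAT forms clamped to the operand type's range instead of wrapped. A self-contained C++ sketch of that semantics for 32-bit signed operands; the function names and the truncating rounding choice are ours, purely for illustration.

  #include <cstdint>
  #include <cstdio>

  // Signed fixed-point multiply as described for G_SMULFIX: widen, multiply,
  // then drop `scale` fractional bits. G_SMULFIXSAT additionally clamps the
  // result to the 32-bit signed range.
  static int32_t smulfix(int32_t a, int32_t b, unsigned scale) {
    return static_cast<int32_t>((static_cast<int64_t>(a) * b) >> scale);
  }

  static int32_t smulfixsat(int32_t a, int32_t b, unsigned scale) {
    int64_t p = (static_cast<int64_t>(a) * b) >> scale;
    if (p > INT32_MAX) return INT32_MAX;
    if (p < INT32_MIN) return INT32_MIN;
    return static_cast<int32_t>(p);
  }

  int main() {
    // 1.5 * 2.25 = 3.375 in Q16.16 (scale = 16): 0x18000 * 0x24000 -> 0x36000.
    std::printf("%#x\n", smulfix(3 << 15, 9 << 14, 16));
    // A product that no longer fits is clamped rather than wrapped.
    std::printf("%d\n", smulfixsat(INT32_MAX, INT32_MAX, 1));
    return 0;
  }

The [US]DIVFIX opcodes are roughly the mirror image: the dividend is widened and shifted left by the scale before the division, with the SAT forms again clamping the result.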
// FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 @@ -654,15 +748,15 @@ def G_FMAXNUM_IEEE : GenericInstruction { def G_FMINIMUM : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type0:$src1, type0:$src2); - let hasSideEffects = 0; - let isCommutable = 1; + let hasSideEffects = false; + let isCommutable = true; } def G_FMAXIMUM : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type0:$src1, type0:$src2); - let hasSideEffects = 0; - let isCommutable = 1; + let hasSideEffects = false; + let isCommutable = true; } //------------------------------------------------------------------------------ @@ -673,24 +767,24 @@ def G_FMAXIMUM : GenericInstruction { def G_FADD : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type0:$src1, type0:$src2); - let hasSideEffects = 0; - let isCommutable = 1; + let hasSideEffects = false; + let isCommutable = true; } // Generic FP subtraction. def G_FSUB : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type0:$src1, type0:$src2); - let hasSideEffects = 0; - let isCommutable = 0; + let hasSideEffects = false; + let isCommutable = false; } // Generic FP multiplication. def G_FMUL : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type0:$src1, type0:$src2); - let hasSideEffects = 0; - let isCommutable = 1; + let hasSideEffects = false; + let isCommutable = true; } // Generic fused multiply-add instruction. @@ -698,8 +792,8 @@ def G_FMUL : GenericInstruction { def G_FMA : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type0:$src1, type0:$src2, type0:$src3); - let hasSideEffects = 0; - let isCommutable = 0; + let hasSideEffects = false; + let isCommutable = false; } /// Generic FP multiply and add. Perform a * b + c, while getting the @@ -707,85 +801,92 @@ def G_FMA : GenericInstruction { def G_FMAD : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type0:$src1, type0:$src2, type0:$src3); - let hasSideEffects = 0; - let isCommutable = 0; + let hasSideEffects = false; + let isCommutable = false; } // Generic FP division. def G_FDIV : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type0:$src1, type0:$src2); - let hasSideEffects = 0; + let hasSideEffects = false; } // Generic FP remainder. def G_FREM : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type0:$src1, type0:$src2); - let hasSideEffects = 0; + let hasSideEffects = false; } // Floating point exponentiation. def G_FPOW : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type0:$src1, type0:$src2); - let hasSideEffects = 0; + let hasSideEffects = false; +} + +// Floating point exponentiation, with an integer power. +def G_FPOWI : GenericInstruction { + let OutOperandList = (outs type0:$dst); + let InOperandList = (ins type0:$src0, type1:$src1); + let hasSideEffects = false; } // Floating point base-e exponential of a value. def G_FEXP : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type0:$src1); - let hasSideEffects = 0; + let hasSideEffects = false; } // Floating point base-2 exponential of a value. 
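Two different minimums are defined above: G_FMINNUM behaves essentially like libm's fmin and returns the non-NaN operand when exactly one input is NaN, while G_FMINIMUM propagates NaN and treats -0.0 as less than +0.0. The following C++ snippet only illustrates that behavioural difference; fminimum_like is our own helper, not the LLVM lowering.

  #include <cmath>
  #include <cstdio>
  #include <limits>

  // fminimum-style compare: NaN wins, and -0.0 is considered less than +0.0.
  static double fminimum_like(double a, double b) {
    if (std::isnan(a) || std::isnan(b))
      return std::numeric_limits<double>::quiet_NaN();
    if (a == b)                        // distinguishes +0.0 from -0.0
      return std::signbit(a) ? a : b;
    return a < b ? a : b;
  }

  int main() {
    double nan = std::numeric_limits<double>::quiet_NaN();
    std::printf("%f\n", std::fmin(nan, 1.0));       // 1.0: fminnum-style
    std::printf("%f\n", fminimum_like(nan, 1.0));   // nan: fminimum-style
    std::printf("%d\n", std::signbit(fminimum_like(0.0, -0.0))); // 1: -0.0 wins
    return 0;
  }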
def G_FEXP2 : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type0:$src1); - let hasSideEffects = 0; + let hasSideEffects = false; } // Floating point base-e logarithm of a value. def G_FLOG : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type0:$src1); - let hasSideEffects = 0; + let hasSideEffects = false; } // Floating point base-2 logarithm of a value. def G_FLOG2 : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type0:$src1); - let hasSideEffects = 0; + let hasSideEffects = false; } // Floating point base-10 logarithm of a value. def G_FLOG10 : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type0:$src1); - let hasSideEffects = 0; + let hasSideEffects = false; } // Floating point ceiling of a value. def G_FCEIL : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type0:$src1); - let hasSideEffects = 0; + let hasSideEffects = false; } // Floating point cosine of a value. def G_FCOS : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type0:$src1); - let hasSideEffects = 0; + let hasSideEffects = false; } // Floating point sine of a value. def G_FSIN : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type0:$src1); - let hasSideEffects = 0; + let hasSideEffects = false; } // Floating point square root of a value. @@ -795,28 +896,28 @@ def G_FSIN : GenericInstruction { def G_FSQRT : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type0:$src1); - let hasSideEffects = 0; + let hasSideEffects = false; } // Floating point floor of a value. def G_FFLOOR : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type0:$src1); - let hasSideEffects = 0; + let hasSideEffects = false; } // Floating point round to next integer. def G_FRINT : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type0:$src1); - let hasSideEffects = 0; + let hasSideEffects = false; } // Floating point round to the nearest integer. 
def G_FNEARBYINT : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type0:$src1); - let hasSideEffects = 0; + let hasSideEffects = false; } //------------------------------------------------------------------------------ @@ -825,19 +926,31 @@ def G_FNEARBYINT : GenericInstruction { def G_INTRINSIC_TRUNC : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type0:$src1); - let hasSideEffects = 0; + let hasSideEffects = false; } def G_INTRINSIC_ROUND : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type0:$src1); - let hasSideEffects = 0; + let hasSideEffects = false; +} + +def G_INTRINSIC_LRINT : GenericInstruction { + let OutOperandList = (outs type0:$dst); + let InOperandList = (ins type1:$src); + let hasSideEffects = false; +} + +def G_INTRINSIC_ROUNDEVEN : GenericInstruction { + let OutOperandList = (outs type0:$dst); + let InOperandList = (ins type0:$src1); + let hasSideEffects = false; } def G_READCYCLECOUNTER : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins); - let hasSideEffects = 1; + let hasSideEffects = true; } //------------------------------------------------------------------------------ @@ -852,24 +965,24 @@ def G_READCYCLECOUNTER : GenericInstruction { def G_LOAD : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins ptype1:$addr); - let hasSideEffects = 0; - let mayLoad = 1; + let hasSideEffects = false; + let mayLoad = true; } // Generic sign-extended load. Expects a MachineMemOperand in addition to explicit operands. def G_SEXTLOAD : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins ptype1:$addr); - let hasSideEffects = 0; - let mayLoad = 1; + let hasSideEffects = false; + let mayLoad = true; } // Generic zero-extended load. Expects a MachineMemOperand in addition to explicit operands. def G_ZEXTLOAD : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins ptype1:$addr); - let hasSideEffects = 0; - let mayLoad = 1; + let hasSideEffects = false; + let mayLoad = true; } // Generic indexed load. Combines a GEP with a load. $newaddr is set to $base + $offset. @@ -878,32 +991,32 @@ def G_ZEXTLOAD : GenericInstruction { def G_INDEXED_LOAD : GenericInstruction { let OutOperandList = (outs type0:$dst, ptype1:$newaddr); let InOperandList = (ins ptype1:$base, type2:$offset, unknown:$am); - let hasSideEffects = 0; - let mayLoad = 1; + let hasSideEffects = false; + let mayLoad = true; } // Same as G_INDEXED_LOAD except that the load performed is sign-extending, as with G_SEXTLOAD. def G_INDEXED_SEXTLOAD : GenericInstruction { let OutOperandList = (outs type0:$dst, ptype1:$newaddr); let InOperandList = (ins ptype1:$base, type2:$offset, unknown:$am); - let hasSideEffects = 0; - let mayLoad = 1; + let hasSideEffects = false; + let mayLoad = true; } // Same as G_INDEXED_LOAD except that the load performed is zero-extending, as with G_ZEXTLOAD. def G_INDEXED_ZEXTLOAD : GenericInstruction { let OutOperandList = (outs type0:$dst, ptype1:$newaddr); let InOperandList = (ins ptype1:$base, type2:$offset, unknown:$am); - let hasSideEffects = 0; - let mayLoad = 1; + let hasSideEffects = false; + let mayLoad = true; } // Generic store. Expects a MachineMemOperand in addition to explicit operands. 
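G_INDEXED_LOAD above bundles the address update with the load: it defines both the loaded value and $newaddr = $base + $offset, with the $am operand selecting pre- versus post-indexed addressing. A hedged C++ model follows; it treats the offset as an element count for simplicity, and the helper name and flag encoding are illustrative only.

  #include <cstddef>
  #include <cstdint>
  #include <cstdio>

  struct IndexedLoad {
    int32_t value;      // the loaded value ($dst)
    int32_t *newaddr;   // the updated pointer ($newaddr = $base + $offset)
  };

  // Pre-indexed: load from base + offset. Post-indexed: load from base.
  // Either way the caller also gets the updated address back.
  static IndexedLoad indexed_load(int32_t *base, std::ptrdiff_t offset,
                                  bool pre_indexed) {
    int32_t *addr = pre_indexed ? base + offset : base;
    return {*addr, base + offset};
  }

  int main() {
    int32_t buf[4] = {10, 20, 30, 40};
    IndexedLoad post = indexed_load(buf, 1, /*pre_indexed=*/false);
    IndexedLoad pre  = indexed_load(buf, 1, /*pre_indexed=*/true);
    std::printf("%d %d %d\n", post.value, pre.value, *post.newaddr); // 10 20 20
    return 0;
  }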
def G_STORE : GenericInstruction { let OutOperandList = (outs); let InOperandList = (ins type0:$src, ptype1:$addr); - let hasSideEffects = 0; - let mayStore = 1; + let hasSideEffects = false; + let mayStore = true; } // Combines a store with a GEP. See description of G_INDEXED_LOAD for indexing behaviour. @@ -911,8 +1024,8 @@ def G_INDEXED_STORE : GenericInstruction { let OutOperandList = (outs ptype0:$newaddr); let InOperandList = (ins type1:$src, ptype0:$base, ptype2:$offset, unknown:$am); - let hasSideEffects = 0; - let mayStore = 1; + let hasSideEffects = false; + let mayStore = true; } // Generic atomic cmpxchg with internal success check. Expects a @@ -920,9 +1033,9 @@ def G_INDEXED_STORE : GenericInstruction { def G_ATOMIC_CMPXCHG_WITH_SUCCESS : GenericInstruction { let OutOperandList = (outs type0:$oldval, type1:$success); let InOperandList = (ins type2:$addr, type0:$cmpval, type0:$newval); - let hasSideEffects = 0; - let mayLoad = 1; - let mayStore = 1; + let hasSideEffects = false; + let mayLoad = true; + let mayStore = true; } // Generic atomic cmpxchg. Expects a MachineMemOperand in addition to explicit @@ -930,9 +1043,9 @@ def G_ATOMIC_CMPXCHG_WITH_SUCCESS : GenericInstruction { def G_ATOMIC_CMPXCHG : GenericInstruction { let OutOperandList = (outs type0:$oldval); let InOperandList = (ins ptype1:$addr, type0:$cmpval, type0:$newval); - let hasSideEffects = 0; - let mayLoad = 1; - let mayStore = 1; + let hasSideEffects = false; + let mayLoad = true; + let mayStore = true; } // Generic atomicrmw. Expects a MachineMemOperand in addition to explicit @@ -940,9 +1053,9 @@ def G_ATOMIC_CMPXCHG : GenericInstruction { class G_ATOMICRMW_OP : GenericInstruction { let OutOperandList = (outs type0:$oldval); let InOperandList = (ins ptype1:$addr, type0:$val); - let hasSideEffects = 0; - let mayLoad = 1; - let mayStore = 1; + let hasSideEffects = false; + let mayLoad = true; + let mayStore = true; } def G_ATOMICRMW_XCHG : G_ATOMICRMW_OP; @@ -962,7 +1075,7 @@ def G_ATOMICRMW_FSUB : G_ATOMICRMW_OP; def G_FENCE : GenericInstruction { let OutOperandList = (outs); let InOperandList = (ins i32imm:$ordering, i32imm:$scope); - let hasSideEffects = 1; + let hasSideEffects = true; } //------------------------------------------------------------------------------ @@ -975,7 +1088,7 @@ def G_FENCE : GenericInstruction { def G_EXTRACT : GenericInstruction { let OutOperandList = (outs type0:$res); let InOperandList = (ins type1:$src, untyped_imm_0:$offset); - let hasSideEffects = 0; + let hasSideEffects = false; } // Extract multiple registers specified size, starting from blocks given by @@ -987,14 +1100,14 @@ def G_EXTRACT : GenericInstruction { def G_UNMERGE_VALUES : GenericInstruction { let OutOperandList = (outs type0:$dst0, variable_ops); let InOperandList = (ins type1:$src); - let hasSideEffects = 0; + let hasSideEffects = false; } // Insert a smaller register into a larger one at the specified bit-index. def G_INSERT : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type0:$src, type1:$op, untyped_imm_0:$offset); - let hasSideEffects = 0; + let hasSideEffects = false; } // Concatenate multiple registers of the same size into a wider register. @@ -1004,7 +1117,7 @@ def G_INSERT : GenericInstruction { def G_MERGE_VALUES : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type1:$src0, variable_ops); - let hasSideEffects = 0; + let hasSideEffects = false; } /// Create a vector from multiple scalar registers. 
No implicit @@ -1013,7 +1126,7 @@ def G_MERGE_VALUES : GenericInstruction { def G_BUILD_VECTOR : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type1:$src0, variable_ops); - let hasSideEffects = 0; + let hasSideEffects = false; } /// Like G_BUILD_VECTOR, but truncates the larger operand types to fit the @@ -1021,24 +1134,24 @@ def G_BUILD_VECTOR : GenericInstruction { def G_BUILD_VECTOR_TRUNC : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type1:$src0, variable_ops); - let hasSideEffects = 0; + let hasSideEffects = false; } /// Create a vector by concatenating vectors together. def G_CONCAT_VECTORS : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type1:$src0, variable_ops); - let hasSideEffects = 0; + let hasSideEffects = false; } // Intrinsic without side effects. def G_INTRINSIC : GenericInstruction { let OutOperandList = (outs); let InOperandList = (ins unknown:$intrin, variable_ops); - let hasSideEffects = 0; + let hasSideEffects = false; // Conservatively assume this is convergent. If there turnes out to - // be a need, there should be separate convergent intrinsic opcode.s + // be a need, there should be separate convergent intrinsic opcodes. let isConvergent = 1; } @@ -1046,13 +1159,13 @@ def G_INTRINSIC : GenericInstruction { def G_INTRINSIC_W_SIDE_EFFECTS : GenericInstruction { let OutOperandList = (outs); let InOperandList = (ins unknown:$intrin, variable_ops); - let hasSideEffects = 1; - let mayLoad = 1; - let mayStore = 1; + let hasSideEffects = true; + let mayLoad = true; + let mayStore = true; // Conservatively assume this is convergent. If there turnes out to - // be a need, there should be separate convergent intrinsic opcode.s - let isConvergent = 1; + // be a need, there should be separate convergent intrinsic opcodes. + let isConvergent = true; } //------------------------------------------------------------------------------ @@ -1063,61 +1176,61 @@ def G_INTRINSIC_W_SIDE_EFFECTS : GenericInstruction { def G_BR : GenericInstruction { let OutOperandList = (outs); let InOperandList = (ins unknown:$src1); - let hasSideEffects = 0; - let isBranch = 1; - let isTerminator = 1; - let isBarrier = 1; + let hasSideEffects = false; + let isBranch = true; + let isTerminator = true; + let isBarrier = true; } // Generic conditional branch. def G_BRCOND : GenericInstruction { let OutOperandList = (outs); let InOperandList = (ins type0:$tst, unknown:$truebb); - let hasSideEffects = 0; - let isBranch = 1; - let isTerminator = 1; + let hasSideEffects = false; + let isBranch = true; + let isTerminator = true; } // Generic indirect branch. 
def G_BRINDIRECT : GenericInstruction { let OutOperandList = (outs); let InOperandList = (ins type0:$src1); - let hasSideEffects = 0; - let isBranch = 1; - let isTerminator = 1; - let isBarrier = 1; - let isIndirectBranch = 1; + let hasSideEffects = false; + let isBranch = true; + let isTerminator = true; + let isBarrier = true; + let isIndirectBranch = true; } // Generic branch to jump table entry def G_BRJT : GenericInstruction { let OutOperandList = (outs); let InOperandList = (ins ptype0:$tbl, unknown:$jti, type1:$idx); - let hasSideEffects = 0; - let isBranch = 1; - let isTerminator = 1; - let isBarrier = 1; - let isIndirectBranch = 1; + let hasSideEffects = false; + let isBranch = true; + let isTerminator = true; + let isBarrier = true; + let isIndirectBranch = true; } def G_READ_REGISTER : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins unknown:$register); - let hasSideEffects = 1; + let hasSideEffects = true; // Assume convergent. It's probably not worth the effort of somehow // modeling convergent and nonconvergent register accesses. - let isConvergent = 1; + let isConvergent = true; } def G_WRITE_REGISTER : GenericInstruction { let OutOperandList = (outs); let InOperandList = (ins unknown:$register, type0:$value); - let hasSideEffects = 1; + let hasSideEffects = true; // Assume convergent. It's probably not worth the effort of somehow // modeling convergent and nonconvergent register accesses. - let isConvergent = 1; + let isConvergent = true; } //------------------------------------------------------------------------------ @@ -1128,14 +1241,14 @@ def G_WRITE_REGISTER : GenericInstruction { def G_INSERT_VECTOR_ELT : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type0:$src, type1:$elt, type2:$idx); - let hasSideEffects = 0; + let hasSideEffects = false; } // Generic extractelement. def G_EXTRACT_VECTOR_ELT : GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type1:$src, type2:$idx); - let hasSideEffects = 0; + let hasSideEffects = false; } // Generic shufflevector. 
@@ -1145,10 +1258,48 @@ def G_EXTRACT_VECTOR_ELT : GenericInstruction { def G_SHUFFLE_VECTOR: GenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type1:$v1, type1:$v2, unknown:$mask); - let hasSideEffects = 0; + let hasSideEffects = false; } //------------------------------------------------------------------------------ +// Vector reductions +//------------------------------------------------------------------------------ + +class VectorReduction : GenericInstruction { + let OutOperandList = (outs type0:$dst); + let InOperandList = (ins type1:$v); + let hasSideEffects = false; +} + +def G_VECREDUCE_SEQ_FADD : GenericInstruction { + let OutOperandList = (outs type0:$dst); + let InOperandList = (ins type1:$acc, type2:$v); + let hasSideEffects = false; +} + +def G_VECREDUCE_SEQ_FMUL : GenericInstruction { + let OutOperandList = (outs type0:$dst); + let InOperandList = (ins type1:$acc, type2:$v); + let hasSideEffects = false; +} + +def G_VECREDUCE_FADD : VectorReduction; +def G_VECREDUCE_FMUL : VectorReduction; + +def G_VECREDUCE_FMAX : VectorReduction; +def G_VECREDUCE_FMIN : VectorReduction; + +def G_VECREDUCE_ADD : VectorReduction; +def G_VECREDUCE_MUL : VectorReduction; +def G_VECREDUCE_AND : VectorReduction; +def G_VECREDUCE_OR : VectorReduction; +def G_VECREDUCE_XOR : VectorReduction; +def G_VECREDUCE_SMAX : VectorReduction; +def G_VECREDUCE_SMIN : VectorReduction; +def G_VECREDUCE_UMAX : VectorReduction; +def G_VECREDUCE_UMIN : VectorReduction; + +//------------------------------------------------------------------------------ // Constrained floating point ops //------------------------------------------------------------------------------ @@ -1159,3 +1310,30 @@ def G_STRICT_FDIV : ConstrainedIntruction<G_FDIV>; def G_STRICT_FREM : ConstrainedIntruction<G_FREM>; def G_STRICT_FMA : ConstrainedIntruction<G_FMA>; def G_STRICT_FSQRT : ConstrainedIntruction<G_FSQRT>; + +//------------------------------------------------------------------------------ +// Memory intrinsics +//------------------------------------------------------------------------------ + +def G_MEMCPY : GenericInstruction { + let OutOperandList = (outs); + let InOperandList = (ins ptype0:$dst_addr, ptype1:$src_addr, type2:$size, untyped_imm_0:$tailcall); + let hasSideEffects = false; + let mayLoad = true; + let mayStore = true; +} + +def G_MEMMOVE : GenericInstruction { + let OutOperandList = (outs); + let InOperandList = (ins ptype0:$dst_addr, ptype1:$src_addr, type2:$size, untyped_imm_0:$tailcall); + let hasSideEffects = false; + let mayLoad = true; + let mayStore = true; +} + +def G_MEMSET : GenericInstruction { + let OutOperandList = (outs); + let InOperandList = (ins ptype0:$dst_addr, type1:$value, type2:$size, untyped_imm_0:$tailcall); + let hasSideEffects = false; + let mayStore = true; +} diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td index 1dd3e374b524..e2c7a90a1b16 100644 --- a/llvm/include/llvm/Target/GlobalISel/Combine.td +++ b/llvm/include/llvm/Target/GlobalISel/Combine.td @@ -85,6 +85,7 @@ class GIDefMatchData<string type> : GIDefKind { def extending_load_matchdata : GIDefMatchData<"PreferredTuple">; def indexed_load_store_matchdata : GIDefMatchData<"IndexedLoadStoreMatchInfo">; +def instruction_steps_matchdata: GIDefMatchData<"InstructionStepsMatchInfo">; /// The operator at the root of a GICombineRule.Match dag. 
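Among the vector-reduction opcodes added above, the SEQ forms (G_VECREDUCE_SEQ_FADD and G_VECREDUCE_SEQ_FMUL) take an explicit accumulator and must apply the operation strictly in element order, while the plain FP forms are allowed to reassociate. A short C++ illustration of why the distinction is observable for floating point; the function name is ours.

  #include <cstdio>
  #include <vector>

  // Ordered reduction: start from the accumulator and fold elements left to
  // right, as G_VECREDUCE_SEQ_FADD requires.
  static float vecreduce_seq_fadd(float acc, const std::vector<float> &v) {
    for (float x : v)
      acc += x;
    return acc;
  }

  int main() {
    std::vector<float> v = {1e8f, 1.0f, -1e8f, 1.0f};
    // Ordered: ((1e8 + 1) - 1e8) + 1 rounds to 1, because 1e8f + 1.0f is
    // still 1e8f. A reassociated sum (1e8 - 1e8) + (1 + 1) would give 2.
    std::printf("%g\n", vecreduce_seq_fadd(0.0f, v));
    return 0;
  }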
def match; @@ -125,11 +126,18 @@ def extending_loads : GICombineRule< (apply [{ Helper.applyCombineExtendingLoads(*${root}, ${matchinfo}); }])>; def combines_for_extload: GICombineGroup<[extending_loads]>; -def sext_already_extended : GICombineRule< +def sext_trunc_sextload : GICombineRule< (defs root:$d), (match (wip_match_opcode G_SEXT_INREG):$d, - [{ return Helper.matchSextAlreadyExtended(*${d}); }]), - (apply [{ Helper.applySextAlreadyExtended(*${d}); }])>; + [{ return Helper.matchSextTruncSextLoad(*${d}); }]), + (apply [{ Helper.applySextTruncSextLoad(*${d}); }])>; + +def sext_inreg_of_load_matchdata : GIDefMatchData<"std::tuple<Register, unsigned>">; +def sext_inreg_of_load : GICombineRule< + (defs root:$root, sext_inreg_of_load_matchdata:$matchinfo), + (match (wip_match_opcode G_SEXT_INREG):$root, + [{ return Helper.matchSextInRegOfLoad(*${root}, ${matchinfo}); }]), + (apply [{ return Helper.applySextInRegOfLoad(*${root}, ${matchinfo}); }])>; def combine_indexed_load_store : GICombineRule< (defs root:$root, indexed_load_store_matchdata:$matchinfo), @@ -137,13 +145,11 @@ def combine_indexed_load_store : GICombineRule< [{ return Helper.matchCombineIndexedLoadStore(*${root}, ${matchinfo}); }]), (apply [{ Helper.applyCombineIndexedLoadStore(*${root}, ${matchinfo}); }])>; -// FIXME: Is there a reason this wasn't in tryCombine? I've left it out of -// all_combines because it wasn't there. -def elide_br_by_inverting_cond : GICombineRule< +def opt_brcond_by_inverting_cond : GICombineRule< (defs root:$root), (match (wip_match_opcode G_BR):$root, - [{ return Helper.matchElideBrByInvertingCond(*${root}); }]), - (apply [{ Helper.applyElideBrByInvertingCond(*${root}); }])>; + [{ return Helper.matchOptBrCondByInvertingCond(*${root}); }]), + (apply [{ Helper.applyOptBrCondByInvertingCond(*${root}); }])>; def ptr_add_immed_matchdata : GIDefMatchData<"PtrAddChain">; def ptr_add_immed_chain : GICombineRule< @@ -152,6 +158,23 @@ def ptr_add_immed_chain : GICombineRule< [{ return Helper.matchPtrAddImmedChain(*${d}, ${matchinfo}); }]), (apply [{ Helper.applyPtrAddImmedChain(*${d}, ${matchinfo}); }])>; +// Fold shift (shift base x), y -> shift base, (x+y), if shifts are same +def shift_immed_matchdata : GIDefMatchData<"RegisterImmPair">; +def shift_immed_chain : GICombineRule< + (defs root:$d, shift_immed_matchdata:$matchinfo), + (match (wip_match_opcode G_SHL, G_ASHR, G_LSHR, G_SSHLSAT, G_USHLSAT):$d, + [{ return Helper.matchShiftImmedChain(*${d}, ${matchinfo}); }]), + (apply [{ Helper.applyShiftImmedChain(*${d}, ${matchinfo}); }])>; + +// Transform shift (logic (shift X, C0), Y), C1 +// -> logic (shift X, (C0+C1)), (shift Y, C1), if shifts are same +def shift_of_shifted_logic_matchdata : GIDefMatchData<"ShiftOfShiftedLogic">; +def shift_of_shifted_logic_chain : GICombineRule< + (defs root:$d, shift_of_shifted_logic_matchdata:$matchinfo), + (match (wip_match_opcode G_SHL, G_ASHR, G_LSHR, G_USHLSAT, G_SSHLSAT):$d, + [{ return Helper.matchShiftOfShiftedLogic(*${d}, ${matchinfo}); }]), + (apply [{ Helper.applyShiftOfShiftedLogic(*${d}, ${matchinfo}); }])>; + def mul_to_shl_matchdata : GIDefMatchData<"unsigned">; def mul_to_shl : GICombineRule< (defs root:$d, mul_to_shl_matchdata:$matchinfo), @@ -159,6 +182,14 @@ def mul_to_shl : GICombineRule< [{ return Helper.matchCombineMulToShl(*${mi}, ${matchinfo}); }]), (apply [{ Helper.applyCombineMulToShl(*${mi}, ${matchinfo}); }])>; +// shl ([asz]ext x), y => zext (shl x, y), if shift does not overflow int +def reduce_shl_of_extend_matchdata : 
GIDefMatchData<"RegisterImmPair">; +def reduce_shl_of_extend : GICombineRule< + (defs root:$dst, reduce_shl_of_extend_matchdata:$matchinfo), + (match (G_SHL $dst, $src0, $src1):$mi, + [{ return Helper.matchCombineShlOfExtend(*${mi}, ${matchinfo}); }]), + (apply [{ Helper.applyCombineShlOfExtend(*${mi}, ${matchinfo}); }])>; + // [us]itofp(undef) = 0, because the result value is bounded. def undef_to_fp_zero : GICombineRule< (defs root:$root), @@ -178,11 +209,17 @@ def undef_to_negative_one: GICombineRule< [{ return Helper.matchAnyExplicitUseIsUndef(*${root}); }]), (apply [{ Helper.replaceInstWithConstant(*${root}, -1); }])>; +def binop_left_undef_to_zero: GICombineRule< + (defs root:$root), + (match (wip_match_opcode G_SHL):$root, + [{ return Helper.matchOperandIsUndef(*${root}, 1); }]), + (apply [{ Helper.replaceInstWithConstant(*${root}, 0); }])>; + // Instructions where if any source operand is undef, the instruction can be // replaced with undef. def propagate_undef_any_op: GICombineRule< (defs root:$root), - (match (wip_match_opcode G_ADD, G_FPTOSI, G_FPTOUI, G_SUB, G_XOR):$root, + (match (wip_match_opcode G_ADD, G_FPTOSI, G_FPTOUI, G_SUB, G_XOR, G_TRUNC):$root, [{ return Helper.matchAnyExplicitUseIsUndef(*${root}); }]), (apply [{ Helper.replaceInstWithUndef(*${root}); }])>; @@ -209,6 +246,24 @@ def select_same_val: GICombineRule< (apply [{ return Helper.replaceSingleDefInstWithOperand(*${root}, 2); }]) >; +// Fold (undef ? x : y) -> y +def select_undef_cmp: GICombineRule< + (defs root:$root), + (match (wip_match_opcode G_SELECT):$root, + [{ return Helper.matchUndefSelectCmp(*${root}); }]), + (apply [{ return Helper.replaceSingleDefInstWithOperand(*${root}, 2); }]) +>; + +// Fold (true ? x : y) -> x +// Fold (false ? x : y) -> y +def select_constant_cmp_matchdata : GIDefMatchData<"unsigned">; +def select_constant_cmp: GICombineRule< + (defs root:$root, select_constant_cmp_matchdata:$matchinfo), + (match (wip_match_opcode G_SELECT):$root, + [{ return Helper.matchConstantSelectCmp(*${root}, ${matchinfo}); }]), + (apply [{ return Helper.replaceSingleDefInstWithOperand(*${root}, ${matchinfo}); }]) +>; + // Fold x op 0 -> x def right_identity_zero: GICombineRule< (defs root:$root), @@ -217,6 +272,14 @@ def right_identity_zero: GICombineRule< (apply [{ return Helper.replaceSingleDefInstWithOperand(*${root}, 1); }]) >; +// Fold x op 1 -> x +def right_identity_one: GICombineRule< + (defs root:$root), + (match (wip_match_opcode G_MUL):$root, + [{ return Helper.matchConstantOp(${root}->getOperand(2), 1); }]), + (apply [{ return Helper.replaceSingleDefInstWithOperand(*${root}, 1); }]) +>; + // Fold (x op x) - > x def binop_same_val: GICombineRule< (defs root:$root), @@ -233,6 +296,13 @@ def binop_left_to_zero: GICombineRule< (apply [{ return Helper.replaceSingleDefInstWithOperand(*${root}, 1); }]) >; +def urem_pow2_to_mask : GICombineRule< + (defs root:$root), + (match (wip_match_opcode G_UREM):$root, + [{ return Helper.matchOperandIsKnownToBeAPowerOfTwo(*${root}, 2); }]), + (apply [{ return Helper.applySimplifyURemByPow2(*${root}); }]) +>; + // Fold (x op 0) - > 0 def binop_right_to_zero: GICombineRule< (defs root:$root), @@ -257,9 +327,240 @@ def simplify_add_to_sub: GICombineRule < (apply [{ return Helper.applySimplifyAddToSub(*${root}, ${info});}]) >; +// Fold fp_op(cst) to the constant result of the floating point operation. 
+def constant_fp_op_matchinfo: GIDefMatchData<"Optional<APFloat>">; +def constant_fp_op: GICombineRule < + (defs root:$root, constant_fp_op_matchinfo:$info), + (match (wip_match_opcode G_FNEG, G_FABS, G_FPTRUNC, G_FSQRT, G_FLOG2):$root, + [{ return Helper.matchCombineConstantFoldFpUnary(*${root}, ${info}); }]), + (apply [{ return Helper.applyCombineConstantFoldFpUnary(*${root}, ${info}); }]) +>; + +// Fold int2ptr(ptr2int(x)) -> x +def p2i_to_i2p_matchinfo: GIDefMatchData<"Register">; +def p2i_to_i2p: GICombineRule< + (defs root:$root, p2i_to_i2p_matchinfo:$info), + (match (wip_match_opcode G_INTTOPTR):$root, + [{ return Helper.matchCombineI2PToP2I(*${root}, ${info}); }]), + (apply [{ return Helper.applyCombineI2PToP2I(*${root}, ${info}); }]) +>; + +// Fold ptr2int(int2ptr(x)) -> x +def i2p_to_p2i_matchinfo: GIDefMatchData<"Register">; +def i2p_to_p2i: GICombineRule< + (defs root:$root, i2p_to_p2i_matchinfo:$info), + (match (wip_match_opcode G_PTRTOINT):$root, + [{ return Helper.matchCombineP2IToI2P(*${root}, ${info}); }]), + (apply [{ return Helper.applyCombineP2IToI2P(*${root}, ${info}); }]) +>; + +// Fold add ptrtoint(x), y -> ptrtoint (ptr_add x), y +def add_p2i_to_ptradd_matchinfo : GIDefMatchData<"std::pair<Register, bool>">; +def add_p2i_to_ptradd : GICombineRule< + (defs root:$root, add_p2i_to_ptradd_matchinfo:$info), + (match (wip_match_opcode G_ADD):$root, + [{ return Helper.matchCombineAddP2IToPtrAdd(*${root}, ${info}); }]), + (apply [{ return Helper.applyCombineAddP2IToPtrAdd(*${root}, ${info}); }]) +>; + +// Fold (ptr_add (int2ptr C1), C2) -> C1 + C2 +def const_ptradd_to_i2p_matchinfo : GIDefMatchData<"int64_t">; +def const_ptradd_to_i2p: GICombineRule< + (defs root:$root, const_ptradd_to_i2p_matchinfo:$info), + (match (wip_match_opcode G_PTR_ADD):$root, + [{ return Helper.matchCombineConstPtrAddToI2P(*${root}, ${info}); }]), + (apply [{ return Helper.applyCombineConstPtrAddToI2P(*${root}, ${info}); }]) +>; + +// Simplify: (logic_op (op x...), (op y...)) -> (op (logic_op x, y)) +def hoist_logic_op_with_same_opcode_hands: GICombineRule < + (defs root:$root, instruction_steps_matchdata:$info), + (match (wip_match_opcode G_AND, G_OR, G_XOR):$root, + [{ return Helper.matchHoistLogicOpWithSameOpcodeHands(*${root}, ${info}); }]), + (apply [{ return Helper.applyBuildInstructionSteps(*${root}, ${info});}]) +>; + +// Fold ashr (shl x, C), C -> sext_inreg (C) +def shl_ashr_to_sext_inreg_matchinfo : GIDefMatchData<"std::tuple<Register, int64_t>">; +def shl_ashr_to_sext_inreg : GICombineRule< + (defs root:$root, shl_ashr_to_sext_inreg_matchinfo:$info), + (match (wip_match_opcode G_ASHR): $root, + [{ return Helper.matchAshrShlToSextInreg(*${root}, ${info}); }]), + (apply [{ return Helper.applyAshShlToSextInreg(*${root}, ${info});}]) +>; +// Fold (x & y) -> x or (x & y) -> y when (x & y) is known to equal x or equal y. +def redundant_and_matchinfo : GIDefMatchData<"Register">; +def redundant_and: GICombineRule < + (defs root:$root, redundant_and_matchinfo:$matchinfo), + (match (wip_match_opcode G_AND):$root, + [{ return Helper.matchRedundantAnd(*${root}, ${matchinfo}); }]), + (apply [{ return Helper.replaceSingleDefInstWithReg(*${root}, ${matchinfo}); }]) +>; + +// Fold (x | y) -> x or (x | y) -> y when (x | y) is known to equal x or equal y. 
+def redundant_or_matchinfo : GIDefMatchData<"Register">; +def redundant_or: GICombineRule < + (defs root:$root, redundant_or_matchinfo:$matchinfo), + (match (wip_match_opcode G_OR):$root, + [{ return Helper.matchRedundantOr(*${root}, ${matchinfo}); }]), + (apply [{ return Helper.replaceSingleDefInstWithReg(*${root}, ${matchinfo}); }]) +>; + +// If the input is already sign extended, just drop the extension. +// sext_inreg x, K -> +// if computeNumSignBits(x) >= (x.getScalarSizeInBits() - K + 1) +def redundant_sext_inreg: GICombineRule < + (defs root:$root), + (match (wip_match_opcode G_SEXT_INREG):$root, + [{ return Helper.matchRedundantSExtInReg(*${root}); }]), + (apply [{ return Helper.replaceSingleDefInstWithOperand(*${root}, 1); }]) +>; + +// Fold (anyext (trunc x)) -> x if the source type is same as +// the destination type. +def anyext_trunc_fold_matchinfo : GIDefMatchData<"Register">; +def anyext_trunc_fold: GICombineRule < + (defs root:$root, anyext_trunc_fold_matchinfo:$matchinfo), + (match (wip_match_opcode G_ANYEXT):$root, + [{ return Helper.matchCombineAnyExtTrunc(*${root}, ${matchinfo}); }]), + (apply [{ return Helper.applyCombineAnyExtTrunc(*${root}, ${matchinfo}); }]) +>; + +// Fold ([asz]ext ([asz]ext x)) -> ([asz]ext x). +def ext_ext_fold_matchinfo : GIDefMatchData<"std::tuple<Register, unsigned>">; +def ext_ext_fold: GICombineRule < + (defs root:$root, ext_ext_fold_matchinfo:$matchinfo), + (match (wip_match_opcode G_ANYEXT, G_SEXT, G_ZEXT):$root, + [{ return Helper.matchCombineExtOfExt(*${root}, ${matchinfo}); }]), + (apply [{ return Helper.applyCombineExtOfExt(*${root}, ${matchinfo}); }]) +>; + +def not_cmp_fold_matchinfo : GIDefMatchData<"SmallVector<Register, 4>">; +def not_cmp_fold : GICombineRule< + (defs root:$d, not_cmp_fold_matchinfo:$info), + (match (wip_match_opcode G_XOR): $d, + [{ return Helper.matchNotCmp(*${d}, ${info}); }]), + (apply [{ return Helper.applyNotCmp(*${d}, ${info}); }]) +>; + +// Fold (fneg (fneg x)) -> x. +def fneg_fneg_fold_matchinfo : GIDefMatchData<"Register">; +def fneg_fneg_fold: GICombineRule < + (defs root:$root, fneg_fneg_fold_matchinfo:$matchinfo), + (match (wip_match_opcode G_FNEG):$root, + [{ return Helper.matchCombineFNegOfFNeg(*${root}, ${matchinfo}); }]), + (apply [{ return Helper.replaceSingleDefInstWithReg(*${root}, ${matchinfo}); }]) +>; + +// Fold (unmerge(merge x, y, z)) -> z, y, z. +def unmerge_merge_matchinfo : GIDefMatchData<"SmallVector<Register, 8>">; +def unmerge_merge : GICombineRule< + (defs root:$d, unmerge_merge_matchinfo:$info), + (match (wip_match_opcode G_UNMERGE_VALUES): $d, + [{ return Helper.matchCombineUnmergeMergeToPlainValues(*${d}, ${info}); }]), + (apply [{ return Helper.applyCombineUnmergeMergeToPlainValues(*${d}, ${info}); }]) +>; + +// Fold (fabs (fabs x)) -> (fabs x). +def fabs_fabs_fold_matchinfo : GIDefMatchData<"Register">; +def fabs_fabs_fold: GICombineRule< + (defs root:$root, fabs_fabs_fold_matchinfo:$matchinfo), + (match (wip_match_opcode G_FABS):$root, + [{ return Helper.matchCombineFAbsOfFAbs(*${root}, ${matchinfo}); }]), + (apply [{ return Helper.applyCombineFAbsOfFAbs(*${root}, ${matchinfo}); }]) +>; + +// Fold (unmerge cst) -> cst1, cst2, ... 
+def unmerge_cst_matchinfo : GIDefMatchData<"SmallVector<APInt, 8>">; +def unmerge_cst : GICombineRule< + (defs root:$d, unmerge_cst_matchinfo:$info), + (match (wip_match_opcode G_UNMERGE_VALUES): $d, + [{ return Helper.matchCombineUnmergeConstant(*${d}, ${info}); }]), + (apply [{ return Helper.applyCombineUnmergeConstant(*${d}, ${info}); }]) +>; + +// Transform x,y<dead> = unmerge z -> x = trunc z. +def unmerge_dead_to_trunc : GICombineRule< + (defs root:$d), + (match (wip_match_opcode G_UNMERGE_VALUES): $d, + [{ return Helper.matchCombineUnmergeWithDeadLanesToTrunc(*${d}); }]), + (apply [{ return Helper.applyCombineUnmergeWithDeadLanesToTrunc(*${d}); }]) +>; + +// Transform x,y = unmerge(zext(z)) -> x = zext z; y = 0. +def unmerge_zext_to_zext : GICombineRule< + (defs root:$d), + (match (wip_match_opcode G_UNMERGE_VALUES): $d, + [{ return Helper.matchCombineUnmergeZExtToZExt(*${d}); }]), + (apply [{ return Helper.applyCombineUnmergeZExtToZExt(*${d}); }]) +>; + +// Fold trunc ([asz]ext x) -> x or ([asz]ext x) or (trunc x). +def trunc_ext_fold_matchinfo : GIDefMatchData<"std::pair<Register, unsigned>">; +def trunc_ext_fold: GICombineRule < + (defs root:$root, trunc_ext_fold_matchinfo:$matchinfo), + (match (wip_match_opcode G_TRUNC):$root, + [{ return Helper.matchCombineTruncOfExt(*${root}, ${matchinfo}); }]), + (apply [{ return Helper.applyCombineTruncOfExt(*${root}, ${matchinfo}); }]) +>; + +// Fold trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits(). +def trunc_shl_matchinfo : GIDefMatchData<"std::pair<Register, Register>">; +def trunc_shl: GICombineRule < + (defs root:$root, trunc_shl_matchinfo:$matchinfo), + (match (wip_match_opcode G_TRUNC):$root, + [{ return Helper.matchCombineTruncOfShl(*${root}, ${matchinfo}); }]), + (apply [{ return Helper.applyCombineTruncOfShl(*${root}, ${matchinfo}); }]) +>; + +// Transform (mul x, -1) -> (sub 0, x) +def mul_by_neg_one: GICombineRule < + (defs root:$root), + (match (wip_match_opcode G_MUL):$root, + [{ return Helper.matchConstantOp(${root}->getOperand(2), -1); }]), + (apply [{ return Helper.applyCombineMulByNegativeOne(*${root}); }]) +>; + +// Fold (xor (and x, y), y) -> (and (not x), y) +def xor_of_and_with_same_reg_matchinfo : + GIDefMatchData<"std::pair<Register, Register>">; +def xor_of_and_with_same_reg: GICombineRule < + (defs root:$root, xor_of_and_with_same_reg_matchinfo:$matchinfo), + (match (wip_match_opcode G_XOR):$root, + [{ return Helper.matchXorOfAndWithSameReg(*${root}, ${matchinfo}); }]), + (apply [{ return Helper.applyXorOfAndWithSameReg(*${root}, ${matchinfo}); }]) +>; + +// Transform (ptr_add 0, x) -> (int_to_ptr x) +def ptr_add_with_zero: GICombineRule< + (defs root:$root), + (match (wip_match_opcode G_PTR_ADD):$root, + [{ return Helper.matchPtrAddZero(*${root}); }]), + (apply [{ return Helper.applyPtrAddZero(*${root}); }])>; + +def regs_small_vec : GIDefMatchData<"SmallVector<Register, 4>">; +def combine_insert_vec_elts_build_vector : GICombineRule< + (defs root:$root, regs_small_vec:$info), + (match (wip_match_opcode G_INSERT_VECTOR_ELT):$root, + [{ return Helper.matchCombineInsertVecElts(*${root}, ${info}); }]), + (apply [{ return Helper.applyCombineInsertVecElts(*${root}, ${info}); }])>; + +def load_or_combine_matchdata : +GIDefMatchData<"std::function<void(MachineIRBuilder &)>">; +def load_or_combine : GICombineRule< + (defs root:$root, load_or_combine_matchdata:$info), + (match (wip_match_opcode G_OR):$root, + [{ return Helper.matchLoadOrCombine(*${root}, ${info}); }]), + (apply [{ return 
Helper.applyLoadOrCombine(*${root}, ${info}); }])>; + +// Currently only the one combine above. +def insert_vec_elt_combines : GICombineGroup< + [combine_insert_vec_elts_build_vector]>; + // FIXME: These should use the custom predicate feature once it lands. def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero, undef_to_negative_one, + binop_left_undef_to_zero, propagate_undef_any_op, propagate_undef_all_ops, propagate_undef_shuffle_mask, @@ -267,9 +568,31 @@ def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero, def identity_combines : GICombineGroup<[select_same_val, right_identity_zero, binop_same_val, binop_left_to_zero, - binop_right_to_zero]>; - -def trivial_combines : GICombineGroup<[copy_prop, mul_to_shl]>; -def all_combines : GICombineGroup<[trivial_combines, ptr_add_immed_chain, - combines_for_extload, combine_indexed_load_store, undef_combines, - identity_combines, simplify_add_to_sub]>; + binop_right_to_zero, p2i_to_i2p, + i2p_to_p2i, anyext_trunc_fold, + fneg_fneg_fold, right_identity_one]>; + +def const_combines : GICombineGroup<[constant_fp_op, const_ptradd_to_i2p]>; + +def known_bits_simplifications : GICombineGroup<[ + redundant_and, redundant_sext_inreg, redundant_or, urem_pow2_to_mask]>; + +def width_reduction_combines : GICombineGroup<[reduce_shl_of_extend]>; + +def select_combines : GICombineGroup<[select_undef_cmp, select_constant_cmp]>; + +def trivial_combines : GICombineGroup<[copy_prop, mul_to_shl, add_p2i_to_ptradd, + mul_by_neg_one]>; + +def all_combines : GICombineGroup<[trivial_combines, insert_vec_elt_combines, + ptr_add_immed_chain, combines_for_extload, combine_indexed_load_store, + undef_combines, identity_combines, simplify_add_to_sub, + hoist_logic_op_with_same_opcode_hands, + shl_ashr_to_sext_inreg, sext_inreg_of_load, + width_reduction_combines, select_combines, + known_bits_simplifications, ext_ext_fold, + not_cmp_fold, opt_brcond_by_inverting_cond, + unmerge_merge, fabs_fabs_fold, unmerge_cst, unmerge_dead_to_trunc, + unmerge_zext_to_zext, trunc_ext_fold, trunc_shl, + const_combines, xor_of_and_with_same_reg, ptr_add_with_zero, + shift_immed_chain, shift_of_shifted_logic_chain, load_or_combine]>; diff --git a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td index 150834e65b2d..6fb8a6b15dd7 100644 --- a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td +++ b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td @@ -26,8 +26,8 @@ class GINodeEquiv<Instruction i, SDNode node> { // SelectionDAG has separate nodes for atomic and non-atomic memory operations // (ISD::LOAD, ISD::ATOMIC_LOAD, ISD::STORE, ISD::ATOMIC_STORE) but GlobalISel // stores this information in the MachineMemoryOperand. - bit CheckMMOIsNonAtomic = 0; - bit CheckMMOIsAtomic = 0; + bit CheckMMOIsNonAtomic = false; + bit CheckMMOIsAtomic = false; // SelectionDAG has one node for all loads and uses predicates to // differentiate them. GlobalISel on the other hand uses separate opcodes. 
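Several of the Combine.td rules added above encode small algebraic identities that the paired Helper.match*/apply* functions recognise and rewrite. For example, shl_ashr_to_sext_inreg turns ashr (shl x, C), C into a sign-extension of the low (bitwidth - C) bits, and xor_of_and_with_same_reg turns (xor (and x, y), y) into (and (not x), y). The plain C++ check below exercises both identities over a range of values; it assumes the usual two's-complement conversions and is not the combiner code itself.

  #include <cassert>
  #include <cstdint>
  #include <cstdio>

  int main() {
    const unsigned C = 24;                       // shift amount, bitwidth 32
    for (int32_t x = -300; x <= 300; ++x) {
      // shl_ashr_to_sext_inreg: ashr (shl x, 24), 24 is a sign-extension of
      // the low 8 bits of x, i.e. G_SEXT_INREG x, 8.
      int32_t via_shifts = (int32_t)((uint32_t)x << C) >> C;
      int32_t via_sext = (int32_t)(int8_t)(uint8_t)x;
      assert(via_shifts == via_sext);

      for (int32_t y = -40; y <= 40; ++y) {
        // xor_of_and_with_same_reg: (x & y) ^ y == ~x & y, bit by bit.
        assert(((x & y) ^ y) == (~x & y));
      }
    }
    std::puts("both identities hold");
    return 0;
  }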
@@ -52,6 +52,8 @@ def : GINodeEquiv<G_BITCAST, bitconvert>; def : GINodeEquiv<G_CONSTANT, imm>; def : GINodeEquiv<G_FCONSTANT, fpimm>; def : GINodeEquiv<G_IMPLICIT_DEF, undef>; +def : GINodeEquiv<G_FRAME_INDEX, frameindex>; +def : GINodeEquiv<G_BLOCK_ADDR, blockaddress>; def : GINodeEquiv<G_ADD, add>; def : GINodeEquiv<G_SUB, sub>; def : GINodeEquiv<G_MUL, mul>; @@ -71,6 +73,16 @@ def : GINodeEquiv<G_SADDSAT, saddsat>; def : GINodeEquiv<G_UADDSAT, uaddsat>; def : GINodeEquiv<G_SSUBSAT, ssubsat>; def : GINodeEquiv<G_USUBSAT, usubsat>; +def : GINodeEquiv<G_SSHLSAT, sshlsat>; +def : GINodeEquiv<G_USHLSAT, ushlsat>; +def : GINodeEquiv<G_SMULFIX, smulfix>; +def : GINodeEquiv<G_UMULFIX, umulfix>; +def : GINodeEquiv<G_SMULFIXSAT, smulfixsat>; +def : GINodeEquiv<G_UMULFIXSAT, umulfixsat>; +def : GINodeEquiv<G_SDIVFIX, sdivfix>; +def : GINodeEquiv<G_UDIVFIX, udivfix>; +def : GINodeEquiv<G_SDIVFIXSAT, sdivfixsat>; +def : GINodeEquiv<G_UDIVFIXSAT, udivfixsat>; def : GINodeEquiv<G_SELECT, select>; def : GINodeEquiv<G_FNEG, fneg>; def : GINodeEquiv<G_FPEXT, fpextend>; @@ -104,7 +116,7 @@ def : GINodeEquiv<G_CTTZ, cttz>; def : GINodeEquiv<G_CTLZ_ZERO_UNDEF, ctlz_zero_undef>; def : GINodeEquiv<G_CTTZ_ZERO_UNDEF, cttz_zero_undef>; def : GINodeEquiv<G_CTPOP, ctpop>; -def : GINodeEquiv<G_EXTRACT_VECTOR_ELT, vector_extract>; +def : GINodeEquiv<G_EXTRACT_VECTOR_ELT, extractelt>; def : GINodeEquiv<G_CONCAT_VECTORS, concat_vectors>; def : GINodeEquiv<G_BUILD_VECTOR, build_vector>; def : GINodeEquiv<G_FCEIL, fceil>; @@ -117,11 +129,13 @@ def : GINodeEquiv<G_FRINT, frint>; def : GINodeEquiv<G_FNEARBYINT, fnearbyint>; def : GINodeEquiv<G_INTRINSIC_TRUNC, ftrunc>; def : GINodeEquiv<G_INTRINSIC_ROUND, fround>; +def : GINodeEquiv<G_INTRINSIC_LRINT, lrint>; def : GINodeEquiv<G_FCOPYSIGN, fcopysign>; def : GINodeEquiv<G_SMIN, smin>; def : GINodeEquiv<G_SMAX, smax>; def : GINodeEquiv<G_UMIN, umin>; def : GINodeEquiv<G_UMAX, umax>; +def : GINodeEquiv<G_ABS, abs>; def : GINodeEquiv<G_FMINNUM, fminnum>; def : GINodeEquiv<G_FMAXNUM, fmaxnum>; def : GINodeEquiv<G_FMINNUM_IEEE, fminnum_ieee>; @@ -144,7 +158,7 @@ def : GINodeEquiv<G_STRICT_FSQRT, strict_fsqrt>; // separate nodes for them. This GINodeEquiv maps the non-atomic loads to // G_LOAD with a non-atomic MachineMemOperand. def : GINodeEquiv<G_LOAD, ld> { - let CheckMMOIsNonAtomic = 1; + let CheckMMOIsNonAtomic = true; let IfSignExtend = G_SEXTLOAD; let IfZeroExtend = G_ZEXTLOAD; } @@ -160,11 +174,17 @@ def : GINodeEquiv<G_ICMP, setcc> { // G_STORE handles both atomic and non-atomic stores where as SelectionDAG had // separate nodes for them. This GINodeEquiv maps the non-atomic stores to // G_STORE with a non-atomic MachineMemOperand. -def : GINodeEquiv<G_STORE, st> { let CheckMMOIsNonAtomic = 1; } +def : GINodeEquiv<G_STORE, st> { let CheckMMOIsNonAtomic = true; } def : GINodeEquiv<G_LOAD, atomic_load> { - let CheckMMOIsNonAtomic = 0; - let CheckMMOIsAtomic = 1; + let CheckMMOIsNonAtomic = false; + let CheckMMOIsAtomic = true; +} + +// Operands are swapped for atomic_store vs. 
regular store +def : GINodeEquiv<G_STORE, atomic_store> { + let CheckMMOIsNonAtomic = false; + let CheckMMOIsAtomic = true; } def : GINodeEquiv<G_ATOMIC_CMPXCHG, atomic_cmp_swap>; diff --git a/llvm/include/llvm/Target/Target.td b/llvm/include/llvm/Target/Target.td index aab5376db453..1c97d70a477f 100644 --- a/llvm/include/llvm/Target/Target.td +++ b/llvm/include/llvm/Target/Target.td @@ -110,9 +110,9 @@ class SubRegIndex<int size, int offset = 0> { // ComposedSubRegIndex - A sub-register that is the result of composing A and B. // Offset is set to the sum of A and B's Offsets. Size is set to B's Size. class ComposedSubRegIndex<SubRegIndex A, SubRegIndex B> - : SubRegIndex<B.Size, !if(!eq(A.Offset, -1), -1, - !if(!eq(B.Offset, -1), -1, - !add(A.Offset, B.Offset)))> { + : SubRegIndex<B.Size, !cond(!eq(A.Offset, -1): -1, + !eq(B.Offset, -1): -1, + true: !add(A.Offset, B.Offset))> { // See SubRegIndex. let ComposedOf = [A, B]; } @@ -175,12 +175,12 @@ class Register<string n, list<string> altNames = []> { // completely determined by the value of its sub-registers. For example, the // x86 register AX is covered by its sub-registers AL and AH, but EAX is not // covered by its sub-register AX. - bit CoveredBySubRegs = 0; + bit CoveredBySubRegs = false; // HWEncoding - The target specific hardware encoding for this register. bits<16> HWEncoding = 0; - bit isArtificial = 0; + bit isArtificial = false; } // RegisterWithSubRegs - This can be used to define instances of Register which @@ -252,7 +252,7 @@ class RegisterClass<string namespace, list<ValueType> regTypes, int alignment, // isAllocatable - Specify that the register class can be used for virtual // registers and register allocation. Some register classes are only used to // model instruction operand constraints, and should have isAllocatable = 0. - bit isAllocatable = 1; + bit isAllocatable = true; // AltOrders - List of alternative allocation orders. The default order is // MemberList itself, and that is good enough for most targets since the @@ -278,7 +278,7 @@ class RegisterClass<string namespace, list<ValueType> regTypes, int alignment, // Generate register pressure set for this register class and any class // synthesized from it. Set to 0 to inhibit unneeded pressure sets. - bit GeneratePressureSet = 1; + bit GeneratePressureSet = true; // Weight override for register pressure calculation. This is the value // TargetRegisterClass::getRegClassWeight() will return. The weight is in @@ -452,7 +452,7 @@ class InstructionEncoding { // DecodeInstB() is not able to determine if all possible values of ?? are // valid or not. If DecodeInstB() returns Fail the decoder will attempt to // decode the bitpattern as InstA too. - bit hasCompleteDecoder = 1; + bit hasCompleteDecoder = true; } // Allows specifying an InstructionEncoding by HwMode. If an Instruction specifies @@ -506,59 +506,59 @@ class Instruction : InstructionEncoding { // Indicates if this is a pre-isel opcode that should be // legalized/regbankselected/selected. - bit isPreISelOpcode = 0; + bit isPreISelOpcode = false; // These bits capture information about the high-level semantics of the // instruction. - bit isReturn = 0; // Is this instruction a return instruction? - bit isBranch = 0; // Is this instruction a branch instruction? - bit isEHScopeReturn = 0; // Does this instruction end an EH scope? - bit isIndirectBranch = 0; // Is this instruction an indirect branch? - bit isCompare = 0; // Is this instruction a comparison instruction? 
- bit isMoveImm = 0; // Is this instruction a move immediate instruction? - bit isMoveReg = 0; // Is this instruction a move register instruction? - bit isBitcast = 0; // Is this instruction a bitcast instruction? - bit isSelect = 0; // Is this instruction a select instruction? - bit isBarrier = 0; // Can control flow fall through this instruction? - bit isCall = 0; // Is this instruction a call instruction? - bit isAdd = 0; // Is this instruction an add instruction? - bit isTrap = 0; // Is this instruction a trap instruction? - bit canFoldAsLoad = 0; // Can this be folded as a simple memory operand? - bit mayLoad = ?; // Is it possible for this inst to read memory? - bit mayStore = ?; // Is it possible for this inst to write memory? - bit mayRaiseFPException = 0; // Can this raise a floating-point exception? - bit isConvertibleToThreeAddress = 0; // Can this 2-addr instruction promote? - bit isCommutable = 0; // Is this 3 operand instruction commutable? - bit isTerminator = 0; // Is this part of the terminator for a basic block? - bit isReMaterializable = 0; // Is this instruction re-materializable? - bit isPredicable = 0; // 1 means this instruction is predicable - // even if it does not have any operand - // tablegen can identify as a predicate - bit isUnpredicable = 0; // 1 means this instruction is not predicable - // even if it _does_ have a predicate operand - bit hasDelaySlot = 0; // Does this instruction have an delay slot? - bit usesCustomInserter = 0; // Pseudo instr needing special help. - bit hasPostISelHook = 0; // To be *adjusted* after isel by target hook. - bit hasCtrlDep = 0; // Does this instruction r/w ctrl-flow chains? - bit isNotDuplicable = 0; // Is it unsafe to duplicate this instruction? - bit isConvergent = 0; // Is this instruction convergent? - bit isAuthenticated = 0; // Does this instruction authenticate a pointer? - bit isAsCheapAsAMove = 0; // As cheap (or cheaper) than a move instruction. - bit hasExtraSrcRegAllocReq = 0; // Sources have special regalloc requirement? - bit hasExtraDefRegAllocReq = 0; // Defs have special regalloc requirement? - bit isRegSequence = 0; // Is this instruction a kind of reg sequence? - // If so, make sure to override - // TargetInstrInfo::getRegSequenceLikeInputs. - bit isPseudo = 0; // Is this instruction a pseudo-instruction? - // If so, won't have encoding information for - // the [MC]CodeEmitter stuff. - bit isExtractSubreg = 0; // Is this instruction a kind of extract subreg? - // If so, make sure to override - // TargetInstrInfo::getExtractSubregLikeInputs. - bit isInsertSubreg = 0; // Is this instruction a kind of insert subreg? - // If so, make sure to override - // TargetInstrInfo::getInsertSubregLikeInputs. - bit variadicOpsAreDefs = 0; // Are variadic operands definitions? + bit isReturn = false; // Is this instruction a return instruction? + bit isBranch = false; // Is this instruction a branch instruction? + bit isEHScopeReturn = false; // Does this instruction end an EH scope? + bit isIndirectBranch = false; // Is this instruction an indirect branch? + bit isCompare = false; // Is this instruction a comparison instruction? + bit isMoveImm = false; // Is this instruction a move immediate instruction? + bit isMoveReg = false; // Is this instruction a move register instruction? + bit isBitcast = false; // Is this instruction a bitcast instruction? + bit isSelect = false; // Is this instruction a select instruction? + bit isBarrier = false; // Can control flow fall through this instruction? 
+ bit isCall = false; // Is this instruction a call instruction? + bit isAdd = false; // Is this instruction an add instruction? + bit isTrap = false; // Is this instruction a trap instruction? + bit canFoldAsLoad = false; // Can this be folded as a simple memory operand? + bit mayLoad = ?; // Is it possible for this inst to read memory? + bit mayStore = ?; // Is it possible for this inst to write memory? + bit mayRaiseFPException = false; // Can this raise a floating-point exception? + bit isConvertibleToThreeAddress = false; // Can this 2-addr instruction promote? + bit isCommutable = false; // Is this 3 operand instruction commutable? + bit isTerminator = false; // Is this part of the terminator for a basic block? + bit isReMaterializable = false; // Is this instruction re-materializable? + bit isPredicable = false; // 1 means this instruction is predicable + // even if it does not have any operand + // tablegen can identify as a predicate + bit isUnpredicable = false; // 1 means this instruction is not predicable + // even if it _does_ have a predicate operand + bit hasDelaySlot = false; // Does this instruction have an delay slot? + bit usesCustomInserter = false; // Pseudo instr needing special help. + bit hasPostISelHook = false; // To be *adjusted* after isel by target hook. + bit hasCtrlDep = false; // Does this instruction r/w ctrl-flow chains? + bit isNotDuplicable = false; // Is it unsafe to duplicate this instruction? + bit isConvergent = false; // Is this instruction convergent? + bit isAuthenticated = false; // Does this instruction authenticate a pointer? + bit isAsCheapAsAMove = false; // As cheap (or cheaper) than a move instruction. + bit hasExtraSrcRegAllocReq = false; // Sources have special regalloc requirement? + bit hasExtraDefRegAllocReq = false; // Defs have special regalloc requirement? + bit isRegSequence = false; // Is this instruction a kind of reg sequence? + // If so, make sure to override + // TargetInstrInfo::getRegSequenceLikeInputs. + bit isPseudo = false; // Is this instruction a pseudo-instruction? + // If so, won't have encoding information for + // the [MC]CodeEmitter stuff. + bit isExtractSubreg = false; // Is this instruction a kind of extract subreg? + // If so, make sure to override + // TargetInstrInfo::getExtractSubregLikeInputs. + bit isInsertSubreg = false; // Is this instruction a kind of insert subreg? + // If so, make sure to override + // TargetInstrInfo::getInsertSubregLikeInputs. + bit variadicOpsAreDefs = false; // Are variadic operands definitions? // Does the instruction have side effects that are not captured by any // operands of the instruction or other flags? @@ -581,15 +581,15 @@ class Instruction : InstructionEncoding { // CodeEmitter unchanged, but duplicates a canonical instruction // definition's encoding and should be ignored when constructing the // assembler match tables. - bit isCodeGenOnly = 0; + bit isCodeGenOnly = false; // Is this instruction a pseudo instruction for use by the assembler parser. - bit isAsmParserOnly = 0; + bit isAsmParserOnly = false; // This instruction is not expected to be queried for scheduling latencies // and therefore needs no scheduling information even for a complete // scheduling model. - bit hasNoSchedulingInfo = 0; + bit hasNoSchedulingInfo = false; InstrItinClass Itinerary = NoItinerary;// Execution steps used for scheduling. 
@@ -630,13 +630,13 @@ class Instruction : InstructionEncoding { /// UseNamedOperandTable - If set, the operand indices of this instruction /// can be queried via the getNamedOperandIdx() function which is generated /// by TableGen. - bit UseNamedOperandTable = 0; + bit UseNamedOperandTable = false; /// Should FastISel ignore this instruction. For certain ISAs, they have /// instructions which map to the same ISD Opcode, value type operands and /// instruction selection predicates. FastISel cannot handle such cases, but /// SelectionDAG can. - bit FastISelShouldIgnore = 0; + bit FastISelShouldIgnore = false; } /// Defines an additional encoding that disassembles to the given instruction @@ -651,7 +651,7 @@ class AdditionalEncoding<Instruction I> : InstructionEncoding { /// pseudo. class PseudoInstExpansion<dag Result> { dag ResultInst = Result; // The instruction to generate. - bit isPseudo = 1; + bit isPseudo = true; } /// Predicates - These are extra conditionals which are turned into instruction @@ -662,7 +662,7 @@ class Predicate<string cond> { /// AssemblerMatcherPredicate - If this feature can be used by the assembler /// matcher, this is true. Targets should set this by inheriting their /// feature from the AssemblerPredicate class in addition to Predicate. - bit AssemblerMatcherPredicate = 0; + bit AssemblerMatcherPredicate = false; /// AssemblerCondDag - Set of subtarget features being tested used /// as alternative condition string used for assembler matcher. Must be used @@ -688,7 +688,7 @@ class Predicate<string cond> { /// every function change. Most predicates can leave this at '0'. /// /// Ignored by SelectionDAG, it always recomputes the predicate on every use. - bit RecomputePerFunction = 0; + bit RecomputePerFunction = false; } /// NoHonorSignDependentRounding - This predicate is true if support for @@ -788,7 +788,7 @@ class AsmOperandClass { /// marked as IsOptional. /// /// Optional arguments must be at the end of the operand list. - bit IsOptional = 0; + bit IsOptional = false; /// The name of the method on the target specific asm parser that returns the /// default operand for this optional operand. This method is only used if @@ -809,7 +809,7 @@ class Operand<ValueType ty> : DAGOperand { ValueType Type = ty; string PrintMethod = "printOperand"; string EncoderMethod = ""; - bit hasCompleteDecoder = 1; + bit hasCompleteDecoder = true; string OperandType = "OPERAND_UNKNOWN"; dag MIOperandInfo = (ops); @@ -877,8 +877,8 @@ def f64imm : Operand<f64>; // have the same LLT). class TypedOperand<string Ty> : Operand<untyped> { let OperandType = Ty; - bit IsPointer = 0; - bit IsImmediate = 0; + bit IsPointer = false; + bit IsImmediate = false; } def type0 : TypedOperand<"OPERAND_GENERIC_0">; @@ -888,7 +888,7 @@ def type3 : TypedOperand<"OPERAND_GENERIC_3">; def type4 : TypedOperand<"OPERAND_GENERIC_4">; def type5 : TypedOperand<"OPERAND_GENERIC_5">; -let IsPointer = 1 in { +let IsPointer = true in { def ptype0 : TypedOperand<"OPERAND_GENERIC_0">; def ptype1 : TypedOperand<"OPERAND_GENERIC_1">; def ptype2 : TypedOperand<"OPERAND_GENERIC_2">; @@ -900,7 +900,7 @@ let IsPointer = 1 in { // untyped_imm is for operands where isImm() will be true. It currently has no // special behaviour and is only used for clarity. def untyped_imm_0 : TypedOperand<"OPERAND_GENERIC_IMM_0"> { - let IsImmediate = 1; + let IsImmediate = true; } /// zero_reg definition - Special node to stand for the zero register. 
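Aside, not part of the imported sources: the UseNamedOperandTable bit changed above is what makes TableGen emit the per-target getNamedOperandIdx() helper its comment refers to. A hedged C++ sketch of the usual call pattern; MyTarget, OpName::src0 and printSrc0IfPresent are placeholder names, not definitions from this change:

#include "llvm/CodeGen/MachineInstr.h"

// Sketch under assumptions: getNamedOperandIdx() and the OpName enum are
// generated only for targets that set UseNamedOperandTable; "MyTarget" and
// "src0" are hypothetical names.
static void printSrc0IfPresent(const llvm::MachineInstr &MI) {
  int Idx = MyTarget::getNamedOperandIdx(MI.getOpcode(), MyTarget::OpName::src0);
  if (Idx < 0)
    return; // This opcode has no operand named src0.
  const llvm::MachineOperand &MO = MI.getOperand(Idx);
  (void)MO; // Inspect or rewrite the operand by name rather than by position.
}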
@@ -952,7 +952,7 @@ class InstrInfo { // For instance, while both Sparc and PowerPC are big-endian platforms, the // Sparc manual specifies its instructions in the format [31..0] (big), while // PowerPC specifies them using the format [0..31] (little). - bit isLittleEndianEncoding = 0; + bit isLittleEndianEncoding = false; // The instruction properties mayLoad, mayStore, and hasSideEffects are unset // by default, and TableGen will infer their value from the instruction @@ -963,7 +963,7 @@ class InstrInfo { // is set, it will guess a safe value instead. // // This option is a temporary migration help. It will go away. - bit guessInstructionProperties = 1; + bit guessInstructionProperties = true; // TableGen's instruction encoder generator has support for matching operands // to bit-field variables both by name and by position. While matching by @@ -975,7 +975,7 @@ class InstrInfo { // This option is temporary; it will go away once the TableGen decoder // generator has better support for complex operands and targets have // migrated away from using positionally encoded operands. - bit decodePositionallyEncodedOperands = 0; + bit decodePositionallyEncodedOperands = false; // When set, this indicates that there will be no overlap between those // operands that are matched by ordering (positional operands) and those @@ -984,7 +984,7 @@ class InstrInfo { // This option is temporary; it will go away once the TableGen decoder // generator has better support for complex operands and targets have // migrated away from using positionally encoded operands. - bit noNamedPositionallyEncodedOperands = 0; + bit noNamedPositionallyEncodedOperands = false; } // Standard Pseudo Instructions. @@ -994,31 +994,31 @@ class InstrInfo { // targets that set guessInstructionProperties=0. Any local definition of // mayLoad/mayStore takes precedence over these default values. class StandardPseudoInstruction : Instruction { - let mayLoad = 0; - let mayStore = 0; - let isCodeGenOnly = 1; - let isPseudo = 1; - let hasNoSchedulingInfo = 1; + let mayLoad = false; + let mayStore = false; + let isCodeGenOnly = true; + let isPseudo = true; + let hasNoSchedulingInfo = true; let Namespace = "TargetOpcode"; } def PHI : StandardPseudoInstruction { let OutOperandList = (outs unknown:$dst); let InOperandList = (ins variable_ops); let AsmString = "PHINODE"; - let hasSideEffects = 0; + let hasSideEffects = false; } def INLINEASM : StandardPseudoInstruction { let OutOperandList = (outs); let InOperandList = (ins variable_ops); let AsmString = ""; - let hasSideEffects = 0; // Note side effect is encoded in an operand. + let hasSideEffects = false; // Note side effect is encoded in an operand. } def INLINEASM_BR : StandardPseudoInstruction { let OutOperandList = (outs); let InOperandList = (ins variable_ops); let AsmString = ""; // Unlike INLINEASM, this is always treated as having side-effects. - let hasSideEffects = 1; + let hasSideEffects = true; // Despite potentially branching, this instruction is intentionally _not_ // marked as a terminator or a branch. 
} @@ -1026,164 +1026,177 @@ def CFI_INSTRUCTION : StandardPseudoInstruction { let OutOperandList = (outs); let InOperandList = (ins i32imm:$id); let AsmString = ""; - let hasCtrlDep = 1; - let hasSideEffects = 0; - let isNotDuplicable = 1; + let hasCtrlDep = true; + let hasSideEffects = false; + let isNotDuplicable = true; } def EH_LABEL : StandardPseudoInstruction { let OutOperandList = (outs); let InOperandList = (ins i32imm:$id); let AsmString = ""; - let hasCtrlDep = 1; - let hasSideEffects = 0; - let isNotDuplicable = 1; + let hasCtrlDep = true; + let hasSideEffects = false; + let isNotDuplicable = true; } def GC_LABEL : StandardPseudoInstruction { let OutOperandList = (outs); let InOperandList = (ins i32imm:$id); let AsmString = ""; - let hasCtrlDep = 1; - let hasSideEffects = 0; - let isNotDuplicable = 1; + let hasCtrlDep = true; + let hasSideEffects = false; + let isNotDuplicable = true; } def ANNOTATION_LABEL : StandardPseudoInstruction { let OutOperandList = (outs); let InOperandList = (ins i32imm:$id); let AsmString = ""; - let hasCtrlDep = 1; - let hasSideEffects = 0; - let isNotDuplicable = 1; + let hasCtrlDep = true; + let hasSideEffects = false; + let isNotDuplicable = true; } def KILL : StandardPseudoInstruction { let OutOperandList = (outs); let InOperandList = (ins variable_ops); let AsmString = ""; - let hasSideEffects = 0; + let hasSideEffects = false; } def EXTRACT_SUBREG : StandardPseudoInstruction { let OutOperandList = (outs unknown:$dst); let InOperandList = (ins unknown:$supersrc, i32imm:$subidx); let AsmString = ""; - let hasSideEffects = 0; + let hasSideEffects = false; } def INSERT_SUBREG : StandardPseudoInstruction { let OutOperandList = (outs unknown:$dst); let InOperandList = (ins unknown:$supersrc, unknown:$subsrc, i32imm:$subidx); let AsmString = ""; - let hasSideEffects = 0; + let hasSideEffects = false; let Constraints = "$supersrc = $dst"; } def IMPLICIT_DEF : StandardPseudoInstruction { let OutOperandList = (outs unknown:$dst); let InOperandList = (ins); let AsmString = ""; - let hasSideEffects = 0; - let isReMaterializable = 1; - let isAsCheapAsAMove = 1; + let hasSideEffects = false; + let isReMaterializable = true; + let isAsCheapAsAMove = true; } def SUBREG_TO_REG : StandardPseudoInstruction { let OutOperandList = (outs unknown:$dst); let InOperandList = (ins unknown:$implsrc, unknown:$subsrc, i32imm:$subidx); let AsmString = ""; - let hasSideEffects = 0; + let hasSideEffects = false; } def COPY_TO_REGCLASS : StandardPseudoInstruction { let OutOperandList = (outs unknown:$dst); let InOperandList = (ins unknown:$src, i32imm:$regclass); let AsmString = ""; - let hasSideEffects = 0; - let isAsCheapAsAMove = 1; + let hasSideEffects = false; + let isAsCheapAsAMove = true; } def DBG_VALUE : StandardPseudoInstruction { let OutOperandList = (outs); let InOperandList = (ins variable_ops); let AsmString = "DBG_VALUE"; - let hasSideEffects = 0; + let hasSideEffects = false; +} +def DBG_INSTR_REF : StandardPseudoInstruction { + let OutOperandList = (outs); + let InOperandList = (ins variable_ops); + let AsmString = "DBG_INSTR_REF"; + let hasSideEffects = false; } def DBG_LABEL : StandardPseudoInstruction { let OutOperandList = (outs); let InOperandList = (ins unknown:$label); let AsmString = "DBG_LABEL"; - let hasSideEffects = 0; + let hasSideEffects = false; } def REG_SEQUENCE : StandardPseudoInstruction { let OutOperandList = (outs unknown:$dst); let InOperandList = (ins unknown:$supersrc, variable_ops); let AsmString = ""; - let hasSideEffects = 0; - let 
isAsCheapAsAMove = 1; + let hasSideEffects = false; + let isAsCheapAsAMove = true; } def COPY : StandardPseudoInstruction { let OutOperandList = (outs unknown:$dst); let InOperandList = (ins unknown:$src); let AsmString = ""; - let hasSideEffects = 0; - let isAsCheapAsAMove = 1; - let hasNoSchedulingInfo = 0; + let hasSideEffects = false; + let isAsCheapAsAMove = true; + let hasNoSchedulingInfo = false; } def BUNDLE : StandardPseudoInstruction { let OutOperandList = (outs); let InOperandList = (ins variable_ops); let AsmString = "BUNDLE"; - let hasSideEffects = 0; + let hasSideEffects = false; } def LIFETIME_START : StandardPseudoInstruction { let OutOperandList = (outs); let InOperandList = (ins i32imm:$id); let AsmString = "LIFETIME_START"; - let hasSideEffects = 0; + let hasSideEffects = false; } def LIFETIME_END : StandardPseudoInstruction { let OutOperandList = (outs); let InOperandList = (ins i32imm:$id); let AsmString = "LIFETIME_END"; - let hasSideEffects = 0; + let hasSideEffects = false; +} +def PSEUDO_PROBE : StandardPseudoInstruction { + let OutOperandList = (outs); + let InOperandList = (ins i64imm:$guid, i64imm:$index, i8imm:$type, i32imm:$attr); + let AsmString = "PSEUDO_PROBE"; + let hasSideEffects = 1; } + def STACKMAP : StandardPseudoInstruction { let OutOperandList = (outs); let InOperandList = (ins i64imm:$id, i32imm:$nbytes, variable_ops); - let hasSideEffects = 1; - let isCall = 1; - let mayLoad = 1; - let usesCustomInserter = 1; + let hasSideEffects = true; + let isCall = true; + let mayLoad = true; + let usesCustomInserter = true; } def PATCHPOINT : StandardPseudoInstruction { let OutOperandList = (outs unknown:$dst); let InOperandList = (ins i64imm:$id, i32imm:$nbytes, unknown:$callee, i32imm:$nargs, i32imm:$cc, variable_ops); - let hasSideEffects = 1; - let isCall = 1; - let mayLoad = 1; - let usesCustomInserter = 1; + let hasSideEffects = true; + let isCall = true; + let mayLoad = true; + let usesCustomInserter = true; } def STATEPOINT : StandardPseudoInstruction { - let OutOperandList = (outs); + let OutOperandList = (outs variable_ops); let InOperandList = (ins variable_ops); - let usesCustomInserter = 1; - let mayLoad = 1; - let mayStore = 1; - let hasSideEffects = 1; - let isCall = 1; + let usesCustomInserter = true; + let mayLoad = true; + let mayStore = true; + let hasSideEffects = true; + let isCall = true; } def LOAD_STACK_GUARD : StandardPseudoInstruction { let OutOperandList = (outs ptr_rc:$dst); let InOperandList = (ins); - let mayLoad = 1; - bit isReMaterializable = 1; - let hasSideEffects = 0; - bit isPseudo = 1; + let mayLoad = true; + bit isReMaterializable = true; + let hasSideEffects = false; + bit isPseudo = true; } def PREALLOCATED_SETUP : StandardPseudoInstruction { let OutOperandList = (outs); let InOperandList = (ins i32imm:$a); - let usesCustomInserter = 1; - let hasSideEffects = 1; + let usesCustomInserter = true; + let hasSideEffects = true; } def PREALLOCATED_ARG : StandardPseudoInstruction { let OutOperandList = (outs ptr_rc:$loc); let InOperandList = (ins i32imm:$a, i32imm:$b); - let usesCustomInserter = 1; - let hasSideEffects = 1; + let usesCustomInserter = true; + let hasSideEffects = true; } def LOCAL_ESCAPE : StandardPseudoInstruction { // This instruction is really just a label. It has to be part of the chain so @@ -1191,93 +1204,94 @@ def LOCAL_ESCAPE : StandardPseudoInstruction { // no side effects. 
let OutOperandList = (outs); let InOperandList = (ins ptr_rc:$symbol, i32imm:$id); - let hasSideEffects = 0; - let hasCtrlDep = 1; + let hasSideEffects = false; + let hasCtrlDep = true; } def FAULTING_OP : StandardPseudoInstruction { let OutOperandList = (outs unknown:$dst); let InOperandList = (ins variable_ops); - let usesCustomInserter = 1; - let hasSideEffects = 1; - let mayLoad = 1; - let mayStore = 1; - let isTerminator = 1; - let isBranch = 1; + let usesCustomInserter = true; + let hasSideEffects = true; + let mayLoad = true; + let mayStore = true; + let isTerminator = true; + let isBranch = true; } def PATCHABLE_OP : StandardPseudoInstruction { let OutOperandList = (outs); let InOperandList = (ins variable_ops); - let usesCustomInserter = 1; - let mayLoad = 1; - let mayStore = 1; - let hasSideEffects = 1; + let usesCustomInserter = true; + let mayLoad = true; + let mayStore = true; + let hasSideEffects = true; } def PATCHABLE_FUNCTION_ENTER : StandardPseudoInstruction { let OutOperandList = (outs); let InOperandList = (ins); let AsmString = "# XRay Function Enter."; - let usesCustomInserter = 1; - let hasSideEffects = 1; + let usesCustomInserter = true; + let hasSideEffects = true; } def PATCHABLE_RET : StandardPseudoInstruction { let OutOperandList = (outs); let InOperandList = (ins variable_ops); let AsmString = "# XRay Function Patchable RET."; - let usesCustomInserter = 1; - let hasSideEffects = 1; - let isTerminator = 1; - let isReturn = 1; + let usesCustomInserter = true; + let hasSideEffects = true; + let isTerminator = true; + let isReturn = true; } def PATCHABLE_FUNCTION_EXIT : StandardPseudoInstruction { let OutOperandList = (outs); let InOperandList = (ins); let AsmString = "# XRay Function Exit."; - let usesCustomInserter = 1; - let hasSideEffects = 1; - let isReturn = 0; // Original return instruction will follow + let usesCustomInserter = true; + let hasSideEffects = true; + let isReturn = false; // Original return instruction will follow } def PATCHABLE_TAIL_CALL : StandardPseudoInstruction { let OutOperandList = (outs); let InOperandList = (ins variable_ops); let AsmString = "# XRay Tail Call Exit."; - let usesCustomInserter = 1; - let hasSideEffects = 1; - let isReturn = 1; + let usesCustomInserter = true; + let hasSideEffects = true; + let isReturn = true; } def PATCHABLE_EVENT_CALL : StandardPseudoInstruction { let OutOperandList = (outs); let InOperandList = (ins ptr_rc:$event, unknown:$size); let AsmString = "# XRay Custom Event Log."; - let usesCustomInserter = 1; - let isCall = 1; - let mayLoad = 1; - let mayStore = 1; - let hasSideEffects = 1; + let usesCustomInserter = true; + let isCall = true; + let mayLoad = true; + let mayStore = true; + let hasSideEffects = true; } def PATCHABLE_TYPED_EVENT_CALL : StandardPseudoInstruction { let OutOperandList = (outs); let InOperandList = (ins unknown:$type, ptr_rc:$event, unknown:$size); let AsmString = "# XRay Typed Event Log."; - let usesCustomInserter = 1; - let isCall = 1; - let mayLoad = 1; - let mayStore = 1; - let hasSideEffects = 1; + let usesCustomInserter = true; + let isCall = true; + let mayLoad = true; + let mayStore = true; + let hasSideEffects = true; } def FENTRY_CALL : StandardPseudoInstruction { let OutOperandList = (outs); let InOperandList = (ins); let AsmString = "# FEntry call"; - let usesCustomInserter = 1; - let mayLoad = 1; - let mayStore = 1; - let hasSideEffects = 1; + let usesCustomInserter = true; + let isCall = true; + let mayLoad = true; + let mayStore = true; + let hasSideEffects = 
true; } def ICALL_BRANCH_FUNNEL : StandardPseudoInstruction { let OutOperandList = (outs); let InOperandList = (ins variable_ops); let AsmString = ""; - let hasSideEffects = 1; + let hasSideEffects = true; } // Generic opcodes used in GlobalISel. @@ -1303,7 +1317,7 @@ class AsmParser { // ShouldEmitMatchRegisterName - Set to false if the target needs a hand // written register name matcher - bit ShouldEmitMatchRegisterName = 1; + bit ShouldEmitMatchRegisterName = true; // Set to true if the target needs a generated 'alternative register name' // matcher. @@ -1311,7 +1325,7 @@ class AsmParser { // This generates a function which can be used to lookup registers from // their aliases. This function will fail when called on targets where // several registers share the same alias (i.e. not a 1:1 mapping). - bit ShouldEmitMatchRegisterAltName = 0; + bit ShouldEmitMatchRegisterAltName = false; // Set to true if MatchRegisterName and MatchRegisterAltName functions // should be generated even if there are duplicate register names. The @@ -1319,11 +1333,11 @@ class AsmParser { // (e.g. in validateTargetOperandClass), and there are no guarantees about // which numeric register identifier will be returned in the case of // multiple matches. - bit AllowDuplicateRegisterNames = 0; + bit AllowDuplicateRegisterNames = false; // HasMnemonicFirst - Set to false if target instructions don't always // start with a mnemonic as the first token. - bit HasMnemonicFirst = 1; + bit HasMnemonicFirst = true; // ReportMultipleNearMisses - // When 0, the assembly matcher reports an error for one encoding or operand @@ -1331,7 +1345,7 @@ class AsmParser { // When 1, the assembly matcher returns a list of encodings that were close // to matching the parsed instruction, so to allow more detailed error // messages. - bit ReportMultipleNearMisses = 0; + bit ReportMultipleNearMisses = false; } def DefaultAsmParser : AsmParser; @@ -1342,7 +1356,7 @@ def DefaultAsmParser : AsmParser; // class AsmParserVariant { // Variant - AsmParsers can be of multiple different variants. Variants are - // used to support targets that need to parser multiple formats for the + // used to support targets that need to parse multiple formats for the // assembly language. int Variant = 0; @@ -1378,7 +1392,7 @@ def all_of; /// AssemblerPredicate - This is a Predicate that can be used when the assembler /// matches instructions and aliases. class AssemblerPredicate<dag cond, string name = ""> { - bit AssemblerMatcherPredicate = 1; + bit AssemblerMatcherPredicate = true; dag AssemblerCondDag = cond; string PredicateName = name; } @@ -1453,7 +1467,7 @@ class InstAlias<string Asm, dag Result, int Emit = 1, string VariantName = ""> { // Setting this to 0 will cause the alias to ignore the Result instruction's // defined AsmMatchConverter and instead use the function generated by the // dag Result. - bit UseInstAsmMatchConverter = 1; + bit UseInstAsmMatchConverter = true; // Assembler variant name to use for this alias. If not specified then // assembler variants will be determined based on AsmString @@ -1558,7 +1572,8 @@ class ComplexDeprecationPredicate<string dep> { // by the scheduler. Each Processor definition requires corresponding // instruction itineraries. // -class Processor<string n, ProcessorItineraries pi, list<SubtargetFeature> f> { +class Processor<string n, ProcessorItineraries pi, list<SubtargetFeature> f, + list<SubtargetFeature> tunef = []> { // Name - Chip set name. 
Used by command line (-mcpu=) to determine the // appropriate target chip. // @@ -1574,6 +1589,12 @@ class Processor<string n, ProcessorItineraries pi, list<SubtargetFeature> f> { // Features - list of list<SubtargetFeature> Features = f; + + // TuneFeatures - list of features for tuning for this CPU. If the target + // supports -mtune, this should contain the list of features used to make + // microarchitectural optimization decisions for a given processor. While + // Features should contain the architectural features for the processor. + list<SubtargetFeature> TuneFeatures = tunef; } // ProcessorModel allows subtargets to specify the more general @@ -1582,8 +1603,9 @@ class Processor<string n, ProcessorItineraries pi, list<SubtargetFeature> f> { // // Although this class always passes NoItineraries to the Processor // class, the SchedMachineModel may still define valid Itineraries. -class ProcessorModel<string n, SchedMachineModel m, list<SubtargetFeature> f> - : Processor<n, NoItineraries, f> { +class ProcessorModel<string n, SchedMachineModel m, list<SubtargetFeature> f, + list<SubtargetFeature> tunef = []> + : Processor<n, NoItineraries, f, tunef> { let SchedModel = m; } diff --git a/llvm/include/llvm/Target/TargetCallingConv.td b/llvm/include/llvm/Target/TargetCallingConv.td index 057f33083e08..b3d4fe9d0dbb 100644 --- a/llvm/include/llvm/Target/TargetCallingConv.td +++ b/llvm/include/llvm/Target/TargetCallingConv.td @@ -187,15 +187,15 @@ class CallingConv<list<CCAction> actions> { /// If true, this calling convention will be emitted as externally visible in /// the llvm namespaces instead of as a static function. - bit Entry = 0; + bit Entry = false; - bit Custom = 0; + bit Custom = false; } /// CustomCallingConv - An instance of this is used to declare calling /// conventions that are implemented using a custom function of the same name. class CustomCallingConv : CallingConv<[]> { - let Custom = 1; + let Custom = true; } /// CalleeSavedRegs - A list of callee saved registers for a given calling diff --git a/llvm/include/llvm/Target/TargetInstrPredicate.td b/llvm/include/llvm/Target/TargetInstrPredicate.td index 5623461c648d..9f2cde9d9230 100644 --- a/llvm/include/llvm/Target/TargetInstrPredicate.td +++ b/llvm/include/llvm/Target/TargetInstrPredicate.td @@ -11,7 +11,7 @@ // MCInstPredicate definitions are used by target scheduling models to describe // constraints on instructions. // -// Here is an example of an MCInstPredicate definition in tablegen: +// Here is an example of an MCInstPredicate definition in TableGen: // // def MCInstPredicateExample : CheckAll<[ // CheckOpcode<[BLR]>, @@ -126,6 +126,11 @@ class CheckRegOperand<int Index, Register R> : CheckOperandBase<Index> { // Check if register operand at index `Index` is the invalid register. class CheckInvalidRegOperand<int Index> : CheckOperandBase<Index>; +// Return true if machine operand at position `Index` is a valid +// register operand. +class CheckValidRegOperand<int Index> : + CheckNot<CheckInvalidRegOperand<Index>>; + // Check that the operand at position `Index` is immediate `Imm`. // If field `FunctionMapper` is a non-empty string, then function // `FunctionMapper` is applied to the operand value, and the return value is then @@ -254,6 +259,20 @@ class CheckFunctionPredicate<string MCInstFn, string MachineInstrFn> : MCInstPre string MachineInstrFnName = MachineInstrFn; } +// Similar to CheckFunctionPredicate. 
However it assumes that MachineInstrFn is +// a method in TargetInstrInfo, and MCInstrFn takes an extra pointer to +// MCInstrInfo. +// +// It Expands to: +// - TIIPointer->MachineInstrFn(MI) +// - MCInstrFn(MI, MCII); +class CheckFunctionPredicateWithTII<string MCInstFn, string MachineInstrFn, string +TIIPointer = "TII"> : MCInstPredicate { + string MCInstFnName = MCInstFn; + string TIIPtrName = TIIPointer; + string MachineInstrFnName = MachineInstrFn; +} + // Used to classify machine instructions based on a machine instruction // predicate. // @@ -300,8 +319,8 @@ class DepBreakingClass<list<Instruction> opcodes, MCInstPredicate pred, // - A list of subtarget hooks (Delegates) that are called from this function. // class STIPredicateDecl<string name, MCInstPredicate default = FalsePred, - bit overrides = 1, bit expandForMC = 1, - bit updatesOpcodeMask = 0, + bit overrides = true, bit expandForMC = true, + bit updatesOpcodeMask = false, list<STIPredicateDecl> delegates = []> { string Name = name; @@ -336,7 +355,7 @@ class STIPredicate<STIPredicateDecl declaration, // Convenience classes and definitions used by processor scheduling models to // describe dependency breaking instructions and move elimination candidates. -let UpdatesOpcodeMask = 1 in { +let UpdatesOpcodeMask = true in { def IsZeroIdiomDecl : STIPredicateDecl<"isZeroIdiom">; diff --git a/llvm/include/llvm/Target/TargetItinerary.td b/llvm/include/llvm/Target/TargetItinerary.td index d364fab038b5..a432d4e42b61 100644 --- a/llvm/include/llvm/Target/TargetItinerary.td +++ b/llvm/include/llvm/Target/TargetItinerary.td @@ -8,7 +8,7 @@ // // This file defines the target-independent scheduling interfaces // which should be implemented by each target that uses instruction -// itineraries for scheduling. Itineraries are details reservation +// itineraries for scheduling. Itineraries are detailed reservation // tables for each instruction class. They are most appropriate for // in-order machine with complicated scheduling or bundling constraints. // diff --git a/llvm/include/llvm/Target/TargetLoweringObjectFile.h b/llvm/include/llvm/Target/TargetLoweringObjectFile.h index cc6c93b6ee2b..ff27ceaeac35 100644 --- a/llvm/include/llvm/Target/TargetLoweringObjectFile.h +++ b/llvm/include/llvm/Target/TargetLoweringObjectFile.h @@ -38,6 +38,7 @@ class Module; class SectionKind; class StringRef; class TargetMachine; +class DSOLocalEquivalent; class TargetLoweringObjectFile : public MCObjectFileInfo { /// Name-mangler for global names. @@ -47,6 +48,7 @@ protected: bool SupportIndirectSymViaGOTPCRel = false; bool SupportGOTPCRelWithOffset = true; bool SupportDebugThreadLocalLocation = true; + bool SupportDSOLocalEquivalentLowering = false; /// PersonalityEncoding, LSDAEncoding, TTypeEncoding - Some encoding values /// for EH. @@ -61,6 +63,8 @@ protected: /// This section contains the static destructor pointer list. MCSection *StaticDtorSection = nullptr; + const TargetMachine *TM = nullptr; + public: TargetLoweringObjectFile() = default; TargetLoweringObjectFile(const TargetLoweringObjectFile &) = delete; @@ -81,6 +85,9 @@ public: /// Emit the module-level metadata that the platform cares about. virtual void emitModuleMetadata(MCStreamer &Streamer, Module &M) const {} + /// Emit Call Graph Profile metadata. + void emitCGProfileMetadata(MCStreamer &Streamer, Module &M) const; + /// Get the module-level metadata that the platform cares about. 
virtual void getModuleMetadata(Module &M) {} @@ -118,6 +125,10 @@ public: virtual MCSection *getSectionForJumpTable(const Function &F, const TargetMachine &TM) const; + virtual MCSection *getSectionForLSDA(const Function &F, + const TargetMachine &TM) const { + return LSDASection; + } virtual bool shouldPutJumpTableInFunctionSection(bool UsesLabelDifference, const Function &F) const; @@ -151,7 +162,7 @@ public: unsigned getPersonalityEncoding() const { return PersonalityEncoding; } unsigned getLSDAEncoding() const { return LSDAEncoding; } unsigned getTTypeEncoding() const { return TTypeEncoding; } - unsigned getCallSiteEncoding() const { return CallSiteEncoding; } + unsigned getCallSiteEncoding() const; const MCExpr *getTTypeReference(const MCSymbolRefExpr *Sym, unsigned Encoding, MCStreamer &Streamer) const; @@ -176,6 +187,17 @@ public: return nullptr; } + /// Target supports a native lowering of a dso_local_equivalent constant + /// without needing to replace it with equivalent IR. + bool supportDSOLocalEquivalentLowering() const { + return SupportDSOLocalEquivalentLowering; + } + + virtual const MCExpr *lowerDSOLocalEquivalent(const DSOLocalEquivalent *Equiv, + const TargetMachine &TM) const { + return nullptr; + } + /// Target supports replacing a data "PC"-relative access to a symbol /// through another symbol, by accessing the later via a GOT entry instead? bool supportIndirectSymViaGOTPCRel() const { @@ -203,12 +225,6 @@ public: return nullptr; } - virtual void emitLinkerFlagsForGlobal(raw_ostream &OS, - const GlobalValue *GV) const {} - - virtual void emitLinkerFlagsForUsed(raw_ostream &OS, - const GlobalValue *GV) const {} - /// If supported, return the section to use for the llvm.commandline /// metadata. Otherwise, return nullptr. virtual MCSection *getSectionForCommandLines() const { @@ -226,7 +242,8 @@ public: /// On targets that support TOC entries, return a section for the entry given /// the symbol it refers to. /// TODO: Implement this interface for existing ELF targets. - virtual MCSection *getSectionForTOCEntry(const MCSymbol *S) const { + virtual MCSection *getSectionForTOCEntry(const MCSymbol *S, + const TargetMachine &TM) const { return nullptr; } @@ -247,7 +264,8 @@ public: /// If supported, return the function entry point symbol. /// Otherwise, returns nulltpr. - virtual MCSymbol *getFunctionEntryPointSymbol(const Function *F, + /// Func must be a function or an alias which has a function as base object. + virtual MCSymbol *getFunctionEntryPointSymbol(const GlobalValue *Func, const TargetMachine &TM) const { return nullptr; } diff --git a/llvm/include/llvm/Target/TargetMachine.h b/llvm/include/llvm/Target/TargetMachine.h index 6d539f1145ee..f9a054dbed3d 100644 --- a/llvm/include/llvm/Target/TargetMachine.h +++ b/llvm/include/llvm/Target/TargetMachine.h @@ -16,24 +16,36 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/PassManager.h" #include "llvm/Pass.h" #include "llvm/Support/CodeGen.h" +#include "llvm/Support/Error.h" +#include "llvm/Target/CGPassBuilderOption.h" #include "llvm/Target/TargetOptions.h" #include <string> namespace llvm { +class AAManager; +template <typename IRUnitT, typename AnalysisManagerT, typename... 
ExtraArgTs> +class PassManager; +using ModulePassManager = PassManager<Module>; + class Function; class GlobalValue; +class MachineFunctionPassManager; +class MachineFunctionAnalysisManager; class MachineModuleInfoWrapperPass; class Mangler; class MCAsmInfo; class MCContext; class MCInstrInfo; class MCRegisterInfo; +class MCStreamer; class MCSubtargetInfo; class MCSymbol; class raw_pwrite_stream; +class PassBuilder; class PassManagerBuilder; struct PerFunctionMIParsingState; class SMDiagnostic; @@ -111,6 +123,7 @@ public: const Triple &getTargetTriple() const { return TargetTriple; } StringRef getTargetCPU() const { return TargetCPU; } StringRef getTargetFeatureString() const { return TargetFS; } + void setTargetFeatureString(StringRef FS) { TargetFS = std::string(FS); } /// Virtual method implemented by subclasses that returns a reference to that /// target's TargetSubtargetInfo-derived member variable. @@ -241,7 +254,9 @@ public: Options.SupportsDebugEntryValues = Enable; } - bool shouldPrintMachineCode() const { return Options.PrintMachineCode; } + bool getAIXExtendedAltivecABI() const { + return Options.EnableAIXExtendedAltivecABI; + } bool getUniqueSectionNames() const { return Options.UniqueSectionNames; } @@ -262,6 +277,16 @@ public: return Options.FunctionSections; } + /// Return true if visibility attribute should not be emitted in XCOFF, + /// corresponding to -mignore-xcoff-visibility. + bool getIgnoreXCOFFVisibility() const { + return Options.IgnoreXCOFFVisibility; + } + + /// Return true if XCOFF traceback table should be emitted, + /// corresponding to -xcoff-traceback-table. + bool getXCOFFTracebackTable() const { return Options.XCOFFTracebackTable; } + /// If basic blocks should be emitted into their own section, /// corresponding to -fbasic-block-sections. llvm::BasicBlockSection getBBSectionsType() const { @@ -273,6 +298,19 @@ public: return Options.BBSectionsFuncListBuf.get(); } + /// Returns true if a cast between SrcAS and DestAS is a noop. + virtual bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const { + return false; + } + + /// If the specified generic pointer could be assumed as a pointer to a + /// specific address space, return that address space. + /// + /// Under offloading programming, the offloading target may be passed with + /// values only prepared on the host side and could assume certain + /// properties. + virtual unsigned getAssumedAddrSpace(const Value *V) const { return -1; } + /// Get a \c TargetIRAnalysis appropriate for the target. /// /// This is used to construct the new pass manager's target IR analysis pass, @@ -290,6 +328,15 @@ public: /// PassManagerBuilder::addExtension. virtual void adjustPassManager(PassManagerBuilder &) {} + /// Allow the target to modify the pass pipeline with New Pass Manager + /// (similar to adjustPassManager for Legacy Pass manager). + virtual void registerPassBuilderCallbacks(PassBuilder &, + bool DebugPassManager) {} + + /// Allow the target to register alias analyses with the AAManager for use + /// with the new pass manager. Only affects the "default" AAManager. + virtual void registerDefaultAliasAnalyses(AAManager &) {} + /// Add passes to the specified pass manager to get the specified file /// emitted. Typically this will involve several steps of code generation. /// This method should return true if emission of this file type is not @@ -329,6 +376,8 @@ public: /// The integer bit size to use for SjLj based exception handling. 
static constexpr unsigned DefaultSjLjDataSize = 32; virtual unsigned getSjLjDataSize() const { return DefaultSjLjDataSize; } + + static std::pair<int, int> parseBinutilsVersion(StringRef Version); }; /// This class describes a target machine that is implemented with the LLVM @@ -364,6 +413,21 @@ public: bool DisableVerify = true, MachineModuleInfoWrapperPass *MMIWP = nullptr) override; + virtual Error buildCodeGenPipeline(ModulePassManager &, + MachineFunctionPassManager &, + MachineFunctionAnalysisManager &, + raw_pwrite_stream &, raw_pwrite_stream *, + CodeGenFileType, CGPassBuilderOption, + PassInstrumentationCallbacks *) { + return make_error<StringError>("buildCodeGenPipeline is not overriden", + inconvertibleErrorCode()); + } + + virtual std::pair<StringRef, bool> getPassNameFromLegacyName(StringRef) { + llvm_unreachable( + "getPassNameFromLegacyName parseMIRPipeline is not overriden"); + } + /// Add passes to the specified pass manager to get machine code emitted with /// the MCJIT. This method returns true if machine code is not supported. It /// fills the MCContext Ctx pointer which can be used to build custom @@ -384,6 +448,10 @@ public: raw_pwrite_stream *DwoOut, CodeGenFileType FileType, MCContext &Context); + Expected<std::unique_ptr<MCStreamer>> + createMCStreamer(raw_pwrite_stream &Out, raw_pwrite_stream *DwoOut, + CodeGenFileType FileType, MCContext &Ctx); + /// True if the target uses physical regs (as nearly all targets do). False /// for stack machines such as WebAssembly and other virtual-register /// machines. If true, all vregs must be allocated before PEI. If false, then diff --git a/llvm/include/llvm/Target/TargetOptions.h b/llvm/include/llvm/Target/TargetOptions.h index d73686b2bdd8..fd014d46e758 100644 --- a/llvm/include/llvm/Target/TargetOptions.h +++ b/llvm/include/llvm/Target/TargetOptions.h @@ -67,9 +67,18 @@ namespace llvm { Labels, // Do not use Basic Block Sections but label basic blocks. This // is useful when associating profile counts from virtual addresses // to basic blocks. + Preset, // Similar to list but the blocks are identified by passes which + // seek to use Basic Block Sections, e.g. MachineFunctionSplitter. + // This option cannot be set via the command line. None // Do not use Basic Block Sections. 
}; + enum class StackProtectorGuards { + None, + TLS, + Global + }; + enum class EABI { Unknown, Default, // Default means not specified @@ -113,33 +122,34 @@ namespace llvm { class TargetOptions { public: TargetOptions() - : PrintMachineCode(false), UnsafeFPMath(false), NoInfsFPMath(false), - NoNaNsFPMath(false), NoTrappingFPMath(true), - NoSignedZerosFPMath(false), + : UnsafeFPMath(false), NoInfsFPMath(false), NoNaNsFPMath(false), + NoTrappingFPMath(true), NoSignedZerosFPMath(false), + EnableAIXExtendedAltivecABI(false), HonorSignDependentRoundingFPMathOption(false), NoZerosInBSS(false), GuaranteedTailCallOpt(false), StackSymbolOrdering(true), EnableFastISel(false), EnableGlobalISel(false), UseInitArray(false), DisableIntegratedAS(false), RelaxELFRelocations(false), FunctionSections(false), DataSections(false), + IgnoreXCOFFVisibility(false), XCOFFTracebackTable(true), UniqueSectionNames(true), UniqueBasicBlockSectionNames(false), TrapUnreachable(false), NoTrapAfterNoreturn(false), TLSSize(0), EmulatedTLS(false), ExplicitEmulatedTLS(false), EnableIPRA(false), EmitStackSizeSection(false), EnableMachineOutliner(false), - SupportsDefaultOutlining(false), EmitAddrsig(false), - EmitCallSiteInfo(false), SupportsDebugEntryValues(false), - EnableDebugEntryValues(false), ForceDwarfFrameSection(false), - XRayOmitFunctionIndex(false), + EnableMachineFunctionSplitter(false), SupportsDefaultOutlining(false), + EmitAddrsig(false), EmitCallSiteInfo(false), + SupportsDebugEntryValues(false), EnableDebugEntryValues(false), + PseudoProbeForProfiling(false), ValueTrackingVariableLocations(false), + ForceDwarfFrameSection(false), XRayOmitFunctionIndex(false), FPDenormalMode(DenormalMode::IEEE, DenormalMode::IEEE) {} - /// PrintMachineCode - This flag is enabled when the -print-machineinstrs - /// option is specified on the command line, and should enable debugging - /// output from the code generator. - unsigned PrintMachineCode : 1; - /// DisableFramePointerElim - This returns true if frame pointer elimination /// optimization should be disabled for the given machine function. bool DisableFramePointerElim(const MachineFunction &MF) const; + /// If greater than 0, override the default value of + /// MCAsmInfo::BinutilsVersion. + std::pair<int, int> BinutilsVersion{0, 0}; + /// UnsafeFPMath - This flag is enabled when the /// -enable-unsafe-fp-math flag is specified on the command line. When /// this flag is off (the default), the code generator is not allowed to @@ -170,6 +180,12 @@ namespace llvm { /// argument or result as insignificant. unsigned NoSignedZerosFPMath : 1; + /// EnableAIXExtendedAltivecABI - This flag returns true when -vec-extabi is + /// specified. The code generator is then able to use both volatile and + /// nonvolitle vector regisers. When false, the code generator only uses + /// volatile vector registers which is the default setting on AIX. + unsigned EnableAIXExtendedAltivecABI : 1; + /// HonorSignDependentRoundingFPMath - This returns true when the /// -enable-sign-dependent-rounding-fp-math is specified. If this returns /// false (the default), the code generator is allowed to assume that the @@ -232,6 +248,12 @@ namespace llvm { /// Emit data into separate sections. unsigned DataSections : 1; + /// Do not emit visibility attribute for xcoff. + unsigned IgnoreXCOFFVisibility : 1; + + /// Emit XCOFF traceback table. + unsigned XCOFFTracebackTable : 1; + unsigned UniqueSectionNames : 1; /// Use unique names for basic block sections. 
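Aside, not part of the imported sources: a hedged sketch of how a driver-level caller might fill in a few of the TargetOptions fields introduced in this file's diff before constructing a TargetMachine. The concrete values (binutils 2.35, a TLS guard in %fs at offset 40) are purely illustrative:

#include "llvm/Target/TargetOptions.h"

// Sketch only: the assigned values are examples, not LLVM defaults.
llvm::TargetOptions makeExampleOptions() {
  llvm::TargetOptions Opts;
  Opts.BinutilsVersion = {2, 35};            // new BinutilsVersion pair
  Opts.XCOFFTracebackTable = true;           // new XCOFF flag, default true
  Opts.EnableMachineFunctionSplitter = true; // new pass toggle
  // New stack-protector-guard controls, here selecting a TLS-relative guard:
  Opts.StackProtectorGuard = llvm::StackProtectorGuards::TLS;
  Opts.StackProtectorGuardReg = "fs";
  Opts.StackProtectorGuardOffset = 40;
  return Opts;
}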
@@ -263,6 +285,9 @@ namespace llvm { /// Enables the MachineOutliner pass. unsigned EnableMachineOutliner : 1; + /// Enables the MachineFunctionSplitter pass. + unsigned EnableMachineFunctionSplitter : 1; + /// Set if the target supports default outlining behaviour. unsigned SupportsDefaultOutlining : 1; @@ -291,12 +316,30 @@ namespace llvm { /// production. bool ShouldEmitDebugEntryValues() const; + /// Emit pseudo probes into the binary for sample profiling + unsigned PseudoProbeForProfiling : 1; + + // When set to true, use experimental new debug variable location tracking, + // which seeks to follow the values of variables rather than their location, + // post isel. + unsigned ValueTrackingVariableLocations : 1; + /// Emit DWARF debug frame section. unsigned ForceDwarfFrameSection : 1; /// Emit XRay Function Index section unsigned XRayOmitFunctionIndex : 1; + /// Stack protector guard offset to use. + unsigned StackProtectorGuardOffset : 32; + + /// Stack protector guard mode to use, e.g. tls, global. + StackProtectorGuards StackProtectorGuard = + StackProtectorGuards::None; + + /// Stack protector guard reg to use, e.g. usually fs or gs in X86. + std::string StackProtectorGuardReg = "None"; + /// FloatABIType - This setting is set by -float-abi=xxx option is specfied /// on the command line. This setting may either be Default, Soft, or Hard. /// Default selects the target's default behavior. Soft selects the ABI for diff --git a/llvm/include/llvm/Target/TargetPfmCounters.td b/llvm/include/llvm/Target/TargetPfmCounters.td index e1d5013c1291..b00f3e19c35f 100644 --- a/llvm/include/llvm/Target/TargetPfmCounters.td +++ b/llvm/include/llvm/Target/TargetPfmCounters.td @@ -7,6 +7,8 @@ //===----------------------------------------------------------------------===// // // This file defines the target-independent interfaces for performance counters. +// +//===----------------------------------------------------------------------===// // Definition of a hardware counters from libpfm identifiers. class PfmCounter<string counter> { diff --git a/llvm/include/llvm/Target/TargetSchedule.td b/llvm/include/llvm/Target/TargetSchedule.td index 9f2f27ddcb25..a822878ead7f 100644 --- a/llvm/include/llvm/Target/TargetSchedule.td +++ b/llvm/include/llvm/Target/TargetSchedule.td @@ -87,7 +87,7 @@ class SchedMachineModel { // Per-cycle resources tables. ProcessorItineraries Itineraries = NoItineraries; - bit PostRAScheduler = 0; // Enable Post RegAlloc Scheduler pass. + bit PostRAScheduler = false; // Enable Post RegAlloc Scheduler pass. // Subtargets that define a model for only a subset of instructions // that have a scheduling class (itinerary class or SchedRW list) @@ -96,13 +96,13 @@ class SchedMachineModel { // be an error. This should only be set during initial bringup, // or there will be no way to catch simple errors in the model // resulting from changes to the instruction definitions. - bit CompleteModel = 1; + bit CompleteModel = true; // Indicates that we should do full overlap checking for multiple InstrRWs // defining the same instructions within the same SchedMachineModel. // FIXME: Remove when all in tree targets are clean with the full check // enabled. - bit FullInstRWOverlapCheck = 1; + bit FullInstRWOverlapCheck = true; // A processor may only implement part of published ISA, due to either new ISA // extensions, (e.g. Pentium 4 doesn't have AVX) or implementation @@ -118,12 +118,12 @@ class SchedMachineModel { // field. 
list<Predicate> UnsupportedFeatures = []; - bit NoModel = 0; // Special tag to indicate missing machine model. + bit NoModel = false; // Special tag to indicate missing machine model. } def NoSchedModel : SchedMachineModel { - let NoModel = 1; - let CompleteModel = 0; + let NoModel = true; + let CompleteModel = false; } // Define a kind of processor resource that may be common across @@ -254,14 +254,14 @@ class ProcWriteResources<list<ProcResourceKind> resources> { list<int> ResourceCycles = []; int Latency = 1; int NumMicroOps = 1; - bit BeginGroup = 0; - bit EndGroup = 0; + bit BeginGroup = false; + bit EndGroup = false; // Allow a processor to mark some scheduling classes as unsupported // for stronger verification. - bit Unsupported = 0; + bit Unsupported = false; // Allow a processor to mark some scheduling classes as single-issue. // SingleIssue is an alias for Begin/End Group. - bit SingleIssue = 0; + bit SingleIssue = false; SchedMachineModel SchedModel = ?; } @@ -317,7 +317,7 @@ class ProcReadAdvance<int cycles, list<SchedWrite> writes = []> { list<SchedWrite> ValidWrites = writes; // Allow a processor to mark some scheduling classes as unsupported // for stronger verification. - bit Unsupported = 0; + bit Unsupported = false; SchedMachineModel SchedModel = ?; } @@ -395,7 +395,7 @@ class SchedVar<SchedPredicateBase pred, list<SchedReadWrite> selected> { // SchedModel silences warnings but is ignored. class SchedVariant<list<SchedVar> variants> { list<SchedVar> Variants = variants; - bit Variadic = 0; + bit Variadic = false; SchedMachineModel SchedModel = ?; } @@ -428,7 +428,7 @@ class InstRW<list<SchedReadWrite> rw, dag instrlist> { dag Instrs = instrlist; SchedMachineModel SchedModel = ?; // Allow a subtarget to mark some instructions as unsupported. - bit Unsupported = 0; + bit Unsupported = false; } // Map a set of itinerary classes to SchedReadWrite resources. 
This is @@ -535,7 +535,7 @@ class SchedAlias<SchedReadWrite match, SchedReadWrite alias> { class RegisterFile<int numPhysRegs, list<RegisterClass> Classes = [], list<int> Costs = [], list<bit> AllowMoveElim = [], - int MaxMoveElimPerCy = 0, bit AllowZeroMoveElimOnly = 0> { + int MaxMoveElimPerCy = 0, bit AllowZeroMoveElimOnly = false> { list<RegisterClass> RegClasses = Classes; list<int> RegCosts = Costs; list<bit> AllowMoveElimination = AllowMoveElim; diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td index de809bb10d49..a09feca6ca9b 100644 --- a/llvm/include/llvm/Target/TargetSelectionDAG.td +++ b/llvm/include/llvm/Target/TargetSelectionDAG.td @@ -164,6 +164,9 @@ def SDTIntToFPOp : SDTypeProfile<1, 1, [ // [su]int_to_fp def SDTFPToIntOp : SDTypeProfile<1, 1, [ // fp_to_[su]int SDTCisInt<0>, SDTCisFP<1>, SDTCisSameNumEltsAs<0, 1> ]>; +def SDTFPToIntSatOp : SDTypeProfile<1, 2, [ // fp_to_[su]int_sat + SDTCisInt<0>, SDTCisFP<1>, SDTCisInt<2>, SDTCisSameNumEltsAs<0, 1> +]>; def SDTExtInreg : SDTypeProfile<1, 2, [ // sext_inreg SDTCisSameAs<0, 1>, SDTCisInt<0>, SDTCisVT<2, OtherVT>, SDTCisVTSmallerThanOp<2, 1> @@ -212,6 +215,8 @@ def SDTCatchret : SDTypeProfile<0, 2, [ // catchret def SDTNone : SDTypeProfile<0, 0, []>; // ret, trap +def SDTUBSANTrap : SDTypeProfile<0, 1, []>; // ubsantrap + def SDTLoad : SDTypeProfile<1, 1, [ // load SDTCisPtrTy<1> ]>; @@ -245,6 +250,10 @@ def SDTVecInsert : SDTypeProfile<1, 3, [ // vector insert def SDTVecReduce : SDTypeProfile<1, 1, [ // vector reduction SDTCisInt<0>, SDTCisVec<1> ]>; +def SDTFPVecReduce : SDTypeProfile<1, 1, [ // FP vector reduction + SDTCisFP<0>, SDTCisVec<1> +]>; + def SDTSubVecExtract : SDTypeProfile<1, 2, [// subvector extract SDTCisSubVecOfVec<0,1>, SDTCisInt<2> @@ -396,6 +405,8 @@ def saddsat : SDNode<"ISD::SADDSAT" , SDTIntBinOp, [SDNPCommutative]>; def uaddsat : SDNode<"ISD::UADDSAT" , SDTIntBinOp, [SDNPCommutative]>; def ssubsat : SDNode<"ISD::SSUBSAT" , SDTIntBinOp>; def usubsat : SDNode<"ISD::USUBSAT" , SDTIntBinOp>; +def sshlsat : SDNode<"ISD::SSHLSAT" , SDTIntBinOp>; +def ushlsat : SDNode<"ISD::USHLSAT" , SDTIntBinOp>; def smulfix : SDNode<"ISD::SMULFIX" , SDTIntScaledBinOp, [SDNPCommutative]>; def smulfixsat : SDNode<"ISD::SMULFIXSAT", SDTIntScaledBinOp, [SDNPCommutative]>; @@ -432,14 +443,15 @@ def vecreduce_smax : SDNode<"ISD::VECREDUCE_SMAX", SDTVecReduce>; def vecreduce_umax : SDNode<"ISD::VECREDUCE_UMAX", SDTVecReduce>; def vecreduce_smin : SDNode<"ISD::VECREDUCE_SMIN", SDTVecReduce>; def vecreduce_umin : SDNode<"ISD::VECREDUCE_UMIN", SDTVecReduce>; +def vecreduce_fadd : SDNode<"ISD::VECREDUCE_FADD", SDTFPVecReduce>; def fadd : SDNode<"ISD::FADD" , SDTFPBinOp, [SDNPCommutative]>; def fsub : SDNode<"ISD::FSUB" , SDTFPBinOp>; def fmul : SDNode<"ISD::FMUL" , SDTFPBinOp, [SDNPCommutative]>; def fdiv : SDNode<"ISD::FDIV" , SDTFPBinOp>; def frem : SDNode<"ISD::FREM" , SDTFPBinOp>; -def fma : SDNode<"ISD::FMA" , SDTFPTernaryOp>; -def fmad : SDNode<"ISD::FMAD" , SDTFPTernaryOp>; +def fma : SDNode<"ISD::FMA" , SDTFPTernaryOp, [SDNPCommutative]>; +def fmad : SDNode<"ISD::FMAD" , SDTFPTernaryOp, [SDNPCommutative]>; def fabs : SDNode<"ISD::FABS" , SDTFPUnaryOp>; def fminnum : SDNode<"ISD::FMINNUM" , SDTFPBinOp, [SDNPCommutative, SDNPAssociative]>; @@ -482,6 +494,8 @@ def sint_to_fp : SDNode<"ISD::SINT_TO_FP" , SDTIntToFPOp>; def uint_to_fp : SDNode<"ISD::UINT_TO_FP" , SDTIntToFPOp>; def fp_to_sint : SDNode<"ISD::FP_TO_SINT" , SDTFPToIntOp>; def fp_to_uint : 
SDNode<"ISD::FP_TO_UINT" , SDTFPToIntOp>; +def fp_to_sint_sat : SDNode<"ISD::FP_TO_SINT_SAT" , SDTFPToIntSatOp>; +def fp_to_uint_sat : SDNode<"ISD::FP_TO_UINT_SAT" , SDTFPToIntSatOp>; def f16_to_fp : SDNode<"ISD::FP16_TO_FP" , SDTIntToFPOp>; def fp_to_f16 : SDNode<"ISD::FP_TO_FP16" , SDTFPToIntOp>; @@ -496,7 +510,7 @@ def strict_fdiv : SDNode<"ISD::STRICT_FDIV", def strict_frem : SDNode<"ISD::STRICT_FREM", SDTFPBinOp, [SDNPHasChain]>; def strict_fma : SDNode<"ISD::STRICT_FMA", - SDTFPTernaryOp, [SDNPHasChain]>; + SDTFPTernaryOp, [SDNPHasChain, SDNPCommutative]>; def strict_fsqrt : SDNode<"ISD::STRICT_FSQRT", SDTFPUnaryOp, [SDNPHasChain]>; def strict_fsin : SDNode<"ISD::STRICT_FSIN", @@ -553,6 +567,8 @@ def strict_sint_to_fp : SDNode<"ISD::STRICT_SINT_TO_FP", SDTIntToFPOp, [SDNPHasChain]>; def strict_uint_to_fp : SDNode<"ISD::STRICT_UINT_TO_FP", SDTIntToFPOp, [SDNPHasChain]>; +def strict_fsetcc : SDNode<"ISD::STRICT_FSETCC", SDTSetCC, [SDNPHasChain]>; +def strict_fsetccs : SDNode<"ISD::STRICT_FSETCCS", SDTSetCC, [SDNPHasChain]>; def setcc : SDNode<"ISD::SETCC" , SDTSetCC>; def select : SDNode<"ISD::SELECT" , SDTSelect>; @@ -571,6 +587,8 @@ def trap : SDNode<"ISD::TRAP" , SDTNone, [SDNPHasChain, SDNPSideEffect]>; def debugtrap : SDNode<"ISD::DEBUGTRAP" , SDTNone, [SDNPHasChain, SDNPSideEffect]>; +def ubsantrap : SDNode<"ISD::UBSANTRAP" , SDTUBSANTrap, + [SDNPHasChain, SDNPSideEffect]>; def prefetch : SDNode<"ISD::PREFETCH" , SDTPrefetch, [SDNPHasChain, SDNPMayLoad, SDNPMayStore, @@ -634,6 +652,7 @@ def ist : SDNode<"ISD::STORE" , SDTIStore, def vector_shuffle : SDNode<"ISD::VECTOR_SHUFFLE", SDTVecShuffle, []>; def build_vector : SDNode<"ISD::BUILD_VECTOR", SDTypeProfile<1, -1, []>, []>; +def splat_vector : SDNode<"ISD::SPLAT_VECTOR", SDTypeProfile<1, 1, []>, []>; def scalar_to_vector : SDNode<"ISD::SCALAR_TO_VECTOR", SDTypeProfile<1, 1, []>, []>; @@ -749,7 +768,7 @@ class PatFrags<dag ops, list<dag> frags, code pred = [{}], // This is useful when Fragments involves associative / commutative // operators: a single piece of code can easily refer to all operands even // when re-associated / commuted variants of the fragment are matched. - bit PredicateCodeUsesOperands = 0; + bit PredicateCodeUsesOperands = false; // Define a few pre-packaged predicates. This helps GlobalISel import // existing rules from SelectionDAG for many common cases. @@ -848,13 +867,13 @@ class ImmLeaf<ValueType vt, code pred, SDNodeXForm xform = NOOP_SDNodeXForm, SDNode ImmNode = imm> : PatFrag<(ops), (vt ImmNode), [{}], xform> { let ImmediateCode = pred; - bit FastIselShouldIgnore = 0; + bit FastIselShouldIgnore = false; // Is the data type of the immediate an APInt? - bit IsAPInt = 0; + bit IsAPInt = false; // Is the data type of the immediate an APFloat? - bit IsAPFloat = 0; + bit IsAPFloat = false; } // Convenience wrapper for ImmLeaf to use timm/TargetConstant instead @@ -871,8 +890,8 @@ class TImmLeaf<ValueType vt, code pred, SDNodeXForm xform = NOOP_SDNodeXForm, // IntImmLeaf will allow GlobalISel to import the rule. class IntImmLeaf<ValueType vt, code pred, SDNodeXForm xform = NOOP_SDNodeXForm> : ImmLeaf<vt, pred, xform> { - let IsAPInt = 1; - let FastIselShouldIgnore = 1; + let IsAPInt = true; + let FastIselShouldIgnore = true; } // An ImmLeaf except that Imm is an APFloat. @@ -881,8 +900,8 @@ class IntImmLeaf<ValueType vt, code pred, SDNodeXForm xform = NOOP_SDNodeXForm> // generate code for rules that make use of it. 
class FPImmLeaf<ValueType vt, code pred, SDNodeXForm xform = NOOP_SDNodeXForm> : ImmLeaf<vt, pred, xform, fpimm> { - let IsAPFloat = 1; - let FastIselShouldIgnore = 1; + let IsAPFloat = true; + let FastIselShouldIgnore = true; } // Leaf fragments. @@ -890,17 +909,23 @@ class FPImmLeaf<ValueType vt, code pred, SDNodeXForm xform = NOOP_SDNodeXForm> def vtInt : PatLeaf<(vt), [{ return N->getVT().isInteger(); }]>; def vtFP : PatLeaf<(vt), [{ return N->getVT().isFloatingPoint(); }]>; -// Use ISD::isBuildVectorAllOnes or ISD::isBuildVectorAllZeros to look for -// the corresponding build_vector. Will look through bitcasts except when used -// as a pattern root. -def immAllOnesV; // ISD::isBuildVectorAllOnes -def immAllZerosV; // ISD::isBuildVectorAllZeros +// Use ISD::isConstantSplatVectorAllOnes or ISD::isConstantSplatVectorAllZeros +// to look for the corresponding build_vector or splat_vector. Will look through +// bitcasts and check for either opcode, except when used as a pattern root. +// When used as a pattern root, only fixed-length build_vector and scalable +// splat_vector are supported. +def immAllOnesV; // ISD::isConstantSplatVectorAllOnes +def immAllZerosV; // ISD::isConstantSplatVectorAllZeros // Other helper fragments. def not : PatFrag<(ops node:$in), (xor node:$in, -1)>; def vnot : PatFrag<(ops node:$in), (xor node:$in, immAllOnesV)>; def ineg : PatFrag<(ops node:$in), (sub 0, node:$in)>; +def zanyext : PatFrags<(ops node:$op), + [(zext node:$op), + (anyext node:$op)]>; + // null_frag - The null pattern operator is used in multiclass instantiations // which accept an SDPatternOperator for use in matching patterns for internal // definitions. When expanding a pattern, if the null fragment is referenced @@ -910,222 +935,222 @@ def null_frag : SDPatternOperator; // load fragments. def unindexedload : PatFrag<(ops node:$ptr), (ld node:$ptr)> { - let IsLoad = 1; - let IsUnindexed = 1; + let IsLoad = true; + let IsUnindexed = true; } def load : PatFrag<(ops node:$ptr), (unindexedload node:$ptr)> { - let IsLoad = 1; - let IsNonExtLoad = 1; + let IsLoad = true; + let IsNonExtLoad = true; } // extending load fragments. 
def extload : PatFrag<(ops node:$ptr), (unindexedload node:$ptr)> { - let IsLoad = 1; - let IsAnyExtLoad = 1; + let IsLoad = true; + let IsAnyExtLoad = true; } def sextload : PatFrag<(ops node:$ptr), (unindexedload node:$ptr)> { - let IsLoad = 1; - let IsSignExtLoad = 1; + let IsLoad = true; + let IsSignExtLoad = true; } def zextload : PatFrag<(ops node:$ptr), (unindexedload node:$ptr)> { - let IsLoad = 1; - let IsZeroExtLoad = 1; + let IsLoad = true; + let IsZeroExtLoad = true; } def extloadi1 : PatFrag<(ops node:$ptr), (extload node:$ptr)> { - let IsLoad = 1; + let IsLoad = true; let MemoryVT = i1; } def extloadi8 : PatFrag<(ops node:$ptr), (extload node:$ptr)> { - let IsLoad = 1; + let IsLoad = true; let MemoryVT = i8; } def extloadi16 : PatFrag<(ops node:$ptr), (extload node:$ptr)> { - let IsLoad = 1; + let IsLoad = true; let MemoryVT = i16; } def extloadi32 : PatFrag<(ops node:$ptr), (extload node:$ptr)> { - let IsLoad = 1; + let IsLoad = true; let MemoryVT = i32; } def extloadf16 : PatFrag<(ops node:$ptr), (extload node:$ptr)> { - let IsLoad = 1; + let IsLoad = true; let MemoryVT = f16; } def extloadf32 : PatFrag<(ops node:$ptr), (extload node:$ptr)> { - let IsLoad = 1; + let IsLoad = true; let MemoryVT = f32; } def extloadf64 : PatFrag<(ops node:$ptr), (extload node:$ptr)> { - let IsLoad = 1; + let IsLoad = true; let MemoryVT = f64; } def sextloadi1 : PatFrag<(ops node:$ptr), (sextload node:$ptr)> { - let IsLoad = 1; + let IsLoad = true; let MemoryVT = i1; } def sextloadi8 : PatFrag<(ops node:$ptr), (sextload node:$ptr)> { - let IsLoad = 1; + let IsLoad = true; let MemoryVT = i8; } def sextloadi16 : PatFrag<(ops node:$ptr), (sextload node:$ptr)> { - let IsLoad = 1; + let IsLoad = true; let MemoryVT = i16; } def sextloadi32 : PatFrag<(ops node:$ptr), (sextload node:$ptr)> { - let IsLoad = 1; + let IsLoad = true; let MemoryVT = i32; } def zextloadi1 : PatFrag<(ops node:$ptr), (zextload node:$ptr)> { - let IsLoad = 1; + let IsLoad = true; let MemoryVT = i1; } def zextloadi8 : PatFrag<(ops node:$ptr), (zextload node:$ptr)> { - let IsLoad = 1; + let IsLoad = true; let MemoryVT = i8; } def zextloadi16 : PatFrag<(ops node:$ptr), (zextload node:$ptr)> { - let IsLoad = 1; + let IsLoad = true; let MemoryVT = i16; } def zextloadi32 : PatFrag<(ops node:$ptr), (zextload node:$ptr)> { - let IsLoad = 1; + let IsLoad = true; let MemoryVT = i32; } def extloadvi1 : PatFrag<(ops node:$ptr), (extload node:$ptr)> { - let IsLoad = 1; + let IsLoad = true; let ScalarMemoryVT = i1; } def extloadvi8 : PatFrag<(ops node:$ptr), (extload node:$ptr)> { - let IsLoad = 1; + let IsLoad = true; let ScalarMemoryVT = i8; } def extloadvi16 : PatFrag<(ops node:$ptr), (extload node:$ptr)> { - let IsLoad = 1; + let IsLoad = true; let ScalarMemoryVT = i16; } def extloadvi32 : PatFrag<(ops node:$ptr), (extload node:$ptr)> { - let IsLoad = 1; + let IsLoad = true; let ScalarMemoryVT = i32; } def extloadvf32 : PatFrag<(ops node:$ptr), (extload node:$ptr)> { - let IsLoad = 1; + let IsLoad = true; let ScalarMemoryVT = f32; } def extloadvf64 : PatFrag<(ops node:$ptr), (extload node:$ptr)> { - let IsLoad = 1; + let IsLoad = true; let ScalarMemoryVT = f64; } def sextloadvi1 : PatFrag<(ops node:$ptr), (sextload node:$ptr)> { - let IsLoad = 1; + let IsLoad = true; let ScalarMemoryVT = i1; } def sextloadvi8 : PatFrag<(ops node:$ptr), (sextload node:$ptr)> { - let IsLoad = 1; + let IsLoad = true; let ScalarMemoryVT = i8; } def sextloadvi16 : PatFrag<(ops node:$ptr), (sextload node:$ptr)> { - let IsLoad = 1; + let IsLoad = true; let 
ScalarMemoryVT = i16; } def sextloadvi32 : PatFrag<(ops node:$ptr), (sextload node:$ptr)> { - let IsLoad = 1; + let IsLoad = true; let ScalarMemoryVT = i32; } def zextloadvi1 : PatFrag<(ops node:$ptr), (zextload node:$ptr)> { - let IsLoad = 1; + let IsLoad = true; let ScalarMemoryVT = i1; } def zextloadvi8 : PatFrag<(ops node:$ptr), (zextload node:$ptr)> { - let IsLoad = 1; + let IsLoad = true; let ScalarMemoryVT = i8; } def zextloadvi16 : PatFrag<(ops node:$ptr), (zextload node:$ptr)> { - let IsLoad = 1; + let IsLoad = true; let ScalarMemoryVT = i16; } def zextloadvi32 : PatFrag<(ops node:$ptr), (zextload node:$ptr)> { - let IsLoad = 1; + let IsLoad = true; let ScalarMemoryVT = i32; } // store fragments. def unindexedstore : PatFrag<(ops node:$val, node:$ptr), (st node:$val, node:$ptr)> { - let IsStore = 1; - let IsUnindexed = 1; + let IsStore = true; + let IsUnindexed = true; } def store : PatFrag<(ops node:$val, node:$ptr), (unindexedstore node:$val, node:$ptr)> { - let IsStore = 1; - let IsTruncStore = 0; + let IsStore = true; + let IsTruncStore = false; } // truncstore fragments. def truncstore : PatFrag<(ops node:$val, node:$ptr), (unindexedstore node:$val, node:$ptr)> { - let IsStore = 1; - let IsTruncStore = 1; + let IsStore = true; + let IsTruncStore = true; } def truncstorei8 : PatFrag<(ops node:$val, node:$ptr), (truncstore node:$val, node:$ptr)> { - let IsStore = 1; + let IsStore = true; let MemoryVT = i8; } def truncstorei16 : PatFrag<(ops node:$val, node:$ptr), (truncstore node:$val, node:$ptr)> { - let IsStore = 1; + let IsStore = true; let MemoryVT = i16; } def truncstorei32 : PatFrag<(ops node:$val, node:$ptr), (truncstore node:$val, node:$ptr)> { - let IsStore = 1; + let IsStore = true; let MemoryVT = i32; } def truncstoref16 : PatFrag<(ops node:$val, node:$ptr), (truncstore node:$val, node:$ptr)> { - let IsStore = 1; + let IsStore = true; let MemoryVT = f16; } def truncstoref32 : PatFrag<(ops node:$val, node:$ptr), (truncstore node:$val, node:$ptr)> { - let IsStore = 1; + let IsStore = true; let MemoryVT = f32; } def truncstoref64 : PatFrag<(ops node:$val, node:$ptr), (truncstore node:$val, node:$ptr)> { - let IsStore = 1; + let IsStore = true; let MemoryVT = f64; } def truncstorevi8 : PatFrag<(ops node:$val, node:$ptr), (truncstore node:$val, node:$ptr)> { - let IsStore = 1; + let IsStore = true; let ScalarMemoryVT = i8; } def truncstorevi16 : PatFrag<(ops node:$val, node:$ptr), (truncstore node:$val, node:$ptr)> { - let IsStore = 1; + let IsStore = true; let ScalarMemoryVT = i16; } def truncstorevi32 : PatFrag<(ops node:$val, node:$ptr), (truncstore node:$val, node:$ptr)> { - let IsStore = 1; + let IsStore = true; let ScalarMemoryVT = i32; } // indexed store fragments. 
def istore : PatFrag<(ops node:$val, node:$base, node:$offset), (ist node:$val, node:$base, node:$offset)> { - let IsStore = 1; - let IsTruncStore = 0; + let IsStore = true; + let IsTruncStore = false; } def pre_store : PatFrag<(ops node:$val, node:$base, node:$offset), @@ -1136,8 +1161,8 @@ def pre_store : PatFrag<(ops node:$val, node:$base, node:$offset), def itruncstore : PatFrag<(ops node:$val, node:$base, node:$offset), (ist node:$val, node:$base, node:$offset)> { - let IsStore = 1; - let IsTruncStore = 1; + let IsStore = true; + let IsTruncStore = true; } def pre_truncst : PatFrag<(ops node:$val, node:$base, node:$offset), (itruncstore node:$val, node:$base, node:$offset), [{ @@ -1146,37 +1171,37 @@ def pre_truncst : PatFrag<(ops node:$val, node:$base, node:$offset), }]>; def pre_truncsti1 : PatFrag<(ops node:$val, node:$base, node:$offset), (pre_truncst node:$val, node:$base, node:$offset)> { - let IsStore = 1; + let IsStore = true; let MemoryVT = i1; } def pre_truncsti8 : PatFrag<(ops node:$val, node:$base, node:$offset), (pre_truncst node:$val, node:$base, node:$offset)> { - let IsStore = 1; + let IsStore = true; let MemoryVT = i8; } def pre_truncsti16 : PatFrag<(ops node:$val, node:$base, node:$offset), (pre_truncst node:$val, node:$base, node:$offset)> { - let IsStore = 1; + let IsStore = true; let MemoryVT = i16; } def pre_truncsti32 : PatFrag<(ops node:$val, node:$base, node:$offset), (pre_truncst node:$val, node:$base, node:$offset)> { - let IsStore = 1; + let IsStore = true; let MemoryVT = i32; } def pre_truncstf32 : PatFrag<(ops node:$val, node:$base, node:$offset), (pre_truncst node:$val, node:$base, node:$offset)> { - let IsStore = 1; + let IsStore = true; let MemoryVT = f32; } def pre_truncstvi8 : PatFrag<(ops node:$val, node:$base, node:$offset), (pre_truncst node:$val, node:$base, node:$offset)> { - let IsStore = 1; + let IsStore = true; let ScalarMemoryVT = i8; } def pre_truncstvi16 : PatFrag<(ops node:$val, node:$base, node:$offset), (pre_truncst node:$val, node:$base, node:$offset)> { - let IsStore = 1; + let IsStore = true; let ScalarMemoryVT = i16; } @@ -1193,37 +1218,37 @@ def post_truncst : PatFrag<(ops node:$val, node:$base, node:$offset), }]>; def post_truncsti1 : PatFrag<(ops node:$val, node:$base, node:$offset), (post_truncst node:$val, node:$base, node:$offset)> { - let IsStore = 1; + let IsStore = true; let MemoryVT = i1; } def post_truncsti8 : PatFrag<(ops node:$val, node:$base, node:$offset), (post_truncst node:$val, node:$base, node:$offset)> { - let IsStore = 1; + let IsStore = true; let MemoryVT = i8; } def post_truncsti16 : PatFrag<(ops node:$val, node:$base, node:$offset), (post_truncst node:$val, node:$base, node:$offset)> { - let IsStore = 1; + let IsStore = true; let MemoryVT = i16; } def post_truncsti32 : PatFrag<(ops node:$val, node:$base, node:$offset), (post_truncst node:$val, node:$base, node:$offset)> { - let IsStore = 1; + let IsStore = true; let MemoryVT = i32; } def post_truncstf32 : PatFrag<(ops node:$val, node:$base, node:$offset), (post_truncst node:$val, node:$base, node:$offset)> { - let IsStore = 1; + let IsStore = true; let MemoryVT = f32; } def post_truncstvi8 : PatFrag<(ops node:$val, node:$base, node:$offset), (post_truncst node:$val, node:$base, node:$offset)> { - let IsStore = 1; + let IsStore = true; let ScalarMemoryVT = i8; } def post_truncstvi16 : PatFrag<(ops node:$val, node:$base, node:$offset), (post_truncst node:$val, node:$base, node:$offset)> { - let IsStore = 1; + let IsStore = true; let ScalarMemoryVT = i16; } @@ 
-1420,82 +1445,88 @@ def any_sint_to_fp : PatFrags<(ops node:$src), def any_uint_to_fp : PatFrags<(ops node:$src), [(strict_uint_to_fp node:$src), (uint_to_fp node:$src)]>; +def any_fsetcc : PatFrags<(ops node:$lhs, node:$rhs, node:$pred), + [(strict_fsetcc node:$lhs, node:$rhs, node:$pred), + (setcc node:$lhs, node:$rhs, node:$pred)]>; +def any_fsetccs : PatFrags<(ops node:$lhs, node:$rhs, node:$pred), + [(strict_fsetccs node:$lhs, node:$rhs, node:$pred), + (setcc node:$lhs, node:$rhs, node:$pred)]>; multiclass binary_atomic_op_ord<SDNode atomic_op> { def NAME#_monotonic : PatFrag<(ops node:$ptr, node:$val), (!cast<SDPatternOperator>(NAME) node:$ptr, node:$val)> { - let IsAtomic = 1; - let IsAtomicOrderingMonotonic = 1; + let IsAtomic = true; + let IsAtomicOrderingMonotonic = true; } def NAME#_acquire : PatFrag<(ops node:$ptr, node:$val), (!cast<SDPatternOperator>(NAME) node:$ptr, node:$val)> { - let IsAtomic = 1; - let IsAtomicOrderingAcquire = 1; + let IsAtomic = true; + let IsAtomicOrderingAcquire = true; } def NAME#_release : PatFrag<(ops node:$ptr, node:$val), (!cast<SDPatternOperator>(NAME) node:$ptr, node:$val)> { - let IsAtomic = 1; - let IsAtomicOrderingRelease = 1; + let IsAtomic = true; + let IsAtomicOrderingRelease = true; } def NAME#_acq_rel : PatFrag<(ops node:$ptr, node:$val), (!cast<SDPatternOperator>(NAME) node:$ptr, node:$val)> { - let IsAtomic = 1; - let IsAtomicOrderingAcquireRelease = 1; + let IsAtomic = true; + let IsAtomicOrderingAcquireRelease = true; } def NAME#_seq_cst : PatFrag<(ops node:$ptr, node:$val), (!cast<SDPatternOperator>(NAME) node:$ptr, node:$val)> { - let IsAtomic = 1; - let IsAtomicOrderingSequentiallyConsistent = 1; + let IsAtomic = true; + let IsAtomicOrderingSequentiallyConsistent = true; } } multiclass ternary_atomic_op_ord<SDNode atomic_op> { def NAME#_monotonic : PatFrag<(ops node:$ptr, node:$cmp, node:$val), (!cast<SDPatternOperator>(NAME) node:$ptr, node:$cmp, node:$val)> { - let IsAtomic = 1; - let IsAtomicOrderingMonotonic = 1; + let IsAtomic = true; + let IsAtomicOrderingMonotonic = true; } def NAME#_acquire : PatFrag<(ops node:$ptr, node:$cmp, node:$val), (!cast<SDPatternOperator>(NAME) node:$ptr, node:$cmp, node:$val)> { - let IsAtomic = 1; - let IsAtomicOrderingAcquire = 1; + let IsAtomic = true; + let IsAtomicOrderingAcquire = true; } def NAME#_release : PatFrag<(ops node:$ptr, node:$cmp, node:$val), (!cast<SDPatternOperator>(NAME) node:$ptr, node:$cmp, node:$val)> { - let IsAtomic = 1; - let IsAtomicOrderingRelease = 1; + let IsAtomic = true; + let IsAtomicOrderingRelease = true; } def NAME#_acq_rel : PatFrag<(ops node:$ptr, node:$cmp, node:$val), (!cast<SDPatternOperator>(NAME) node:$ptr, node:$cmp, node:$val)> { - let IsAtomic = 1; - let IsAtomicOrderingAcquireRelease = 1; + let IsAtomic = true; + let IsAtomicOrderingAcquireRelease = true; } def NAME#_seq_cst : PatFrag<(ops node:$ptr, node:$cmp, node:$val), (!cast<SDPatternOperator>(NAME) node:$ptr, node:$cmp, node:$val)> { - let IsAtomic = 1; - let IsAtomicOrderingSequentiallyConsistent = 1; + let IsAtomic = true; + let IsAtomicOrderingSequentiallyConsistent = true; } } multiclass binary_atomic_op<SDNode atomic_op, bit IsInt = 1> { def _8 : PatFrag<(ops node:$ptr, node:$val), (atomic_op node:$ptr, node:$val)> { - let IsAtomic = 1; + let IsAtomic = true; let MemoryVT = !if(IsInt, i8, ?); } def _16 : PatFrag<(ops node:$ptr, node:$val), (atomic_op node:$ptr, node:$val)> { - let IsAtomic = 1; + let IsAtomic = true; let MemoryVT = !if(IsInt, i16, f16); } def _32 : PatFrag<(ops 
node:$ptr, node:$val), (atomic_op node:$ptr, node:$val)> { - let IsAtomic = 1; + let IsAtomic = true; let MemoryVT = !if(IsInt, i32, f32); } def _64 : PatFrag<(ops node:$ptr, node:$val), (atomic_op node:$ptr, node:$val)> { - let IsAtomic = 1; + let IsAtomic = true; let MemoryVT = !if(IsInt, i64, f64); } @@ -1508,22 +1539,22 @@ multiclass binary_atomic_op<SDNode atomic_op, bit IsInt = 1> { multiclass ternary_atomic_op<SDNode atomic_op> { def _8 : PatFrag<(ops node:$ptr, node:$cmp, node:$val), (atomic_op node:$ptr, node:$cmp, node:$val)> { - let IsAtomic = 1; + let IsAtomic = true; let MemoryVT = i8; } def _16 : PatFrag<(ops node:$ptr, node:$cmp, node:$val), (atomic_op node:$ptr, node:$cmp, node:$val)> { - let IsAtomic = 1; + let IsAtomic = true; let MemoryVT = i16; } def _32 : PatFrag<(ops node:$ptr, node:$cmp, node:$val), (atomic_op node:$ptr, node:$cmp, node:$val)> { - let IsAtomic = 1; + let IsAtomic = true; let MemoryVT = i32; } def _64 : PatFrag<(ops node:$ptr, node:$cmp, node:$val), (atomic_op node:$ptr, node:$cmp, node:$val)> { - let IsAtomic = 1; + let IsAtomic = true; let MemoryVT = i64; } @@ -1551,25 +1582,25 @@ defm atomic_cmp_swap : ternary_atomic_op<atomic_cmp_swap>; def atomic_load_8 : PatFrag<(ops node:$ptr), (atomic_load node:$ptr)> { - let IsAtomic = 1; + let IsAtomic = true; let MemoryVT = i8; } def atomic_load_16 : PatFrag<(ops node:$ptr), (atomic_load node:$ptr)> { - let IsAtomic = 1; + let IsAtomic = true; let MemoryVT = i16; } def atomic_load_32 : PatFrag<(ops node:$ptr), (atomic_load node:$ptr)> { - let IsAtomic = 1; + let IsAtomic = true; let MemoryVT = i32; } def atomic_load_64 : PatFrag<(ops node:$ptr), (atomic_load node:$ptr)> { - let IsAtomic = 1; + let IsAtomic = true; let MemoryVT = i64; } diff --git a/llvm/include/llvm/Testing/Support/SupportHelpers.h b/llvm/include/llvm/Testing/Support/SupportHelpers.h index 38726b1cfaf7..2419fc95d817 100644 --- a/llvm/include/llvm/Testing/Support/SupportHelpers.h +++ b/llvm/include/llvm/Testing/Support/SupportHelpers.h @@ -12,6 +12,8 @@ #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallString.h" #include "llvm/Support/Error.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/Path.h" #include "llvm/Support/raw_os_ostream.h" #include "gmock/gmock-matchers.h" #include "gtest/gtest-printers.h" @@ -103,7 +105,143 @@ detail::ValueIsMatcher<InnerMatcher> ValueIs(const InnerMatcher &ValueMatcher) { return detail::ValueIsMatcher<InnerMatcher>(ValueMatcher); } namespace unittest { + SmallString<128> getInputFileDirectory(const char *Argv0); + +/// A RAII object that creates a temporary directory upon initialization and +/// removes it upon destruction. +class TempDir { + SmallString<128> Path; + +public: + /// Creates a managed temporary directory. + /// + /// @param Name The name of the directory to create. + /// @param Unique If true, the directory will be created using + /// llvm::sys::fs::createUniqueDirectory. + explicit TempDir(StringRef Name, bool Unique = false) { + std::error_code EC; + if (Unique) { + EC = llvm::sys::fs::createUniqueDirectory(Name, Path); + if (!EC) { + // Resolve any symlinks in the new directory. 
+ std::string UnresolvedPath(Path.str()); + EC = llvm::sys::fs::real_path(UnresolvedPath, Path); + } + } else { + Path = Name; + EC = llvm::sys::fs::create_directory(Path); + } + if (EC) + Path.clear(); + EXPECT_FALSE(EC) << EC.message(); + } + + ~TempDir() { + if (!Path.empty()) { + EXPECT_FALSE(llvm::sys::fs::remove_directories(Path.str())); + } + } + + TempDir(const TempDir &) = delete; + TempDir &operator=(const TempDir &) = delete; + + TempDir(TempDir &&) = default; + TempDir &operator=(TempDir &&) = default; + + /// The path to the temporary directory. + StringRef path() const { return Path; } + + /// The null-terminated C string pointing to the path. + const char *c_str() { return Path.c_str(); } + + /// Creates a new path by appending the argument to the path of the managed + /// directory using the native path separator. + SmallString<128> path(StringRef component) const { + SmallString<128> Result(Path); + SmallString<128> ComponentToAppend(component); + llvm::sys::path::native(ComponentToAppend); + llvm::sys::path::append(Result, Twine(ComponentToAppend)); + return Result; + } +}; + +/// A RAII object that creates a link upon initialization and +/// removes it upon destruction. +/// +/// The link may be a soft or a hard link, depending on the platform. +class TempLink { + SmallString<128> Path; + +public: + /// Creates a managed link at path Link pointing to Target. + TempLink(StringRef Target, StringRef Link) { + Path = Link; + std::error_code EC = sys::fs::create_link(Target, Link); + if (EC) + Path.clear(); + EXPECT_FALSE(EC); + } + ~TempLink() { + if (!Path.empty()) { + EXPECT_FALSE(llvm::sys::fs::remove(Path.str())); + } + } + + TempLink(const TempLink &) = delete; + TempLink &operator=(const TempLink &) = delete; + + TempLink(TempLink &&) = default; + TempLink &operator=(TempLink &&) = default; + + /// The path to the link. + StringRef path() const { return Path; } +}; + +/// A RAII object that creates a file upon initialization and +/// removes it upon destruction. +class TempFile { + SmallString<128> Path; + +public: + /// Creates a managed file. + /// + /// @param Name The name of the file to create. + /// @param Contents The string to write to the file. + /// @param Unique If true, the file will be created using + /// llvm::sys::fs::createTemporaryFile. + TempFile(StringRef Name, StringRef Suffix = "", StringRef Contents = "", + bool Unique = false) { + std::error_code EC; + int fd; + if (Unique) { + EC = llvm::sys::fs::createTemporaryFile(Name, Suffix, fd, Path); + } else { + Path = Name; + if (!Suffix.empty()) { + Path.append("."); + Path.append(Suffix); + } + EC = llvm::sys::fs::openFileForWrite(Path, fd); + } + EXPECT_FALSE(EC); + raw_fd_ostream OS(fd, /*shouldClose*/ true); + OS << Contents; + OS.flush(); + EXPECT_FALSE(OS.error()); + if (EC || OS.error()) + Path.clear(); + } + ~TempFile() { + if (!Path.empty()) { + EXPECT_FALSE(llvm::sys::fs::remove(Path.str())); + } + } + + /// The path to the file. 
+ StringRef path() const { return Path; } +}; + } // namespace unittest } // namespace llvm diff --git a/llvm/include/llvm/TextAPI/MachO/Platform.h b/llvm/include/llvm/TextAPI/MachO/Platform.h index a22aae9b7dce..fc59b8678af7 100644 --- a/llvm/include/llvm/TextAPI/MachO/Platform.h +++ b/llvm/include/llvm/TextAPI/MachO/Platform.h @@ -29,7 +29,8 @@ enum class PlatformKind : unsigned { macCatalyst = MachO::PLATFORM_MACCATALYST, iOSSimulator = MachO::PLATFORM_IOSSIMULATOR, tvOSSimulator = MachO::PLATFORM_TVOSSIMULATOR, - watchOSSimulator = MachO::PLATFORM_WATCHOSSIMULATOR + watchOSSimulator = MachO::PLATFORM_WATCHOSSIMULATOR, + driverKit = MachO::PLATFORM_DRIVERKIT, }; using PlatformSet = SmallSet<PlatformKind, 3>; @@ -42,4 +43,4 @@ StringRef getPlatformName(PlatformKind Platform); } // end namespace MachO. } // end namespace llvm. -#endif // LLVM_TEXTAPI_MACHO_PLATFORM_H
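The TempDir, TempLink and TempFile classes added to SupportHelpers.h above are RAII helpers intended for gtest-based unit tests. A minimal usage sketch follows; the test name, directory name, file name and contents are illustrative assumptions, not part of this import.

#include "llvm/Testing/Support/SupportHelpers.h"
#include "llvm/Support/FileSystem.h"
#include "gtest/gtest.h"

using namespace llvm;

TEST(TempFsHelpersExample, CreatesAndCleansUp) {
  // Unique temporary directory; it is removed again when TestDir is destroyed.
  unittest::TempDir TestDir("support-helpers-example", /*Unique=*/true);
  ASSERT_FALSE(TestDir.path().empty());

  // Fixed-name file with a suffix and known contents, created inside TestDir.
  unittest::TempFile Input(TestDir.path("input"), /*Suffix=*/"txt",
                           /*Contents=*/"hello\n");
  EXPECT_TRUE(sys::fs::exists(Input.path()));
}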
\ No newline at end of file +#endif // LLVM_TEXTAPI_MACHO_PLATFORM_H diff --git a/llvm/include/llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h b/llvm/include/llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h index 887c8807904e..e5e24e0b6311 100644 --- a/llvm/include/llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h +++ b/llvm/include/llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h @@ -17,7 +17,6 @@ #ifndef LLVM_TRANSFORMS_AGGRESSIVE_INSTCOMBINE_INSTCOMBINE_H #define LLVM_TRANSFORMS_AGGRESSIVE_INSTCOMBINE_INSTCOMBINE_H -#include "llvm/IR/Function.h" #include "llvm/IR/PassManager.h" namespace llvm { diff --git a/llvm/include/llvm/Transforms/Coroutines.h b/llvm/include/llvm/Transforms/Coroutines.h index ef05f549fbc1..204359254d4e 100644 --- a/llvm/include/llvm/Transforms/Coroutines.h +++ b/llvm/include/llvm/Transforms/Coroutines.h @@ -23,7 +23,7 @@ void addCoroutinePassesToExtensionPoints(PassManagerBuilder &Builder); Pass *createCoroEarlyLegacyPass(); /// Split up coroutines into multiple functions driving their state machines. -Pass *createCoroSplitLegacyPass(); +Pass *createCoroSplitLegacyPass(bool ReuseFrameSlot = false); /// Analyze coroutines use sites, devirtualize resume/destroy calls and elide /// heap allocation for coroutine frame where possible. diff --git a/llvm/include/llvm/Transforms/Coroutines/CoroCleanup.h b/llvm/include/llvm/Transforms/Coroutines/CoroCleanup.h index c3caa55c25ce..7ecdc050335d 100644 --- a/llvm/include/llvm/Transforms/Coroutines/CoroCleanup.h +++ b/llvm/include/llvm/Transforms/Coroutines/CoroCleanup.h @@ -22,6 +22,7 @@ class Function; struct CoroCleanupPass : PassInfoMixin<CoroCleanupPass> { PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + static bool isRequired() { return true; } }; } // end namespace llvm diff --git a/llvm/include/llvm/Transforms/Coroutines/CoroEarly.h b/llvm/include/llvm/Transforms/Coroutines/CoroEarly.h index 0f5d1e40eb17..3f5ec2abd172 100644 --- a/llvm/include/llvm/Transforms/Coroutines/CoroEarly.h +++ b/llvm/include/llvm/Transforms/Coroutines/CoroEarly.h @@ -25,6 +25,7 @@ class Function; struct CoroEarlyPass : PassInfoMixin<CoroEarlyPass> { PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + static bool isRequired() { return true; } }; } // end namespace llvm diff --git a/llvm/include/llvm/Transforms/Coroutines/CoroElide.h b/llvm/include/llvm/Transforms/Coroutines/CoroElide.h index 348e8e355ea0..ff73cf20c5bf 100644 --- a/llvm/include/llvm/Transforms/Coroutines/CoroElide.h +++ b/llvm/include/llvm/Transforms/Coroutines/CoroElide.h @@ -24,6 +24,7 @@ class Function; struct CoroElidePass : PassInfoMixin<CoroElidePass> { PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + static bool isRequired() { return true; } }; } // end namespace llvm diff --git a/llvm/include/llvm/Transforms/Coroutines/CoroSplit.h b/llvm/include/llvm/Transforms/Coroutines/CoroSplit.h index 40424e5a7e6a..f4eef19b20e5 100644 --- a/llvm/include/llvm/Transforms/Coroutines/CoroSplit.h +++ b/llvm/include/llvm/Transforms/Coroutines/CoroSplit.h @@ -22,8 +22,13 @@ namespace llvm { struct CoroSplitPass : PassInfoMixin<CoroSplitPass> { + CoroSplitPass(bool ReuseFrameSlot = false) : ReuseFrameSlot(ReuseFrameSlot) {} + PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM, LazyCallGraph &CG, CGSCCUpdateResult &UR); + static bool isRequired() { return true; } + + bool ReuseFrameSlot; }; } // end namespace llvm diff --git 
a/llvm/include/llvm/Transforms/HelloNew/HelloWorld.h b/llvm/include/llvm/Transforms/HelloNew/HelloWorld.h new file mode 100644 index 000000000000..6c753032f913 --- /dev/null +++ b/llvm/include/llvm/Transforms/HelloNew/HelloWorld.h @@ -0,0 +1,23 @@ +//===-- HelloWorld.h - Example Transformations ------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_HELLONEW_HELLOWORLD_H +#define LLVM_TRANSFORMS_HELLONEW_HELLOWORLD_H + +#include "llvm/IR/PassManager.h" + +namespace llvm { + +class HelloWorldPass : public PassInfoMixin<HelloWorldPass> { +public: + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); +}; + +} // namespace llvm + +#endif // LLVM_TRANSFORMS_HELLONEW_HELLOWORLD_H diff --git a/llvm/include/llvm/Transforms/IPO.h b/llvm/include/llvm/Transforms/IPO.h index 28e454d3b0fc..af357181597a 100644 --- a/llvm/include/llvm/Transforms/IPO.h +++ b/llvm/include/llvm/Transforms/IPO.h @@ -31,6 +31,13 @@ class raw_ostream; //===----------------------------------------------------------------------===// // +// This pass adds !annotation metadata to entries in the +// @llvm.global.annotations global constant. +// +ModulePass *createAnnotation2MetadataLegacyPass(); + +//===----------------------------------------------------------------------===// +// // These functions removes symbols from functions and modules. If OnlyDebugInfo // is true, only debugging information is removed from the module. // @@ -156,12 +163,6 @@ Pass *createArgumentPromotionPass(unsigned maxElements = 3); Pass *createOpenMPOptLegacyPass(); //===----------------------------------------------------------------------===// -/// createIPConstantPropagationPass - This pass propagates constants from call -/// sites into the bodies of functions. -/// -ModulePass *createIPConstantPropagationPass(); - -//===----------------------------------------------------------------------===// /// createIPSCCPPass - This pass propagates constants from call sites into the /// bodies of functions, and keeps track of whether basic blocks are executable /// in the process. @@ -215,6 +216,11 @@ ModulePass *createMergeFunctionsPass(); ModulePass *createHotColdSplittingPass(); //===----------------------------------------------------------------------===// +/// createIROutlinerPass - This pass finds similar code regions and factors +/// those regions out into functions. +ModulePass *createIROutlinerPass(); + +//===----------------------------------------------------------------------===// /// createPartialInliningPass - This pass inlines parts of functions. 
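For orientation, a pass declared with PassInfoMixin such as the new HelloWorldPass above is usually defined along the following lines. The body shown is an illustrative assumption of a typical observational pass, not the implementation added by this import.

#include "llvm/Transforms/HelloNew/HelloWorld.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

PreservedAnalyses HelloWorldPass::run(Function &F, FunctionAnalysisManager &) {
  // Observe only: print the function name and leave the IR untouched, so
  // every analysis result remains valid.
  errs() << F.getName() << "\n";
  return PreservedAnalyses::all();
}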
/// ModulePass *createPartialInliningPass(); diff --git a/llvm/include/llvm/Transforms/IPO/AlwaysInliner.h b/llvm/include/llvm/Transforms/IPO/AlwaysInliner.h index 64e25230f6da..6a208dfa6a25 100644 --- a/llvm/include/llvm/Transforms/IPO/AlwaysInliner.h +++ b/llvm/include/llvm/Transforms/IPO/AlwaysInliner.h @@ -34,6 +34,7 @@ public: : InsertLifetime(InsertLifetime) {} PreservedAnalyses run(Module &M, ModuleAnalysisManager &); + static bool isRequired() { return true; } }; /// Create a legacy pass manager instance of a pass to inline and remove diff --git a/llvm/include/llvm/Transforms/IPO/Annotation2Metadata.h b/llvm/include/llvm/Transforms/IPO/Annotation2Metadata.h new file mode 100644 index 000000000000..cf7137b088c5 --- /dev/null +++ b/llvm/include/llvm/Transforms/IPO/Annotation2Metadata.h @@ -0,0 +1,30 @@ +//===- Annotation2Metadata.h - Add !annotation metadata. --------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// New pass manager pass to convert @llvm.global.annotations to !annotation +// metadata. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_IPO_ANNOTATION2METADATA_H +#define LLVM_TRANSFORMS_IPO_ANNOTATION2METADATA_H + +#include "llvm/IR/PassManager.h" + +namespace llvm { + +class Module; + +/// Pass to convert @llvm.global.annotations to !annotation metadata. +struct Annotation2MetadataPass : public PassInfoMixin<Annotation2MetadataPass> { + PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); +}; + +} // end namespace llvm + +#endif // LLVM_TRANSFORMS_IPO_SCCP_H diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h index bed180e6717a..dbaf945986e4 100644 --- a/llvm/include/llvm/Transforms/IPO/Attributor.h +++ b/llvm/include/llvm/Transforms/IPO/Attributor.h @@ -97,38 +97,44 @@ #ifndef LLVM_TRANSFORMS_IPO_ATTRIBUTOR_H #define LLVM_TRANSFORMS_IPO_ATTRIBUTOR_H +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/GraphTraits.h" #include "llvm/ADT/MapVector.h" -#include "llvm/ADT/SCCIterator.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" -#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AssumeBundleQueries.h" #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/CGSCCPassManager.h" -#include "llvm/Analysis/CallGraph.h" -#include "llvm/Analysis/InlineCost.h" #include "llvm/Analysis/LazyCallGraph.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/MustExecute.h" #include "llvm/Analysis/PostDominators.h" #include "llvm/Analysis/TargetLibraryInfo.h" -#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/AbstractCallSite.h" #include "llvm/IR/ConstantRange.h" #include "llvm/IR/PassManager.h" #include "llvm/Support/Allocator.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/TimeProfiler.h" #include "llvm/Transforms/Utils/CallGraphUpdater.h" namespace llvm { +struct AADepGraphNode; +struct AADepGraph; struct Attributor; struct AbstractAttribute; struct InformationCache; struct AAIsDead; +class AAManager; +class AAResults; class Function; -/// Simple enum classes that forces properties to be spelled out explicitly. -/// +/// The value passed to the line option that defines the maximal initialization +/// chain length. 
+extern unsigned MaxInitializationChainLength; + ///{ enum class ChangeStatus { CHANGED, @@ -144,6 +150,74 @@ enum class DepClassTy { }; ///} +/// The data structure for the nodes of a dependency graph +struct AADepGraphNode { +public: + virtual ~AADepGraphNode(){}; + using DepTy = PointerIntPair<AADepGraphNode *, 1>; + +protected: + /// Set of dependency graph nodes which should be updated if this one + /// is updated. The bit encodes if it is optional. + TinyPtrVector<DepTy> Deps; + + static AADepGraphNode *DepGetVal(DepTy &DT) { return DT.getPointer(); } + static AbstractAttribute *DepGetValAA(DepTy &DT) { + return cast<AbstractAttribute>(DT.getPointer()); + } + + operator AbstractAttribute *() { return cast<AbstractAttribute>(this); } + +public: + using iterator = + mapped_iterator<TinyPtrVector<DepTy>::iterator, decltype(&DepGetVal)>; + using aaiterator = + mapped_iterator<TinyPtrVector<DepTy>::iterator, decltype(&DepGetValAA)>; + + aaiterator begin() { return aaiterator(Deps.begin(), &DepGetValAA); } + aaiterator end() { return aaiterator(Deps.end(), &DepGetValAA); } + iterator child_begin() { return iterator(Deps.begin(), &DepGetVal); } + iterator child_end() { return iterator(Deps.end(), &DepGetVal); } + + virtual void print(raw_ostream &OS) const { OS << "AADepNode Impl\n"; } + TinyPtrVector<DepTy> &getDeps() { return Deps; } + + friend struct Attributor; + friend struct AADepGraph; +}; + +/// The data structure for the dependency graph +/// +/// Note that in this graph if there is an edge from A to B (A -> B), +/// then it means that B depends on A, and when the state of A is +/// updated, node B should also be updated +struct AADepGraph { + AADepGraph() {} + ~AADepGraph() {} + + using DepTy = AADepGraphNode::DepTy; + static AADepGraphNode *DepGetVal(DepTy &DT) { return DT.getPointer(); } + using iterator = + mapped_iterator<TinyPtrVector<DepTy>::iterator, decltype(&DepGetVal)>; + + /// There is no root node for the dependency graph. But the SCCIterator + /// requires a single entry point, so we maintain a fake("synthetic") root + /// node that depends on every node. + AADepGraphNode SyntheticRoot; + AADepGraphNode *GetEntryNode() { return &SyntheticRoot; } + + iterator begin() { return SyntheticRoot.child_begin(); } + iterator end() { return SyntheticRoot.child_end(); } + + void viewGraph(); + + /// Dump graph to file + void dumpGraph(); + + /// Print dependency graph + void print(); +}; + /// Helper to describe and deal with positions in the LLVM-IR. /// /// A position in the IR is described by an anchor value and an "offset" that @@ -263,8 +337,14 @@ struct IRPosition { /// Return the associated function, if any. Function *getAssociatedFunction() const { - if (auto *CB = dyn_cast<CallBase>(&getAnchorValue())) + if (auto *CB = dyn_cast<CallBase>(&getAnchorValue())) { + // We reuse the logic that associates callback calles to arguments of a + // call site here to identify the callback callee as the associated + // function. + if (Argument *Arg = getAssociatedArgument()) + return Arg->getParent(); return CB->getCalledFunction(); + } return getAnchorScope(); } @@ -312,10 +392,11 @@ struct IRPosition { /// Return the value this abstract attribute is associated with. 
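As a small sketch of the dependency-graph interface introduced above (the helper below is illustrative and not part of the change), the nodes hanging off the synthetic root can be visited directly, since AADepGraph::begin()/end() iterate the root's dependencies:

#include "llvm/Transforms/IPO/Attributor.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

// Print every node registered with the dependency graph. The Attributor
// records each abstract attribute as a dependency of the synthetic root.
static void printRegisteredNodes(AADepGraph &DG, raw_ostream &OS) {
  for (AADepGraphNode *Node : DG)
    Node->print(OS);
}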
Value &getAssociatedValue() const { - if (getArgNo() < 0 || isa<Argument>(&getAnchorValue())) + if (getCallSiteArgNo() < 0 || isa<Argument>(&getAnchorValue())) return getAnchorValue(); assert(isa<CallBase>(&getAnchorValue()) && "Expected a call base!"); - return *cast<CallBase>(&getAnchorValue())->getArgOperand(getArgNo()); + return *cast<CallBase>(&getAnchorValue()) + ->getArgOperand(getCallSiteArgNo()); } /// Return the type this abstract attribute is associated with. @@ -325,19 +406,22 @@ struct IRPosition { return getAssociatedValue().getType(); } - /// Return the argument number of the associated value if it is an argument or - /// call site argument, otherwise a negative value. - int getArgNo() const { - switch (getPositionKind()) { - case IRPosition::IRP_ARGUMENT: - return cast<Argument>(getAsValuePtr())->getArgNo(); - case IRPosition::IRP_CALL_SITE_ARGUMENT: { - Use &U = *getAsUsePtr(); - return cast<CallBase>(U.getUser())->getArgOperandNo(&U); - } - default: - return -1; - } + /// Return the callee argument number of the associated value if it is an + /// argument or call site argument, otherwise a negative value. In contrast to + /// `getCallSiteArgNo` this method will always return the "argument number" + /// from the perspective of the callee. This may not the same as the call site + /// if this is a callback call. + int getCalleeArgNo() const { + return getArgNo(/* CallbackCalleeArgIfApplicable */ true); + } + + /// Return the call site argument number of the associated value if it is an + /// argument or call site argument, otherwise a negative value. In contrast to + /// `getCalleArgNo` this method will always return the "operand number" from + /// the perspective of the call site. This may not the same as the callee + /// perspective if this is a callback call. + int getCallSiteArgNo() const { + return getArgNo(/* CallbackCalleeArgIfApplicable */ false); } /// Return the index in the attribute list for this position. @@ -354,7 +438,7 @@ struct IRPosition { return AttributeList::ReturnIndex; case IRPosition::IRP_ARGUMENT: case IRPosition::IRP_CALL_SITE_ARGUMENT: - return getArgNo() + AttributeList::FirstArgIndex; + return getCallSiteArgNo() + AttributeList::FirstArgIndex; } llvm_unreachable( "There is no attribute index for a floating or invalid position!"); @@ -439,6 +523,17 @@ struct IRPosition { } } + /// Return true if the position is an argument or call site argument. + bool isArgumentPosition() const { + switch (getPositionKind()) { + case IRPosition::IRP_ARGUMENT: + case IRPosition::IRP_CALL_SITE_ARGUMENT: + return true; + default: + return false; + } + } + /// Special DenseMap key values. /// ///{ @@ -485,6 +580,25 @@ private: verify(); } + /// Return the callee argument number of the associated value if it is an + /// argument or call site argument. See also `getCalleeArgNo` and + /// `getCallSiteArgNo`. + int getArgNo(bool CallbackCalleeArgIfApplicable) const { + if (CallbackCalleeArgIfApplicable) + if (Argument *Arg = getAssociatedArgument()) + return Arg->getArgNo(); + switch (getPositionKind()) { + case IRPosition::IRP_ARGUMENT: + return cast<Argument>(getAsValuePtr())->getArgNo(); + case IRPosition::IRP_CALL_SITE_ARGUMENT: { + Use &U = *getAsUsePtr(); + return cast<CallBase>(U.getUser())->getArgOperandNo(&U); + } + default: + return -1; + } + } + /// IRPosition for the use \p U. The position kind \p PK needs to be /// IRP_CALL_SITE_ARGUMENT, the anchor value is the user, the associated value /// the used value. 
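The split into getCalleeArgNo() and getCallSiteArgNo() above matters for callback calls, where the callee-side argument number and the call-site operand number can differ; for a direct call the two agree. A short illustrative helper (not part of the change):

#include "llvm/Transforms/IPO/Attributor.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

static void dumpArgumentNumbers(const IRPosition &IRP, raw_ostream &OS) {
  if (!IRP.isArgumentPosition()) {
    OS << "not an argument or call site argument position\n";
    return;
  }
  // Both accessors return a negative value for non-argument positions, so the
  // guard above is only needed to keep the output readable.
  OS << "callee arg no: " << IRP.getCalleeArgNo()
     << ", call site arg no: " << IRP.getCallSiteArgNo() << "\n";
}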
@@ -648,7 +762,10 @@ struct InformationCache { [&](const Function &F) { return AG.getAnalysis<PostDominatorTreeAnalysis>(F); }), - AG(AG), CGSCC(CGSCC) {} + AG(AG), CGSCC(CGSCC) { + if (CGSCC) + initializeModuleSlice(*CGSCC); + } ~InformationCache() { // The FunctionInfo objects are allocated via a BumpPtrAllocator, we call @@ -657,6 +774,68 @@ struct InformationCache { It.getSecond()->~FunctionInfo(); } + /// Apply \p CB to all uses of \p F. If \p LookThroughConstantExprUses is + /// true, constant expression users are not given to \p CB but their uses are + /// traversed transitively. + template <typename CBTy> + static void foreachUse(Function &F, CBTy CB, + bool LookThroughConstantExprUses = true) { + SmallVector<Use *, 8> Worklist(make_pointer_range(F.uses())); + + for (unsigned Idx = 0; Idx < Worklist.size(); ++Idx) { + Use &U = *Worklist[Idx]; + + // Allow use in constant bitcasts and simply look through them. + if (LookThroughConstantExprUses && isa<ConstantExpr>(U.getUser())) { + for (Use &CEU : cast<ConstantExpr>(U.getUser())->uses()) + Worklist.push_back(&CEU); + continue; + } + + CB(U); + } + } + + /// Initialize the ModuleSlice member based on \p SCC. ModuleSlices contains + /// (a subset of) all functions that we can look at during this SCC traversal. + /// This includes functions (transitively) called from the SCC and the + /// (transitive) callers of SCC functions. We also can look at a function if + /// there is a "reference edge", i.a., if the function somehow uses (!=calls) + /// a function in the SCC or a caller of a function in the SCC. + void initializeModuleSlice(SetVector<Function *> &SCC) { + ModuleSlice.insert(SCC.begin(), SCC.end()); + + SmallPtrSet<Function *, 16> Seen; + SmallVector<Function *, 16> Worklist(SCC.begin(), SCC.end()); + while (!Worklist.empty()) { + Function *F = Worklist.pop_back_val(); + ModuleSlice.insert(F); + + for (Instruction &I : instructions(*F)) + if (auto *CB = dyn_cast<CallBase>(&I)) + if (Function *Callee = CB->getCalledFunction()) + if (Seen.insert(Callee).second) + Worklist.push_back(Callee); + } + + Seen.clear(); + Worklist.append(SCC.begin(), SCC.end()); + while (!Worklist.empty()) { + Function *F = Worklist.pop_back_val(); + ModuleSlice.insert(F); + + // Traverse all transitive uses. + foreachUse(*F, [&](Use &U) { + if (auto *UsrI = dyn_cast<Instruction>(U.getUser())) + if (Seen.insert(UsrI->getFunction()).second) + Worklist.push_back(UsrI->getFunction()); + }); + } + } + + /// The slice of the module we are allowed to look at. + SmallPtrSet<Function *, 8> ModuleSlice; + /// A vector type to hold instructions. using InstructionVectorTy = SmallVector<Instruction *, 8>; @@ -685,9 +864,7 @@ struct InformationCache { } /// Return AliasAnalysis Result for function \p F. - AAResults *getAAResultsForFunction(const Function &F) { - return AG.getAnalysis<AAManager>(F); - } + AAResults *getAAResultsForFunction(const Function &F); /// Return true if \p Arg is involved in a must-tail call, thus the argument /// of the caller or callee. @@ -715,6 +892,26 @@ struct InformationCache { /// Return the map conaining all the knowledge we have from `llvm.assume`s. 
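The new static InformationCache::foreachUse helper above visits all uses of a function and, by default, looks through constant-expression users such as bitcasts. A minimal caller might look like the following sketch (the helper name is an assumption):

#include "llvm/Transforms/IPO/Attributor.h"
#include "llvm/IR/InstrTypes.h"

using namespace llvm;

// Count direct call sites of F, including calls that go through a constant
// bitcast of F, which foreachUse traverses transparently.
static unsigned countDirectCallSites(Function &F) {
  unsigned NumCallSites = 0;
  InformationCache::foreachUse(F, [&](Use &U) {
    if (auto *CB = dyn_cast<CallBase>(U.getUser()))
      if (CB->isCallee(&U))
        ++NumCallSites;
  });
  return NumCallSites;
}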
const RetainedKnowledgeMap &getKnowledgeMap() const { return KnowledgeMap; } + /// Return if \p To is potentially reachable form \p From or not + /// If the same query was answered, return cached result + bool getPotentiallyReachable(const Instruction &From, const Instruction &To) { + auto KeyPair = std::make_pair(&From, &To); + auto Iter = PotentiallyReachableMap.find(KeyPair); + if (Iter != PotentiallyReachableMap.end()) + return Iter->second; + const Function &F = *From.getFunction(); + bool Result = isPotentiallyReachable( + &From, &To, nullptr, AG.getAnalysis<DominatorTreeAnalysis>(F), + AG.getAnalysis<LoopAnalysis>(F)); + PotentiallyReachableMap.insert(std::make_pair(KeyPair, Result)); + return Result; + } + + /// Check whether \p F is part of module slice. + bool isInModuleSlice(const Function &F) { + return ModuleSlice.count(const_cast<Function *>(&F)); + } + private: struct FunctionInfo { ~FunctionInfo(); @@ -774,6 +971,10 @@ private: /// Set of inlineable functions SmallPtrSet<const Function *, 8> InlineableFunctions; + /// A map for caching results of queries for isPotentiallyReachable + DenseMap<std::pair<const Instruction *, const Instruction *>, bool> + PotentiallyReachableMap; + /// Give the Attributor access to the members so /// Attributor::identifyDefaultAbstractAttributes(...) can initialize them. friend struct Attributor; @@ -876,6 +1077,7 @@ struct Attributor { /// attribute. Using this after Attributor started running is restricted to /// only the Attributor itself. Initial seeding of AAs can be done via this /// function. + /// NOTE: ForceUpdate is ignored in any stage other than the update stage. template <typename AAType> const AAType &getOrCreateAAFor(const IRPosition &IRP, const AbstractAttribute *QueryingAA = nullptr, @@ -883,7 +1085,7 @@ struct Attributor { DepClassTy DepClass = DepClassTy::OPTIONAL, bool ForceUpdate = false) { if (AAType *AAPtr = lookupAAFor<AAType>(IRP, QueryingAA, TrackDependence)) { - if (ForceUpdate) + if (ForceUpdate && Phase == AttributorPhase::UPDATE) updateAA(*AAPtr); return *AAPtr; } @@ -893,7 +1095,7 @@ struct Attributor { auto &AA = AAType::createForPosition(IRP, *this); // If we are currenty seeding attributes, enforce seeding rules. - if (SeedingPeriod && !shouldSeedAttribute(AA)) { + if (Phase == AttributorPhase::SEEDING && !shouldSeedAttribute(AA)) { AA.getState().indicatePessimisticFixpoint(); return AA; } @@ -907,6 +1109,9 @@ struct Attributor { Invalidate |= FnScope->hasFnAttribute(Attribute::Naked) || FnScope->hasFnAttribute(Attribute::OptimizeNone); + // Avoid too many nested initializations to prevent a stack overflow. + Invalidate |= InitializationChainLength > MaxInitializationChainLength; + // Bootstrap the new attribute with an initial update to propagate // information, e.g., function -> call site. If it is not on a given // Allowed we will not perform updates at all. @@ -915,24 +1120,39 @@ struct Attributor { return AA; } - AA.initialize(*this); + { + TimeTraceScope TimeScope(AA.getName() + "::initialize"); + ++InitializationChainLength; + AA.initialize(*this); + --InitializationChainLength; + } - // We can initialize (=look at) code outside the current function set but - // not call update because that would again spawn new abstract attributes in - // potentially unconnected code regions (=SCCs). + // Initialize and update is allowed for code outside of the current function + // set, but only if it is part of module slice we are allowed to look at. 
+ // Only exception is AAIsDeadFunction whose initialization is prevented + // directly, since we don't to compute it twice. if (FnScope && !Functions.count(const_cast<Function *>(FnScope))) { + if (!getInfoCache().isInModuleSlice(*FnScope)) { + AA.getState().indicatePessimisticFixpoint(); + return AA; + } + } + + // If this is queried in the manifest stage, we force the AA to indicate + // pessimistic fixpoint immediately. + if (Phase == AttributorPhase::MANIFEST) { AA.getState().indicatePessimisticFixpoint(); return AA; } // Allow seeded attributes to declare dependencies. // Remember the seeding state. - bool OldSeedingPeriod = SeedingPeriod; - SeedingPeriod = false; + AttributorPhase OldPhase = Phase; + Phase = AttributorPhase::UPDATE; updateAA(AA); - SeedingPeriod = OldSeedingPeriod; + Phase = OldPhase; if (TrackDependence && AA.getState().isValidState()) recordDependence(AA, const_cast<AbstractAttribute &>(*QueryingAA), @@ -1001,7 +1221,11 @@ struct Attributor { assert(!AAPtr && "Attribute already in map!"); AAPtr = &AA; - AllAbstractAttributes.push_back(&AA); + // Register AA with the synthetic root only before the manifest stage. + if (Phase == AttributorPhase::SEEDING || Phase == AttributorPhase::UPDATE) + DG.SyntheticRoot.Deps.push_back( + AADepGraphNode::DepTy(&AA, unsigned(DepClassTy::REQUIRED))); + return AA; } @@ -1310,6 +1534,22 @@ struct Attributor { bool checkForAllReadWriteInstructions(function_ref<bool(Instruction &)> Pred, AbstractAttribute &QueryingAA); + /// Create a shallow wrapper for \p F such that \p F has internal linkage + /// afterwards. It also sets the original \p F 's name to anonymous + /// + /// A wrapper is a function with the same type (and attributes) as \p F + /// that will only call \p F and return the result, if any. + /// + /// Assuming the declaration of looks like: + /// rty F(aty0 arg0, ..., atyN argN); + /// + /// The wrapper will then look as follows: + /// rty wrapper(aty0 arg0, ..., atyN argN) { + /// return F(arg0, ..., argN); + /// } + /// + static void createShallowWrapper(Function &F); + /// Return the data layout associated with the anchor scope. const DataLayout &getDataLayout() const { return InfoCache.DL; } @@ -1333,6 +1573,10 @@ private: /// Rewrites function signitures and updates the call graph. ChangeStatus cleanupIR(); + /// Identify internal functions that are effectively dead, thus not reachable + /// from a live entry point. The functions are added to ToBeDeletedFunctions. + void identifyDeadInternalFunctions(); + /// Run `::update` on \p AA and track the dependences queried while doing so. /// Also adjust the state if we know further updates are not necessary. ChangeStatus updateAA(AbstractAttribute &AA); @@ -1363,12 +1607,6 @@ private: /// See getOrCreateAAFor. bool shouldSeedAttribute(AbstractAttribute &AA); - /// The set of all abstract attributes. - ///{ - using AAVector = SmallVector<AbstractAttribute *, 64>; - AAVector AllAbstractAttributes; - ///} - /// A nested map to lookup abstract attributes based on the argument position /// on the outer level, and the addresses of the static member (AAType::ID) on /// the inner level. @@ -1390,6 +1628,9 @@ private: /// Helper to update an underlying call graph. CallGraphUpdater &CGUpdater; + /// Abstract Attribute dependency graph + AADepGraph DG; + /// Set of functions for which we modified the content such that it might /// impact the call graph. 
SmallPtrSet<Function *, 8> CGModifiedFunctions; @@ -1428,9 +1669,17 @@ private: /// Invoke instructions with at least a single dead successor block. SmallVector<WeakVH, 16> InvokeWithDeadSuccessor; - /// Wheather attributes are being `seeded`, always false after ::run function - /// gets called \see getOrCreateAAFor. - bool SeedingPeriod = true; + /// A flag that indicates which stage of the process we are in. Initially, the + /// phase is SEEDING. Phase is changed in `Attributor::run()` + enum class AttributorPhase { + SEEDING, + UPDATE, + MANIFEST, + CLEANUP, + } Phase = AttributorPhase::SEEDING; + + /// The current initialization chain length. Tracked to avoid stack overflows. + unsigned InitializationChainLength = 0; /// Functions, blocks, and instructions we delete after manifest is done. /// @@ -1439,6 +1688,8 @@ private: SmallPtrSet<BasicBlock *, 8> ToBeDeletedBlocks; SmallDenseSet<WeakVH, 8> ToBeDeletedInsts; ///} + + friend AADepGraph; }; /// An interface to query the internal state of an abstract attribute. @@ -1917,7 +2168,7 @@ struct StateWrapper : public BaseType, public StateTy { StateType &getState() override { return *this; } /// See AbstractAttribute::getState(...). - const AbstractState &getState() const override { return *this; } + const StateType &getState() const override { return *this; } }; /// Helper class that provides common functionality to manifest IR attributes. @@ -2011,7 +2262,7 @@ struct IRAttribute : public BaseType { /// both directions will be added in the future. /// NOTE: The mechanics of adding a new "concrete" abstract attribute are /// described in the file comment. -struct AbstractAttribute : public IRPosition { +struct AbstractAttribute : public IRPosition, public AADepGraphNode { using StateType = AbstractState; AbstractAttribute(const IRPosition &IRP) : IRPosition(IRP) {} @@ -2019,6 +2270,14 @@ struct AbstractAttribute : public IRPosition { /// Virtual destructor. virtual ~AbstractAttribute() {} + /// This function is used to identify if an \p DGN is of type + /// AbstractAttribute so that the dyn_cast and cast can use such information + /// to cast an AADepGraphNode to an AbstractAttribute. + /// + /// We eagerly return true here because all AADepGraphNodes except for the + /// Synthethis Node are of type AbstractAttribute + static bool classof(const AADepGraphNode *DGN) { return true; } + /// Initialize the state with the information in the Attributor \p A. /// /// This function is called by the Attributor once all abstract attributes @@ -2039,7 +2298,8 @@ struct AbstractAttribute : public IRPosition { /// Helper functions, for debug purposes only. ///{ - virtual void print(raw_ostream &OS) const; + void print(raw_ostream &OS) const override; + virtual void printWithDeps(raw_ostream &OS) const; void dump() const { print(dbgs()); } /// This function should return the "summarized" assumed state as string. @@ -2087,12 +2347,6 @@ protected: /// /// \Return CHANGED if the internal state changed, otherwise UNCHANGED. virtual ChangeStatus updateImpl(Attributor &A) = 0; - -private: - /// Set of abstract attributes which were queried by this one. The bit encodes - /// if there is an optional of required dependence. - using DepTy = PointerIntPair<AbstractAttribute *, 1>; - TinyPtrVector<DepTy> Deps; }; /// Forward declarations of output streams for debug purposes. @@ -2374,16 +2628,17 @@ struct AAReachability : public StateWrapper<BooleanState, AbstractAttribute> { /// Returns true if 'From' instruction is assumed to reach, 'To' instruction. 
/// Users should provide two positions they are interested in, and the class /// determines (and caches) reachability. - bool isAssumedReachable(const Instruction *From, - const Instruction *To) const { - return isPotentiallyReachable(From, To); + bool isAssumedReachable(Attributor &A, const Instruction &From, + const Instruction &To) const { + return A.getInfoCache().getPotentiallyReachable(From, To); } /// Returns true if 'From' instruction is known to reach, 'To' instruction. /// Users should provide two positions they are interested in, and the class /// determines (and caches) reachability. - bool isKnownReachable(const Instruction *From, const Instruction *To) const { - return isPotentiallyReachable(From, To); + bool isKnownReachable(Attributor &A, const Instruction &From, + const Instruction &To) const { + return A.getInfoCache().getPotentiallyReachable(From, To); } /// Create an abstract attribute view for the position \p IRP. @@ -2546,6 +2801,12 @@ public: return F.hasPersonalityFn() && !canSimplifyInvokeNoUnwind(&F); } + /// Return if the edge from \p From BB to \p To BB is assumed dead. + /// This is specifically useful in AAReachability. + virtual bool isEdgeDead(const BasicBlock *From, const BasicBlock *To) const { + return false; + } + /// See AbstractAttribute::getName() const std::string getName() const override { return "AAIsDead"; } @@ -3202,7 +3463,7 @@ struct AAValueConstantRange /// See AbstractAttribute::getState(...). IntegerRangeState &getState() override { return *this; } - const AbstractState &getState() const override { return *this; } + const IntegerRangeState &getState() const override { return *this; } /// Create an abstract attribute view for the position \p IRP. static AAValueConstantRange &createForPosition(const IRPosition &IRP, @@ -3250,6 +3511,279 @@ struct AAValueConstantRange static const char ID; }; +/// A class for a set state. +/// The assumed boolean state indicates whether the corresponding set is full +/// set or not. If the assumed state is false, this is the worst state. The +/// worst state (invalid state) of set of potential values is when the set +/// contains every possible value (i.e. we cannot in any way limit the value +/// that the target position can take). That never happens naturally, we only +/// force it. As for the conditions under which we force it, see +/// AAPotentialValues. +template <typename MemberTy, typename KeyInfo = DenseMapInfo<MemberTy>> +struct PotentialValuesState : AbstractState { + using SetTy = DenseSet<MemberTy, KeyInfo>; + + PotentialValuesState() : IsValidState(true), UndefIsContained(false) {} + + PotentialValuesState(bool IsValid) + : IsValidState(IsValid), UndefIsContained(false) {} + + /// See AbstractState::isValidState(...) + bool isValidState() const override { return IsValidState.isValidState(); } + + /// See AbstractState::isAtFixpoint(...) + bool isAtFixpoint() const override { return IsValidState.isAtFixpoint(); } + + /// See AbstractState::indicatePessimisticFixpoint(...) + ChangeStatus indicatePessimisticFixpoint() override { + return IsValidState.indicatePessimisticFixpoint(); + } + + /// See AbstractState::indicateOptimisticFixpoint(...) + ChangeStatus indicateOptimisticFixpoint() override { + return IsValidState.indicateOptimisticFixpoint(); + } + + /// Return the assumed state + PotentialValuesState &getAssumed() { return *this; } + const PotentialValuesState &getAssumed() const { return *this; } + + /// Return this set. 
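With the change above, AAReachability answers are backed by the Attributor's cached isPotentiallyReachable results instead of being recomputed per query. A sketch of how another abstract attribute might consult it; IRPosition::function is assumed to be available from the same header, and the helper itself is illustrative:

#include "llvm/Transforms/IPO/Attributor.h"

using namespace llvm;

// Ask whether From is assumed to reach To inside their function, going
// through the function-scope AAReachability attribute so results are cached.
static bool mayReach(Attributor &A, const AbstractAttribute &QueryingAA,
                     const Instruction &From, const Instruction &To) {
  const auto &ReachAA = A.getOrCreateAAFor<AAReachability>(
      IRPosition::function(*From.getFunction()), &QueryingAA);
  return ReachAA.isAssumedReachable(A, From, To);
}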
We should check whether this set is valid or not by + /// isValidState() before calling this function. + const SetTy &getAssumedSet() const { + assert(isValidState() && "This set shoud not be used when it is invalid!"); + return Set; + } + + /// Returns whether this state contains an undef value or not. + bool undefIsContained() const { + assert(isValidState() && "This flag shoud not be used when it is invalid!"); + return UndefIsContained; + } + + bool operator==(const PotentialValuesState &RHS) const { + if (isValidState() != RHS.isValidState()) + return false; + if (!isValidState() && !RHS.isValidState()) + return true; + if (undefIsContained() != RHS.undefIsContained()) + return false; + return Set == RHS.getAssumedSet(); + } + + /// Maximum number of potential values to be tracked. + /// This is set by -attributor-max-potential-values command line option + static unsigned MaxPotentialValues; + + /// Return empty set as the best state of potential values. + static PotentialValuesState getBestState() { + return PotentialValuesState(true); + } + + static PotentialValuesState getBestState(PotentialValuesState &PVS) { + return getBestState(); + } + + /// Return full set as the worst state of potential values. + static PotentialValuesState getWorstState() { + return PotentialValuesState(false); + } + + /// Union assumed set with the passed value. + void unionAssumed(const MemberTy &C) { insert(C); } + + /// Union assumed set with assumed set of the passed state \p PVS. + void unionAssumed(const PotentialValuesState &PVS) { unionWith(PVS); } + + /// Union assumed set with an undef value. + void unionAssumedWithUndef() { unionWithUndef(); } + + /// "Clamp" this state with \p PVS. + PotentialValuesState operator^=(const PotentialValuesState &PVS) { + IsValidState ^= PVS.IsValidState; + unionAssumed(PVS); + return *this; + } + + PotentialValuesState operator&=(const PotentialValuesState &PVS) { + IsValidState &= PVS.IsValidState; + unionAssumed(PVS); + return *this; + } + +private: + /// Check the size of this set, and invalidate when the size is no + /// less than \p MaxPotentialValues threshold. + void checkAndInvalidate() { + if (Set.size() >= MaxPotentialValues) + indicatePessimisticFixpoint(); + } + + /// If this state contains both undef and not undef, we can reduce + /// undef to the not undef value. + void reduceUndefValue() { UndefIsContained = UndefIsContained & Set.empty(); } + + /// Insert an element into this set. + void insert(const MemberTy &C) { + if (!isValidState()) + return; + Set.insert(C); + checkAndInvalidate(); + } + + /// Take union with R. + void unionWith(const PotentialValuesState &R) { + /// If this is a full set, do nothing.; + if (!isValidState()) + return; + /// If R is full set, change L to a full set. + if (!R.isValidState()) { + indicatePessimisticFixpoint(); + return; + } + for (const MemberTy &C : R.Set) + Set.insert(C); + UndefIsContained |= R.undefIsContained(); + reduceUndefValue(); + checkAndInvalidate(); + } + + /// Take union with an undef value. + void unionWithUndef() { + UndefIsContained = true; + reduceUndefValue(); + } + + /// Take intersection with R. + void intersectWith(const PotentialValuesState &R) { + /// If R is a full set, do nothing. + if (!R.isValidState()) + return; + /// If this is a full set, change this to R. 
+ if (!isValidState()) { + *this = R; + return; + } + SetTy IntersectSet; + for (const MemberTy &C : Set) { + if (R.Set.count(C)) + IntersectSet.insert(C); + } + Set = IntersectSet; + UndefIsContained &= R.undefIsContained(); + reduceUndefValue(); + } + + /// A helper state which indicate whether this state is valid or not. + BooleanState IsValidState; + + /// Container for potential values + SetTy Set; + + /// Flag for undef value + bool UndefIsContained; +}; + +using PotentialConstantIntValuesState = PotentialValuesState<APInt>; + +raw_ostream &operator<<(raw_ostream &OS, + const PotentialConstantIntValuesState &R); + +/// An abstract interface for potential values analysis. +/// +/// This AA collects potential values for each IR position. +/// An assumed set of potential values is initialized with the empty set (the +/// best state) and it will grow monotonically as we find more potential values +/// for this position. +/// The set might be forced to the worst state, that is, to contain every +/// possible value for this position in 2 cases. +/// 1. We surpassed the \p MaxPotentialValues threshold. This includes the +/// case that this position is affected (e.g. because of an operation) by a +/// Value that is in the worst state. +/// 2. We tried to initialize on a Value that we cannot handle (e.g. an +/// operator we do not currently handle). +/// +/// TODO: Support values other than constant integers. +struct AAPotentialValues + : public StateWrapper<PotentialConstantIntValuesState, AbstractAttribute> { + using Base = StateWrapper<PotentialConstantIntValuesState, AbstractAttribute>; + AAPotentialValues(const IRPosition &IRP, Attributor &A) : Base(IRP) {} + + /// See AbstractAttribute::getState(...). + PotentialConstantIntValuesState &getState() override { return *this; } + const PotentialConstantIntValuesState &getState() const override { + return *this; + } + + /// Create an abstract attribute view for the position \p IRP. + static AAPotentialValues &createForPosition(const IRPosition &IRP, + Attributor &A); + + /// Return assumed constant for the associated value + Optional<ConstantInt *> + getAssumedConstantInt(Attributor &A, + const Instruction *CtxI = nullptr) const { + if (!isValidState()) + return nullptr; + if (getAssumedSet().size() == 1) + return cast<ConstantInt>(ConstantInt::get(getAssociatedValue().getType(), + *(getAssumedSet().begin()))); + if (getAssumedSet().size() == 0) { + if (undefIsContained()) + return cast<ConstantInt>( + ConstantInt::get(getAssociatedValue().getType(), 0)); + return llvm::None; + } + + return nullptr; + } + + /// See AbstractAttribute::getName() + const std::string getName() const override { return "AAPotentialValues"; } + + /// See AbstractAttribute::getIdAddr() + const char *getIdAddr() const override { return &ID; } + + /// This function should return true if the type of the \p AA is + /// AAPotentialValues + static bool classof(const AbstractAttribute *AA) { + return (AA->getIdAddr() == &ID); + } + + /// Unique ID (due to the unique address) + static const char ID; +}; + +/// An abstract interface for all noundef attributes. +struct AANoUndef + : public IRAttribute<Attribute::NoUndef, + StateWrapper<BooleanState, AbstractAttribute>> { + AANoUndef(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {} + + /// Return true if we assume that the underlying value is noundef. + bool isAssumedNoUndef() const { return getAssumed(); } + + /// Return true if we know that underlying value is noundef. 
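A sketch of how the AAPotentialValues interface above can be queried once the assumed set has collapsed to a single constant; the helper and the use of IRPosition::value are illustrative assumptions:

#include "llvm/Transforms/IPO/Attributor.h"

using namespace llvm;

// Return the single ConstantInt that V is assumed to take, or nullptr when
// no single assumed constant is known (invalid state, several candidates, or
// an empty set without undef).
static ConstantInt *getSingleAssumedConstant(Attributor &A,
                                             const AbstractAttribute &QueryingAA,
                                             Value &V) {
  const auto &PotentialAA = A.getOrCreateAAFor<AAPotentialValues>(
      IRPosition::value(V), &QueryingAA);
  if (Optional<ConstantInt *> C = PotentialAA.getAssumedConstantInt(A))
    return *C;
  return nullptr;
}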
+ bool isKnownNoUndef() const { return getKnown(); } + + /// Create an abstract attribute view for the position \p IRP. + static AANoUndef &createForPosition(const IRPosition &IRP, Attributor &A); + + /// See AbstractAttribute::getName() + const std::string getName() const override { return "AANoUndef"; } + + /// See AbstractAttribute::getIdAddr() + const char *getIdAddr() const override { return &ID; } + + /// This function should return true if the type of the \p AA is AANoUndef + static bool classof(const AbstractAttribute *AA) { + return (AA->getIdAddr() == &ID); + } + + /// Unique ID (due to the unique address) + static const char ID; +}; + /// Run options, used by the pass manager. enum AttributorRunOption { NONE = 0, diff --git a/llvm/include/llvm/Transforms/IPO/BlockExtractor.h b/llvm/include/llvm/Transforms/IPO/BlockExtractor.h new file mode 100644 index 000000000000..deeb5ebe23d9 --- /dev/null +++ b/llvm/include/llvm/Transforms/IPO/BlockExtractor.h @@ -0,0 +1,25 @@ +//===- BlockExtractor.h - Extracts blocks into their own functions --------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass extracts the specified basic blocks from the module into their +// own functions. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_IPO_BLOCKEXTRACTOR_H +#define LLVM_TRANSFORMS_IPO_BLOCKEXTRACTOR_H + +#include "llvm/IR/PassManager.h" + +namespace llvm { +struct BlockExtractorPass : PassInfoMixin<BlockExtractorPass> { + PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); +}; +} // namespace llvm + +#endif // LLVM_TRANSFORMS_IPO_BLOCKEXTRACTOR_H diff --git a/llvm/include/llvm/Transforms/IPO/CalledValuePropagation.h b/llvm/include/llvm/Transforms/IPO/CalledValuePropagation.h index c2626d0867b4..782633799ede 100644 --- a/llvm/include/llvm/Transforms/IPO/CalledValuePropagation.h +++ b/llvm/include/llvm/Transforms/IPO/CalledValuePropagation.h @@ -19,7 +19,6 @@ #ifndef LLVM_TRANSFORMS_IPO_CALLEDVALUEPROPAGATION_H #define LLVM_TRANSFORMS_IPO_CALLEDVALUEPROPAGATION_H -#include "llvm/IR/Module.h" #include "llvm/IR/PassManager.h" namespace llvm { diff --git a/llvm/include/llvm/Transforms/IPO/CrossDSOCFI.h b/llvm/include/llvm/Transforms/IPO/CrossDSOCFI.h index 8440df639729..d34a51081101 100644 --- a/llvm/include/llvm/Transforms/IPO/CrossDSOCFI.h +++ b/llvm/include/llvm/Transforms/IPO/CrossDSOCFI.h @@ -14,7 +14,6 @@ #ifndef LLVM_TRANSFORMS_IPO_CROSSDSOCFI_H #define LLVM_TRANSFORMS_IPO_CROSSDSOCFI_H -#include "llvm/IR/Module.h" #include "llvm/IR/PassManager.h" namespace llvm { diff --git a/llvm/include/llvm/Transforms/IPO/DeadArgumentElimination.h b/llvm/include/llvm/Transforms/IPO/DeadArgumentElimination.h index 73797bc10017..496ceea12bc9 100644 --- a/llvm/include/llvm/Transforms/IPO/DeadArgumentElimination.h +++ b/llvm/include/llvm/Transforms/IPO/DeadArgumentElimination.h @@ -128,6 +128,7 @@ private: Liveness SurveyUses(const Value *V, UseVector &MaybeLiveUses); void SurveyFunction(const Function &F); + bool IsLive(const RetOrArg &RA); void MarkValue(const RetOrArg &RA, Liveness L, const UseVector &MaybeLiveUses); void MarkLive(const RetOrArg &RA); diff --git a/llvm/include/llvm/Transforms/IPO/ForceFunctionAttrs.h b/llvm/include/llvm/Transforms/IPO/ForceFunctionAttrs.h 
index 7379009b2592..fd99843d0449 100644 --- a/llvm/include/llvm/Transforms/IPO/ForceFunctionAttrs.h +++ b/llvm/include/llvm/Transforms/IPO/ForceFunctionAttrs.h @@ -13,7 +13,6 @@ #ifndef LLVM_TRANSFORMS_IPO_FORCEFUNCTIONATTRS_H #define LLVM_TRANSFORMS_IPO_FORCEFUNCTIONATTRS_H -#include "llvm/IR/Module.h" #include "llvm/IR/PassManager.h" namespace llvm { diff --git a/llvm/include/llvm/Transforms/IPO/IROutliner.h b/llvm/include/llvm/Transforms/IPO/IROutliner.h new file mode 100644 index 000000000000..eefcbe5235c1 --- /dev/null +++ b/llvm/include/llvm/Transforms/IPO/IROutliner.h @@ -0,0 +1,358 @@ +//===- IROutliner.h - Extract similar IR regions into functions ------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// \file +// The interface file for the IROutliner which is used by the IROutliner Pass. +// +// The outliner uses the IRSimilarityIdentifier to identify the similar regions +// of code. It evaluates each set of IRSimilarityCandidates with an estimate of +// whether it will provide code size reduction. Each region is extracted using +// the code extractor. These extracted functions are consolidated into a single +// function and called from the extracted call site. +// +// For example: +// \code +// %1 = add i32 %a, %b +// %2 = add i32 %b, %a +// %3 = add i32 %b, %a +// %4 = add i32 %a, %b +// \endcode +// would become function +// \code +// define internal void outlined_ir_function(i32 %0, i32 %1) { +// %1 = add i32 %0, %1 +// %2 = add i32 %1, %0 +// ret void +// } +// \endcode +// with calls: +// \code +// call void outlined_ir_function(i32 %a, i32 %b) +// call void outlined_ir_function(i32 %b, i32 %a) +// \endcode +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_IPO_IROUTLINER_H +#define LLVM_TRANSFORMS_IPO_IROUTLINER_H + +#include "llvm/Analysis/IRSimilarityIdentifier.h" +#include "llvm/IR/PassManager.h" +#include "llvm/IR/ValueMap.h" +#include "llvm/Support/InstructionCost.h" +#include "llvm/Transforms/Utils/CodeExtractor.h" +#include <set> + +struct OutlinableGroup; + +namespace llvm { +using namespace IRSimilarity; + +class Module; +class TargetTransformInfo; +class OptimizationRemarkEmitter; + +/// The OutlinableRegion holds all the information for a specific region, or +/// sequence of instructions. This includes what values need to be hoisted to +/// arguments from the extracted function, inputs and outputs to the region, and +/// mapping from the extracted function arguments to overall function arguments. +struct OutlinableRegion { + /// Describes the region of code. + IRSimilarityCandidate *Candidate; + + /// If this region is outlined, the front and back IRInstructionData could + /// potentially become invalidated if the only new instruction is a call. + /// This ensures that we replace in the instruction in the IRInstructionData. + IRInstructionData *NewFront = nullptr; + IRInstructionData *NewBack = nullptr; + + /// The number of extracted inputs from the CodeExtractor. + unsigned NumExtractedInputs; + + /// The corresponding BasicBlock with the appropriate stores for this + /// OutlinableRegion in the overall function. + unsigned OutputBlockNum; + + /// Mapping the extracted argument number to the argument number in the + /// overall function. 
Since there will be inputs, such as elevated constants + /// that are not the same in each region in a SimilarityGroup, or values that + /// cannot be sunk into the extracted section in every region, we must keep + /// track of which extracted argument maps to which overall argument. + DenseMap<unsigned, unsigned> ExtractedArgToAgg; + DenseMap<unsigned, unsigned> AggArgToExtracted; + + /// Mapping of the argument number in the deduplicated function + /// to a given constant, which is used when creating the arguments to the call + /// to the newly created deduplicated function. This is handled separately + /// since the CodeExtractor does not recognize constants. + DenseMap<unsigned, Constant *> AggArgToConstant; + + /// The global value numbers that are used as outputs for this section. Once + /// extracted, each output will be stored to an output register. This + /// documents the global value numbers that are used in this pattern. + SmallVector<unsigned, 4> GVNStores; + + /// Used to create an outlined function. + CodeExtractor *CE = nullptr; + + /// The call site of the extracted region. + CallInst *Call = nullptr; + + /// The function for the extracted region. + Function *ExtractedFunction = nullptr; + + /// Flag for whether we have split out the IRSimilarityCanidate. That is, + /// make the region contained the IRSimilarityCandidate its own BasicBlock. + bool CandidateSplit = false; + + /// Flag for whether we should not consider this region for extraction. + bool IgnoreRegion = false; + + /// The BasicBlock that is before the start of the region BasicBlock, + /// only defined when the region has been split. + BasicBlock *PrevBB = nullptr; + + /// The BasicBlock that contains the starting instruction of the region. + BasicBlock *StartBB = nullptr; + + /// The BasicBlock that contains the ending instruction of the region. + BasicBlock *EndBB = nullptr; + + /// The BasicBlock that is after the start of the region BasicBlock, + /// only defined when the region has been split. + BasicBlock *FollowBB = nullptr; + + /// The Outlinable Group that contains this region and structurally similar + /// regions to this region. + OutlinableGroup *Parent = nullptr; + + OutlinableRegion(IRSimilarityCandidate &C, OutlinableGroup &Group) + : Candidate(&C), Parent(&Group) { + StartBB = C.getStartBB(); + EndBB = C.getEndBB(); + } + + /// For the contained region, split the parent BasicBlock at the starting and + /// ending instructions of the contained IRSimilarityCandidate. + void splitCandidate(); + + /// For the contained region, reattach the BasicBlock at the starting and + /// ending instructions of the contained IRSimilarityCandidate, or if the + /// function has been extracted, the start and end of the BasicBlock + /// containing the called function. + void reattachCandidate(); + + /// Get the size of the code removed from the region. + /// + /// \param [in] TTI - The TargetTransformInfo for the parent function. + /// \returns the code size of the region + InstructionCost getBenefit(TargetTransformInfo &TTI); +}; + +/// This class is a pass that identifies similarity in a Module, extracts +/// instances of the similarity, and then consolidating the similar regions +/// in an effort to reduce code size. It uses the IRSimilarityIdentifier pass +/// to identify the similar regions of code, and then extracts the similar +/// sections into a single function. See the above for an example as to +/// how code is extracted and consolidated into a single function. 
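/// As a rough usage sketch (editorial addition, not part of the upstream
/// header): the three callbacks the constructor below expects are typically
/// produced from the pass/analysis managers. The analysis names used here
/// (TargetIRAnalysis, IRSimilarityAnalysis, OptimizationRemarkEmitterAnalysis)
/// are assumptions based on the standard new-pass-manager analyses.
/// \code
///   FunctionAnalysisManager &FAM =
///       MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
///   auto GetTTI = [&FAM](Function &F) -> TargetTransformInfo & {
///     return FAM.getResult<TargetIRAnalysis>(F);
///   };
///   auto GetIRSI = [&MAM](Module &Mod) -> IRSimilarityIdentifier & {
///     return MAM.getResult<IRSimilarityAnalysis>(Mod);
///   };
///   auto GetORE = [&FAM](Function &F) -> OptimizationRemarkEmitter & {
///     return FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
///   };
///   bool Changed = IROutliner(GetTTI, GetIRSI, GetORE).run(M);
/// \endcode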
+class IROutliner { +public: + IROutliner(function_ref<TargetTransformInfo &(Function &)> GTTI, + function_ref<IRSimilarityIdentifier &(Module &)> GIRSI, + function_ref<OptimizationRemarkEmitter &(Function &)> GORE) + : getTTI(GTTI), getIRSI(GIRSI), getORE(GORE) {} + bool run(Module &M); + +private: + /// Find repeated similar code sequences in \p M and outline them into new + /// Functions. + /// + /// \param [in] M - The module to outline from. + /// \returns The number of Functions created. + unsigned doOutline(Module &M); + + /// Remove all the IRSimilarityCandidates from \p CandidateVec that have + /// instructions contained in a previously outlined region and put the + /// remaining regions in \p CurrentGroup. + /// + /// \param [in] CandidateVec - List of similarity candidates for regions with + /// the same similarity structure. + /// \param [in,out] CurrentGroup - Contains the potential sections to + /// be outlined. + void + pruneIncompatibleRegions(std::vector<IRSimilarityCandidate> &CandidateVec, + OutlinableGroup &CurrentGroup); + + /// Create the function based on the overall types found in the current + /// regions being outlined. + /// + /// \param M - The module to outline from. + /// \param [in,out] CG - The OutlinableGroup for the regions to be outlined. + /// \param [in] FunctionNameSuffix - How many functions have we previously + /// created. + /// \returns the newly created function. + Function *createFunction(Module &M, OutlinableGroup &CG, + unsigned FunctionNameSuffix); + + /// Identify the needed extracted inputs in a section, and add to the overall + /// function if needed. + /// + /// \param [in] M - The module to outline from. + /// \param [in,out] Region - The region to be extracted. + /// \param [in] NotSame - The global value numbers of the Values in the region + /// that do not have the same Constant in each strucutrally similar region. + void findAddInputsOutputs(Module &M, OutlinableRegion &Region, + DenseSet<unsigned> &NotSame); + + /// Find the number of instructions that will be removed by extracting the + /// OutlinableRegions in \p CurrentGroup. + /// + /// \param [in] CurrentGroup - The collection of OutlinableRegions to be + /// analyzed. + /// \returns the number of outlined instructions across all regions. + InstructionCost findBenefitFromAllRegions(OutlinableGroup &CurrentGroup); + + /// Find the number of instructions that will be added by reloading arguments. + /// + /// \param [in] CurrentGroup - The collection of OutlinableRegions to be + /// analyzed. + /// \returns the number of added reload instructions across all regions. + InstructionCost findCostOutputReloads(OutlinableGroup &CurrentGroup); + + /// Find the cost and the benefit of \p CurrentGroup and save it back to + /// \p CurrentGroup. + /// + /// \param [in] M - The module being analyzed + /// \param [in,out] CurrentGroup - The overall outlined section + void findCostBenefit(Module &M, OutlinableGroup &CurrentGroup); + + /// Update the output mapping based on the load instruction, and the outputs + /// of the extracted function. + /// + /// \param Region - The region extracted + /// \param Outputs - The outputs from the extracted function. + /// \param LI - The load instruction used to update the mapping. + void updateOutputMapping(OutlinableRegion &Region, + ArrayRef<Value *> Outputs, LoadInst *LI); + + /// Extract \p Region into its own function. + /// + /// \param [in] Region - The region to be extracted into its own function. + /// \returns True if it was successfully outlined. 
+ bool extractSection(OutlinableRegion &Region); + + /// For the similarities found, and the extracted sections, create a single + /// outlined function with appropriate output blocks as necessary. + /// + /// \param [in] M - The module to outline from + /// \param [in] CurrentGroup - The set of extracted sections to consolidate. + /// \param [in,out] FuncsToRemove - List of functions to remove from the + /// module after outlining is completed. + /// \param [in,out] OutlinedFunctionNum - the number of new outlined + /// functions. + void deduplicateExtractedSections(Module &M, OutlinableGroup &CurrentGroup, + std::vector<Function *> &FuncsToRemove, + unsigned &OutlinedFunctionNum); + + /// If true, enables us to outline from functions that have LinkOnceFromODR + /// linkages. + bool OutlineFromLinkODRs = false; + + /// If false, we do not worry if the cost is greater than the benefit. This + /// is for debugging and testing, so that we can test small cases to ensure + /// that the outlining is being done correctly. + bool CostModel = true; + + /// The set of outlined Instructions, identified by their location in the + /// sequential ordering of instructions in a Module. + DenseSet<unsigned> Outlined; + + /// TargetTransformInfo lambda for target specific information. + function_ref<TargetTransformInfo &(Function &)> getTTI; + + /// A mapping from newly created reloaded output values to the original value. + /// If an value is replace by an output from an outlined region, this maps + /// that Value, back to its original Value. + DenseMap<Value *, Value *> OutputMappings; + + /// IRSimilarityIdentifier lambda to retrieve IRSimilarityIdentifier. + function_ref<IRSimilarityIdentifier &(Module &)> getIRSI; + + /// The optimization remark emitter for the pass. + function_ref<OptimizationRemarkEmitter &(Function &)> getORE; + + /// The memory allocator used to allocate the CodeExtractors. + SpecificBumpPtrAllocator<CodeExtractor> ExtractorAllocator; + + /// The memory allocator used to allocate the OutlinableRegions. + SpecificBumpPtrAllocator<OutlinableRegion> RegionAllocator; + + /// The memory allocator used to allocate new IRInstructionData. + SpecificBumpPtrAllocator<IRInstructionData> InstDataAllocator; + + /// Custom InstVisitor to classify different instructions for whether it can + /// be analyzed for similarity. This is needed as there may be instruction we + /// can identify as having similarity, but are more complicated to outline. + struct InstructionAllowed : public InstVisitor<InstructionAllowed, bool> { + InstructionAllowed() {} + + // TODO: Determine a scheme to resolve when the label is similar enough. + bool visitBranchInst(BranchInst &BI) { return false; } + // TODO: Determine a scheme to resolve when the labels are similar enough. + bool visitPHINode(PHINode &PN) { return false; } + // TODO: Handle allocas. + bool visitAllocaInst(AllocaInst &AI) { return false; } + // VAArg instructions are not allowed since this could cause difficulty when + // differentiating between different sets of variable instructions in + // the deduplicated outlined regions. + bool visitVAArgInst(VAArgInst &VI) { return false; } + // We exclude all exception handling cases since they are so context + // dependent. + bool visitLandingPadInst(LandingPadInst &LPI) { return false; } + bool visitFuncletPadInst(FuncletPadInst &FPI) { return false; } + // DebugInfo should be included in the regions, but should not be + // analyzed for similarity as it has no bearing on the outcome of the + // program. 
+ bool visitDbgInfoIntrinsic(DbgInfoIntrinsic &DII) { return true; } + // TODO: Handle specific intrinsics individually from those that can be + // handled. + bool IntrinsicInst(IntrinsicInst &II) { return false; } + // We only handle CallInsts that are not indirect, since we cannot guarantee + // that they have a name in these cases. + bool visitCallInst(CallInst &CI) { + Function *F = CI.getCalledFunction(); + if (!F || CI.isIndirectCall() || !F->hasName()) + return false; + return true; + } + // TODO: Handle FreezeInsts. Since a frozen value could be frozen inside + // the outlined region, and then returned as an output, this will have to be + // handled differently. + bool visitFreezeInst(FreezeInst &CI) { return false; } + // TODO: We do not current handle similarity that changes the control flow. + bool visitInvokeInst(InvokeInst &II) { return false; } + // TODO: We do not current handle similarity that changes the control flow. + bool visitCallBrInst(CallBrInst &CBI) { return false; } + // TODO: Handle interblock similarity. + bool visitTerminator(Instruction &I) { return false; } + bool visitInstruction(Instruction &I) { return true; } + }; + + /// A InstVisitor used to exclude certain instructions from being outlined. + InstructionAllowed InstructionClassifier; +}; + +/// Pass to outline similar regions. +class IROutlinerPass : public PassInfoMixin<IROutlinerPass> { +public: + PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); +}; + +} // end namespace llvm + +#endif // LLVM_TRANSFORMS_IPO_IROUTLINER_H diff --git a/llvm/include/llvm/Transforms/IPO/Inliner.h b/llvm/include/llvm/Transforms/IPO/Inliner.h index 3454b0af0d9f..21ff86994ce1 100644 --- a/llvm/include/llvm/Transforms/IPO/Inliner.h +++ b/llvm/include/llvm/Transforms/IPO/Inliner.h @@ -14,8 +14,9 @@ #include "llvm/Analysis/InlineAdvisor.h" #include "llvm/Analysis/InlineCost.h" #include "llvm/Analysis/LazyCallGraph.h" +#include "llvm/Analysis/ReplayInlineAdvisor.h" +#include "llvm/Analysis/Utils/ImportedFunctionsInliningStatistics.h" #include "llvm/IR/PassManager.h" -#include "llvm/Transforms/Utils/ImportedFunctionsInliningStatistics.h" #include <utility> namespace llvm { @@ -96,10 +97,8 @@ protected: /// passes be composed to achieve the same end result. class InlinerPass : public PassInfoMixin<InlinerPass> { public: - InlinerPass() = default; - ~InlinerPass(); - InlinerPass(InlinerPass &&Arg) - : ImportedFunctionsStats(std::move(Arg.ImportedFunctionsStats)) {} + InlinerPass(bool OnlyMandatory = false) : OnlyMandatory(OnlyMandatory) {} + InlinerPass(InlinerPass &&Arg) = default; PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM, LazyCallGraph &CG, CGSCCUpdateResult &UR); @@ -107,8 +106,8 @@ public: private: InlineAdvisor &getAdvisor(const ModuleAnalysisManagerCGSCCProxy::Result &MAM, FunctionAnalysisManager &FAM, Module &M); - std::unique_ptr<ImportedFunctionsInliningStatistics> ImportedFunctionsStats; - Optional<DefaultInlineAdvisor> OwnedDefaultAdvisor; + std::unique_ptr<InlineAdvisor> OwnedAdvisor; + const bool OnlyMandatory; }; /// Module pass, wrapping the inliner pass. 
This works in conjunction with the @@ -121,6 +120,7 @@ class ModuleInlinerWrapperPass public: ModuleInlinerWrapperPass( InlineParams Params = getInlineParams(), bool Debugging = false, + bool MandatoryFirst = true, InliningAdvisorMode Mode = InliningAdvisorMode::Default, unsigned MaxDevirtIterations = 0); ModuleInlinerWrapperPass(ModuleInlinerWrapperPass &&Arg) = default; diff --git a/llvm/include/llvm/Transforms/IPO/LoopExtractor.h b/llvm/include/llvm/Transforms/IPO/LoopExtractor.h new file mode 100644 index 000000000000..def3c5943919 --- /dev/null +++ b/llvm/include/llvm/Transforms/IPO/LoopExtractor.h @@ -0,0 +1,32 @@ +//===- LoopExtractor.h - Extract each loop into a new function ------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// A pass wrapper around the ExtractLoop() scalar transformation to extract each +// top-level loop into its own new function. If the loop is the ONLY loop in a +// given function, it is not touched. This is a pass most useful for debugging +// via bugpoint. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_IPO_LOOPEXTRACTOR_H +#define LLVM_TRANSFORMS_IPO_LOOPEXTRACTOR_H + +#include "llvm/IR/PassManager.h" + +namespace llvm { + +struct LoopExtractorPass : public PassInfoMixin<LoopExtractorPass> { + LoopExtractorPass(unsigned NumLoops = ~0) : NumLoops(NumLoops) {} + PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); + +private: + unsigned NumLoops; +}; +} // namespace llvm + +#endif // LLVM_TRANSFORMS_IPO_LOOPEXTRACTOR_H diff --git a/llvm/include/llvm/Transforms/IPO/LowerTypeTests.h b/llvm/include/llvm/Transforms/IPO/LowerTypeTests.h index 5e91ae599363..eb682c437b94 100644 --- a/llvm/include/llvm/Transforms/IPO/LowerTypeTests.h +++ b/llvm/include/llvm/Transforms/IPO/LowerTypeTests.h @@ -198,10 +198,14 @@ bool isJumpTableCanonical(Function *F); } // end namespace lowertypetests class LowerTypeTestsPass : public PassInfoMixin<LowerTypeTestsPass> { + bool UseCommandLine = false; + + ModuleSummaryIndex *ExportSummary = nullptr; + const ModuleSummaryIndex *ImportSummary = nullptr; + bool DropTypeTests = true; + public: - ModuleSummaryIndex *ExportSummary; - const ModuleSummaryIndex *ImportSummary; - bool DropTypeTests; + LowerTypeTestsPass() : UseCommandLine(true) {} LowerTypeTestsPass(ModuleSummaryIndex *ExportSummary, const ModuleSummaryIndex *ImportSummary, bool DropTypeTests = false) diff --git a/llvm/include/llvm/Transforms/IPO/OpenMPOpt.h b/llvm/include/llvm/Transforms/IPO/OpenMPOpt.h index d96187b73f9b..9b72ee0afd28 100644 --- a/llvm/include/llvm/Transforms/IPO/OpenMPOpt.h +++ b/llvm/include/llvm/Transforms/IPO/OpenMPOpt.h @@ -33,6 +33,11 @@ struct OpenMPInModule { bool isKnown() { return Value != OpenMP::UNKNOWN; } operator bool() { return Value != OpenMP::NOT_FOUND; } + /// Does this function \p F contain any OpenMP runtime calls? + bool containsOMPRuntimeCalls(Function *F) const { + return FuncsWithOMPRuntimeCalls.contains(F); + } + /// Return the known kernels (=GPU entry points) in the module. 
SmallPtrSetImpl<Kernel> &getKernels() { return Kernels; } @@ -42,6 +47,11 @@ struct OpenMPInModule { private: enum class OpenMP { FOUND, NOT_FOUND, UNKNOWN } Value = OpenMP::UNKNOWN; + friend bool containsOpenMP(Module &M, OpenMPInModule &OMPInModule); + + /// In which functions are OpenMP runtime calls present? + SmallPtrSet<Function *, 32> FuncsWithOMPRuntimeCalls; + /// Collection of known kernels (=GPU entry points) in the module. SmallPtrSet<Kernel, 8> Kernels; }; diff --git a/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h b/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h new file mode 100644 index 000000000000..5b2600144fa3 --- /dev/null +++ b/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h @@ -0,0 +1,141 @@ +//===- Transforms/IPO/SampleContextTracker.h --------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file +/// This file provides the interface for context-sensitive profile tracker used +/// by CSSPGO. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_IPO_SAMPLECONTEXTTRACKER_H +#define LLVM_TRANSFORMS_IPO_SAMPLECONTEXTTRACKER_H + +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/Instructions.h" +#include "llvm/ProfileData/SampleProf.h" +#include <list> +#include <map> + +using namespace llvm; +using namespace sampleprof; + +namespace llvm { + +// Internal trie tree representation used for tracking context tree and sample +// profiles. The path from root node to a given node represents the context of +// that nodes' profile. 
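// As a schematic example (editorial addition, not upstream code): the context
// "main calls foo at line 3, foo calls bar at line 5" corresponds to the trie
// path root -> main -> foo -> bar and could be built through the API declared
// below, roughly as
//   ContextTrieNode Root;
//   ContextTrieNode *Main =
//       Root.getOrCreateChildContext(LineLocation(0, 0), "main");
//   ContextTrieNode *Foo =
//       Main->getOrCreateChildContext(LineLocation(3, 0), "foo");
//   ContextTrieNode *Bar =
//       Foo->getOrCreateChildContext(LineLocation(5, 0), "bar");
// where the LineLocation(offset, discriminator) values are illustrative only.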
+class ContextTrieNode { +public: + ContextTrieNode(ContextTrieNode *Parent = nullptr, + StringRef FName = StringRef(), + FunctionSamples *FSamples = nullptr, + LineLocation CallLoc = {0, 0}) + : ParentContext(Parent), FuncName(FName), FuncSamples(FSamples), + CallSiteLoc(CallLoc){}; + ContextTrieNode *getChildContext(const LineLocation &CallSite, + StringRef CalleeName); + ContextTrieNode *getChildContext(const LineLocation &CallSite); + ContextTrieNode *getOrCreateChildContext(const LineLocation &CallSite, + StringRef CalleeName, + bool AllowCreate = true); + + ContextTrieNode &moveToChildContext(const LineLocation &CallSite, + ContextTrieNode &&NodeToMove, + StringRef ContextStrToRemove, + bool DeleteNode = true); + void removeChildContext(const LineLocation &CallSite, StringRef CalleeName); + std::map<uint32_t, ContextTrieNode> &getAllChildContext(); + const StringRef getFuncName() const; + FunctionSamples *getFunctionSamples() const; + void setFunctionSamples(FunctionSamples *FSamples); + LineLocation getCallSiteLoc() const; + ContextTrieNode *getParentContext() const; + void setParentContext(ContextTrieNode *Parent); + void dump(); + +private: + static uint32_t nodeHash(StringRef ChildName, const LineLocation &Callsite); + + // Map line+discriminator location to child context + std::map<uint32_t, ContextTrieNode> AllChildContext; + + // Link to parent context node + ContextTrieNode *ParentContext; + + // Function name for current context + StringRef FuncName; + + // Function Samples for current context + FunctionSamples *FuncSamples; + + // Callsite location in parent context + LineLocation CallSiteLoc; +}; + +// Profile tracker that manages profiles and its associated context. It +// provides interfaces used by sample profile loader to query context profile or +// base profile for given function or location; it also manages context tree +// manipulation that is needed to accommodate inline decisions so we have +// accurate post-inline profile for functions. Internally context profiles +// are organized in a trie, with each node representing profile for specific +// calling context and the context is identified by path from root to the node. +class SampleContextTracker { +public: + SampleContextTracker(StringMap<FunctionSamples> &Profiles); + // Query context profile for a specific callee with given name at a given + // call-site. The full context is identified by location of call instruction. + FunctionSamples *getCalleeContextSamplesFor(const CallBase &Inst, + StringRef CalleeName); + // Query context profile for a given location. The full context + // is identified by input DILocation. + FunctionSamples *getContextSamplesFor(const DILocation *DIL); + // Query context profile for a given sample contxt of a function. + FunctionSamples *getContextSamplesFor(const SampleContext &Context); + // Query base profile for a given function. A base profile is a merged view + // of all context profiles for contexts that are not inlined. + FunctionSamples *getBaseSamplesFor(const Function &Func, + bool MergeContext = true); + // Query base profile for a given function by name. + FunctionSamples *getBaseSamplesFor(StringRef Name, bool MergeContext); + // Mark a context profile as inlined when function is inlined. + // This makes sure that inlined context profile will be excluded in + // function's base profile. + void markContextSamplesInlined(const FunctionSamples *InlinedSamples); + // Dump the internal context profile trie. 
+ void dump(); + +private: + ContextTrieNode *getContextFor(const DILocation *DIL); + ContextTrieNode *getContextFor(const SampleContext &Context); + ContextTrieNode *getCalleeContextFor(const DILocation *DIL, + StringRef CalleeName); + ContextTrieNode *getOrCreateContextPath(const SampleContext &Context, + bool AllowCreate); + ContextTrieNode *getTopLevelContextNode(StringRef FName); + ContextTrieNode &addTopLevelContextNode(StringRef FName); + ContextTrieNode &promoteMergeContextSamplesTree(ContextTrieNode &NodeToPromo); + void promoteMergeContextSamplesTree(const Instruction &Inst, + StringRef CalleeName); + void mergeContextNode(ContextTrieNode &FromNode, ContextTrieNode &ToNode, + StringRef ContextStrToRemove); + ContextTrieNode &promoteMergeContextSamplesTree(ContextTrieNode &FromNode, + ContextTrieNode &ToNodeParent, + StringRef ContextStrToRemove); + + // Map from function name to context profiles (excluding base profile) + StringMap<SmallSet<FunctionSamples *, 16>> FuncToCtxtProfileSet; + + // Root node for context trie tree + ContextTrieNode RootContext; +}; + +} // end namespace llvm +#endif // LLVM_TRANSFORMS_IPO_SAMPLECONTEXTTRACKER_H diff --git a/llvm/include/llvm/Transforms/IPO/SampleProfile.h b/llvm/include/llvm/Transforms/IPO/SampleProfile.h index a5ad44551bf6..3d929b974044 100644 --- a/llvm/include/llvm/Transforms/IPO/SampleProfile.h +++ b/llvm/include/llvm/Transforms/IPO/SampleProfile.h @@ -24,17 +24,18 @@ class Module; /// The sample profiler data loader pass. class SampleProfileLoaderPass : public PassInfoMixin<SampleProfileLoaderPass> { public: - SampleProfileLoaderPass(std::string File = "", std::string RemappingFile = "", - bool IsThinLTOPreLink = false) + SampleProfileLoaderPass( + std::string File = "", std::string RemappingFile = "", + ThinOrFullLTOPhase LTOPhase = ThinOrFullLTOPhase::None) : ProfileFileName(File), ProfileRemappingFileName(RemappingFile), - IsThinLTOPreLink(IsThinLTOPreLink) {} + LTOPhase(LTOPhase) {} PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); private: std::string ProfileFileName; std::string ProfileRemappingFileName; - bool IsThinLTOPreLink; + ThinOrFullLTOPhase LTOPhase; }; } // end namespace llvm diff --git a/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h b/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h new file mode 100644 index 000000000000..78117fd4a9c2 --- /dev/null +++ b/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h @@ -0,0 +1,106 @@ +//===- Transforms/IPO/SampleProfileProbe.h ----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file +/// This file provides the interface for the pseudo probe implementation for +/// AutoFDO. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_IPO_SAMPLEPROFILEPROBE_H +#define LLVM_TRANSFORMS_IPO_SAMPLEPROFILEPROBE_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/IR/PassManager.h" +#include "llvm/IR/PseudoProbe.h" +#include "llvm/ProfileData/SampleProf.h" +#include "llvm/Target/TargetMachine.h" +#include <unordered_map> + +namespace llvm { + +class Module; + +using namespace sampleprof; +using BlockIdMap = std::unordered_map<BasicBlock *, uint32_t>; +using InstructionIdMap = std::unordered_map<Instruction *, uint32_t>; + +enum class PseudoProbeReservedId { Invalid = 0, Last = Invalid }; + +class PseudoProbeDescriptor { + uint64_t FunctionGUID; + uint64_t FunctionHash; + +public: + PseudoProbeDescriptor(uint64_t GUID, uint64_t Hash) + : FunctionGUID(GUID), FunctionHash(Hash) {} + uint64_t getFunctionGUID() const { return FunctionGUID; } + uint64_t getFunctionHash() const { return FunctionHash; } +}; + +// This class serves sample counts correlation for SampleProfileLoader by +// analyzing pseudo probes and their function descriptors injected by +// SampleProfileProber. +class PseudoProbeManager { + DenseMap<uint64_t, PseudoProbeDescriptor> GUIDToProbeDescMap; + + const PseudoProbeDescriptor *getDesc(const Function &F) const; + +public: + PseudoProbeManager(const Module &M); + bool moduleIsProbed(const Module &M) const; + bool profileIsValid(const Function &F, const FunctionSamples &Samples) const; +}; + +/// Sample profile pseudo prober. +/// +/// Insert pseudo probes for block sampling and value sampling. +class SampleProfileProber { +public: + // Give an empty module id when the prober is not used for instrumentation. + SampleProfileProber(Function &F, const std::string &CurModuleUniqueId); + void instrumentOneFunc(Function &F, TargetMachine *TM); + +private: + Function *getFunction() const { return F; } + uint64_t getFunctionHash() const { return FunctionHash; } + uint32_t getBlockId(const BasicBlock *BB) const; + uint32_t getCallsiteId(const Instruction *Call) const; + void computeCFGHash(); + void computeProbeIdForBlocks(); + void computeProbeIdForCallsites(); + + Function *F; + + /// The current module ID that is used to name a static object as a comdat + /// group. + std::string CurModuleUniqueId; + + /// A CFG hash code used to identify a function code changes. + uint64_t FunctionHash; + + /// Map basic blocks to the their pseudo probe ids. + BlockIdMap BlockProbeIds; + + /// Map indirect calls to the their pseudo probe ids. + InstructionIdMap CallProbeIds; + + /// The ID of the last probe, Can be used to number a new probe. + uint32_t LastProbeId; +}; + +class SampleProfileProbePass : public PassInfoMixin<SampleProfileProbePass> { + TargetMachine *TM; + +public: + SampleProfileProbePass(TargetMachine *TM) : TM(TM) {} + PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); +}; + +} // end namespace llvm +#endif // LLVM_TRANSFORMS_IPO_SAMPLEPROFILEPROBE_H diff --git a/llvm/include/llvm/Transforms/IPO/StripSymbols.h b/llvm/include/llvm/Transforms/IPO/StripSymbols.h new file mode 100644 index 000000000000..dd76d481d668 --- /dev/null +++ b/llvm/include/llvm/Transforms/IPO/StripSymbols.h @@ -0,0 +1,47 @@ +//===- StripSymbols.h - Strip symbols and debug info from a module --------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// The StripSymbols transformation implements code stripping. Specifically, it +// can delete: +// +// * names for virtual registers +// * symbols for internal globals and functions +// * debug information +// +// Note that this transformation makes code much less readable, so it should +// only be used in situations where the 'strip' utility would be used, such as +// reducing code size or making it harder to reverse engineer code. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_IPO_STRIPSYMBOLS_H +#define LLVM_TRANSFORMS_IPO_STRIPSYMBOLS_H + +#include "llvm/IR/PassManager.h" + +namespace llvm { + +struct StripSymbolsPass : PassInfoMixin<StripSymbolsPass> { + PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); +}; + +struct StripNonDebugSymbolsPass : PassInfoMixin<StripNonDebugSymbolsPass> { + PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); +}; + +struct StripDebugDeclarePass : PassInfoMixin<StripDebugDeclarePass> { + PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); +}; + +struct StripDeadDebugInfoPass : PassInfoMixin<StripDeadDebugInfoPass> { + PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); +}; + +} // end namespace llvm + +#endif // LLVM_TRANSFORMS_IPO_STRIPSYMBOLS_H diff --git a/llvm/include/llvm/Transforms/IPO/WholeProgramDevirt.h b/llvm/include/llvm/Transforms/IPO/WholeProgramDevirt.h index 86e28cfead80..6e92f8fd3f0d 100644 --- a/llvm/include/llvm/Transforms/IPO/WholeProgramDevirt.h +++ b/llvm/include/llvm/Transforms/IPO/WholeProgramDevirt.h @@ -223,6 +223,9 @@ void setAfterReturnValues(MutableArrayRef<VirtualCallTarget> Targets, struct WholeProgramDevirtPass : public PassInfoMixin<WholeProgramDevirtPass> { ModuleSummaryIndex *ExportSummary; const ModuleSummaryIndex *ImportSummary; + bool UseCommandLine = false; + WholeProgramDevirtPass() + : ExportSummary(nullptr), ImportSummary(nullptr), UseCommandLine(true) {} WholeProgramDevirtPass(ModuleSummaryIndex *ExportSummary, const ModuleSummaryIndex *ImportSummary) : ExportSummary(ExportSummary), ImportSummary(ImportSummary) { diff --git a/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h b/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h new file mode 100644 index 000000000000..aae0694e4cab --- /dev/null +++ b/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h @@ -0,0 +1,528 @@ +//===- InstCombiner.h - InstCombine implementation --------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file provides the interface for the instcombine pass implementation. +/// The interface is used for generic transformations in this folder and +/// target specific combinations in the targets. +/// The visitor implementation is in \c InstCombinerImpl in +/// \c InstCombineInternal.h. 
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_INSTCOMBINE_INSTCOMBINER_H +#define LLVM_TRANSFORMS_INSTCOMBINE_INSTCOMBINER_H + +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/TargetFolder.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/PatternMatch.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/KnownBits.h" +#include "llvm/Transforms/InstCombine/InstCombineWorklist.h" +#include <cassert> + +#define DEBUG_TYPE "instcombine" + +namespace llvm { + +class AAResults; +class AssumptionCache; +class ProfileSummaryInfo; +class TargetLibraryInfo; +class TargetTransformInfo; + +/// The core instruction combiner logic. +/// +/// This class provides both the logic to recursively visit instructions and +/// combine them. +class LLVM_LIBRARY_VISIBILITY InstCombiner { + /// Only used to call target specific inst combining. + TargetTransformInfo &TTI; + +public: + /// Maximum size of array considered when transforming. + uint64_t MaxArraySizeForCombine = 0; + + /// An IRBuilder that automatically inserts new instructions into the + /// worklist. + using BuilderTy = IRBuilder<TargetFolder, IRBuilderCallbackInserter>; + BuilderTy &Builder; + +protected: + /// A worklist of the instructions that need to be simplified. + InstCombineWorklist &Worklist; + + // Mode in which we are running the combiner. + const bool MinimizeSize; + + AAResults *AA; + + // Required analyses. + AssumptionCache &AC; + TargetLibraryInfo &TLI; + DominatorTree &DT; + const DataLayout &DL; + const SimplifyQuery SQ; + OptimizationRemarkEmitter &ORE; + BlockFrequencyInfo *BFI; + ProfileSummaryInfo *PSI; + + // Optional analyses. When non-null, these can both be used to do better + // combining and will be updated to reflect any changes. + LoopInfo *LI; + + bool MadeIRChange = false; + +public: + InstCombiner(InstCombineWorklist &Worklist, BuilderTy &Builder, + bool MinimizeSize, AAResults *AA, AssumptionCache &AC, + TargetLibraryInfo &TLI, TargetTransformInfo &TTI, + DominatorTree &DT, OptimizationRemarkEmitter &ORE, + BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, + const DataLayout &DL, LoopInfo *LI) + : TTI(TTI), Builder(Builder), Worklist(Worklist), + MinimizeSize(MinimizeSize), AA(AA), AC(AC), TLI(TLI), DT(DT), DL(DL), + SQ(DL, &TLI, &DT, &AC), ORE(ORE), BFI(BFI), PSI(PSI), LI(LI) {} + + virtual ~InstCombiner() {} + + /// Return the source operand of a potentially bitcasted value while + /// optionally checking if it has one use. If there is no bitcast or the one + /// use check is not met, return the input value itself. + static Value *peekThroughBitcast(Value *V, bool OneUseOnly = false) { + if (auto *BitCast = dyn_cast<BitCastInst>(V)) + if (!OneUseOnly || BitCast->hasOneUse()) + return BitCast->getOperand(0); + + // V is not a bitcast or V has more than one use and OneUseOnly is true. + return V; + } + + /// Assign a complexity or rank value to LLVM Values. This is used to reduce + /// the amount of pattern matching needed for compares and commutative + /// instructions. For example, if we have: + /// icmp ugt X, Constant + /// or + /// xor (add X, Constant), cast Z + /// + /// We do not have to consider the commuted variants of these patterns because + /// canonicalization based on complexity guarantees the above ordering. 
+ /// + /// This routine maps IR values to various complexity ranks: + /// 0 -> undef + /// 1 -> Constants + /// 2 -> Other non-instructions + /// 3 -> Arguments + /// 4 -> Cast and (f)neg/not instructions + /// 5 -> Other instructions + static unsigned getComplexity(Value *V) { + if (isa<Instruction>(V)) { + if (isa<CastInst>(V) || match(V, m_Neg(PatternMatch::m_Value())) || + match(V, m_Not(PatternMatch::m_Value())) || + match(V, m_FNeg(PatternMatch::m_Value()))) + return 4; + return 5; + } + if (isa<Argument>(V)) + return 3; + return isa<Constant>(V) ? (isa<UndefValue>(V) ? 0 : 1) : 2; + } + + /// Predicate canonicalization reduces the number of patterns that need to be + /// matched by other transforms. For example, we may swap the operands of a + /// conditional branch or select to create a compare with a canonical + /// (inverted) predicate which is then more likely to be matched with other + /// values. + static bool isCanonicalPredicate(CmpInst::Predicate Pred) { + switch (Pred) { + case CmpInst::ICMP_NE: + case CmpInst::ICMP_ULE: + case CmpInst::ICMP_SLE: + case CmpInst::ICMP_UGE: + case CmpInst::ICMP_SGE: + // TODO: There are 16 FCMP predicates. Should others be (not) canonical? + case CmpInst::FCMP_ONE: + case CmpInst::FCMP_OLE: + case CmpInst::FCMP_OGE: + return false; + default: + return true; + } + } + + /// Given an exploded icmp instruction, return true if the comparison only + /// checks the sign bit. If it only checks the sign bit, set TrueIfSigned if + /// the result of the comparison is true when the input value is signed. + static bool isSignBitCheck(ICmpInst::Predicate Pred, const APInt &RHS, + bool &TrueIfSigned) { + switch (Pred) { + case ICmpInst::ICMP_SLT: // True if LHS s< 0 + TrueIfSigned = true; + return RHS.isNullValue(); + case ICmpInst::ICMP_SLE: // True if LHS s<= -1 + TrueIfSigned = true; + return RHS.isAllOnesValue(); + case ICmpInst::ICMP_SGT: // True if LHS s> -1 + TrueIfSigned = false; + return RHS.isAllOnesValue(); + case ICmpInst::ICMP_SGE: // True if LHS s>= 0 + TrueIfSigned = false; + return RHS.isNullValue(); + case ICmpInst::ICMP_UGT: + // True if LHS u> RHS and RHS == sign-bit-mask - 1 + TrueIfSigned = true; + return RHS.isMaxSignedValue(); + case ICmpInst::ICMP_UGE: + // True if LHS u>= RHS and RHS == sign-bit-mask (2^7, 2^15, 2^31, etc) + TrueIfSigned = true; + return RHS.isMinSignedValue(); + case ICmpInst::ICMP_ULT: + // True if LHS u< RHS and RHS == sign-bit-mask (2^7, 2^15, 2^31, etc) + TrueIfSigned = false; + return RHS.isMinSignedValue(); + case ICmpInst::ICMP_ULE: + // True if LHS u<= RHS and RHS == sign-bit-mask - 1 + TrueIfSigned = false; + return RHS.isMaxSignedValue(); + default: + return false; + } + } + + /// Add one to a Constant + static Constant *AddOne(Constant *C) { + return ConstantExpr::getAdd(C, ConstantInt::get(C->getType(), 1)); + } + + /// Subtract one from a Constant + static Constant *SubOne(Constant *C) { + return ConstantExpr::getSub(C, ConstantInt::get(C->getType(), 1)); + } + + llvm::Optional<std::pair< + CmpInst::Predicate, + Constant *>> static getFlippedStrictnessPredicateAndConstant(CmpInst:: + Predicate + Pred, + Constant *C); + + static bool shouldAvoidAbsorbingNotIntoSelect(const SelectInst &SI) { + // a ? b : false and a ? true : b are the canonical form of logical and/or. + // This includes !a ? b : false and !a ? true : b. Absorbing the not into + // the select by swapping operands would break recognition of this pattern + // in other analyses, so don't do that. 
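    // Editorial illustration (not in the upstream source): in IR the two
    // canonical forms matched below are
    //   %and = select i1 %a, i1 %b, i1 false   ; logical and: a ? b : false
    //   %or  = select i1 %a, i1 true, i1 %b    ; logical or:  a ? true : b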
+ return match(&SI, PatternMatch::m_LogicalAnd(PatternMatch::m_Value(), + PatternMatch::m_Value())) || + match(&SI, PatternMatch::m_LogicalOr(PatternMatch::m_Value(), + PatternMatch::m_Value())); + } + + /// Return true if the specified value is free to invert (apply ~ to). + /// This happens in cases where the ~ can be eliminated. If WillInvertAllUses + /// is true, work under the assumption that the caller intends to remove all + /// uses of V and only keep uses of ~V. + /// + /// See also: canFreelyInvertAllUsersOf() + static bool isFreeToInvert(Value *V, bool WillInvertAllUses) { + // ~(~(X)) -> X. + if (match(V, m_Not(PatternMatch::m_Value()))) + return true; + + // Constants can be considered to be not'ed values. + if (match(V, PatternMatch::m_AnyIntegralConstant())) + return true; + + // Compares can be inverted if all of their uses are being modified to use + // the ~V. + if (isa<CmpInst>(V)) + return WillInvertAllUses; + + // If `V` is of the form `A + Constant` then `-1 - V` can be folded into + // `(-1 - Constant) - A` if we are willing to invert all of the uses. + if (BinaryOperator *BO = dyn_cast<BinaryOperator>(V)) + if (BO->getOpcode() == Instruction::Add || + BO->getOpcode() == Instruction::Sub) + if (isa<Constant>(BO->getOperand(0)) || + isa<Constant>(BO->getOperand(1))) + return WillInvertAllUses; + + // Selects with invertible operands are freely invertible + if (match(V, + m_Select(PatternMatch::m_Value(), m_Not(PatternMatch::m_Value()), + m_Not(PatternMatch::m_Value())))) + return WillInvertAllUses; + + return false; + } + + /// Given i1 V, can every user of V be freely adapted if V is changed to !V ? + /// InstCombine's freelyInvertAllUsersOf() must be kept in sync with this fn. + /// + /// See also: isFreeToInvert() + static bool canFreelyInvertAllUsersOf(Value *V, Value *IgnoredUser) { + // Look at every user of V. + for (Use &U : V->uses()) { + if (U.getUser() == IgnoredUser) + continue; // Don't consider this user. + + auto *I = cast<Instruction>(U.getUser()); + switch (I->getOpcode()) { + case Instruction::Select: + if (U.getOperandNo() != 0) // Only if the value is used as select cond. + return false; + if (shouldAvoidAbsorbingNotIntoSelect(*cast<SelectInst>(I))) + return false; + break; + case Instruction::Br: + assert(U.getOperandNo() == 0 && "Must be branching on that value."); + break; // Free to invert by swapping true/false values/destinations. + case Instruction::Xor: // Can invert 'xor' if it's a 'not', by ignoring + // it. + if (!match(I, m_Not(PatternMatch::m_Value()))) + return false; // Not a 'not'. + break; + default: + return false; // Don't know, likely not freely invertible. + } + // So far all users were free to invert... + } + return true; // Can freely invert all users! + } + + /// Some binary operators require special handling to avoid poison and + /// undefined behavior. If a constant vector has undef elements, replace those + /// undefs with identity constants if possible because those are always safe + /// to execute. If no identity constant exists, replace undef with some other + /// safe constant. + static Constant * + getSafeVectorConstantForBinop(BinaryOperator::BinaryOps Opcode, Constant *In, + bool IsRHSConstant) { + auto *InVTy = cast<FixedVectorType>(In->getType()); + + Type *EltTy = InVTy->getElementType(); + auto *SafeC = ConstantExpr::getBinOpIdentity(Opcode, EltTy, IsRHSConstant); + if (!SafeC) { + // TODO: Should this be available as a constant utility function? It is + // similar to getBinOpAbsorber(). 
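      // Editorial illustration (not upstream code): for "shl <i32 2, i32 undef>, X"
      // there is no identity constant for a left-hand shl operand, so the
      // undef lane is replaced with 0 below ("0 << X" is 0 and always safe).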
+ if (IsRHSConstant) { + switch (Opcode) { + case Instruction::SRem: // X % 1 = 0 + case Instruction::URem: // X %u 1 = 0 + SafeC = ConstantInt::get(EltTy, 1); + break; + case Instruction::FRem: // X % 1.0 (doesn't simplify, but it is safe) + SafeC = ConstantFP::get(EltTy, 1.0); + break; + default: + llvm_unreachable( + "Only rem opcodes have no identity constant for RHS"); + } + } else { + switch (Opcode) { + case Instruction::Shl: // 0 << X = 0 + case Instruction::LShr: // 0 >>u X = 0 + case Instruction::AShr: // 0 >> X = 0 + case Instruction::SDiv: // 0 / X = 0 + case Instruction::UDiv: // 0 /u X = 0 + case Instruction::SRem: // 0 % X = 0 + case Instruction::URem: // 0 %u X = 0 + case Instruction::Sub: // 0 - X (doesn't simplify, but it is safe) + case Instruction::FSub: // 0.0 - X (doesn't simplify, but it is safe) + case Instruction::FDiv: // 0.0 / X (doesn't simplify, but it is safe) + case Instruction::FRem: // 0.0 % X = 0 + SafeC = Constant::getNullValue(EltTy); + break; + default: + llvm_unreachable("Expected to find identity constant for opcode"); + } + } + } + assert(SafeC && "Must have safe constant for binop"); + unsigned NumElts = InVTy->getNumElements(); + SmallVector<Constant *, 16> Out(NumElts); + for (unsigned i = 0; i != NumElts; ++i) { + Constant *C = In->getAggregateElement(i); + Out[i] = isa<UndefValue>(C) ? SafeC : C; + } + return ConstantVector::get(Out); + } + + /// Create and insert the idiom we use to indicate a block is unreachable + /// without having to rewrite the CFG from within InstCombine. + static void CreateNonTerminatorUnreachable(Instruction *InsertAt) { + auto &Ctx = InsertAt->getContext(); + new StoreInst(ConstantInt::getTrue(Ctx), + UndefValue::get(Type::getInt1PtrTy(Ctx)), InsertAt); + } + + void addToWorklist(Instruction *I) { Worklist.push(I); } + + AssumptionCache &getAssumptionCache() const { return AC; } + TargetLibraryInfo &getTargetLibraryInfo() const { return TLI; } + DominatorTree &getDominatorTree() const { return DT; } + const DataLayout &getDataLayout() const { return DL; } + const SimplifyQuery &getSimplifyQuery() const { return SQ; } + OptimizationRemarkEmitter &getOptimizationRemarkEmitter() const { + return ORE; + } + BlockFrequencyInfo *getBlockFrequencyInfo() const { return BFI; } + ProfileSummaryInfo *getProfileSummaryInfo() const { return PSI; } + LoopInfo *getLoopInfo() const { return LI; } + + // Call target specific combiners + Optional<Instruction *> targetInstCombineIntrinsic(IntrinsicInst &II); + Optional<Value *> + targetSimplifyDemandedUseBitsIntrinsic(IntrinsicInst &II, APInt DemandedMask, + KnownBits &Known, + bool &KnownBitsComputed); + Optional<Value *> targetSimplifyDemandedVectorEltsIntrinsic( + IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, + APInt &UndefElts2, APInt &UndefElts3, + std::function<void(Instruction *, unsigned, APInt, APInt &)> + SimplifyAndSetOp); + + /// Inserts an instruction \p New before instruction \p Old + /// + /// Also adds the new instruction to the worklist and returns \p New so that + /// it is suitable for use as the return from the visitation patterns. + Instruction *InsertNewInstBefore(Instruction *New, Instruction &Old) { + assert(New && !New->getParent() && + "New instruction already inserted into a basic block!"); + BasicBlock *BB = Old.getParent(); + BB->getInstList().insert(Old.getIterator(), New); // Insert inst + Worklist.push(New); + return New; + } + + /// Same as InsertNewInstBefore, but also sets the debug loc. 
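  /// For example (editorial sketch; NewShl, X, ShAmt, and OldMul are
  /// placeholder names, not upstream code):
  /// \code
  ///   auto *NewShl = BinaryOperator::CreateShl(X, ShAmt);
  ///   return InsertNewInstWith(NewShl, OldMul); // NewShl gets OldMul's DebugLoc
  /// \endcode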
+ Instruction *InsertNewInstWith(Instruction *New, Instruction &Old) { + New->setDebugLoc(Old.getDebugLoc()); + return InsertNewInstBefore(New, Old); + } + + /// A combiner-aware RAUW-like routine. + /// + /// This method is to be used when an instruction is found to be dead, + /// replaceable with another preexisting expression. Here we add all uses of + /// I to the worklist, replace all uses of I with the new value, then return + /// I, so that the inst combiner will know that I was modified. + Instruction *replaceInstUsesWith(Instruction &I, Value *V) { + // If there are no uses to replace, then we return nullptr to indicate that + // no changes were made to the program. + if (I.use_empty()) + return nullptr; + + Worklist.pushUsersToWorkList(I); // Add all modified instrs to worklist. + + // If we are replacing the instruction with itself, this must be in a + // segment of unreachable code, so just clobber the instruction. + if (&I == V) + V = UndefValue::get(I.getType()); + + LLVM_DEBUG(dbgs() << "IC: Replacing " << I << "\n" + << " with " << *V << '\n'); + + I.replaceAllUsesWith(V); + return &I; + } + + /// Replace operand of instruction and add old operand to the worklist. + Instruction *replaceOperand(Instruction &I, unsigned OpNum, Value *V) { + Worklist.addValue(I.getOperand(OpNum)); + I.setOperand(OpNum, V); + return &I; + } + + /// Replace use and add the previously used value to the worklist. + void replaceUse(Use &U, Value *NewValue) { + Worklist.addValue(U); + U = NewValue; + } + + /// Combiner aware instruction erasure. + /// + /// When dealing with an instruction that has side effects or produces a void + /// value, we can't rely on DCE to delete the instruction. Instead, visit + /// methods should return the value returned by this function. 
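  /// For example (editorial sketch, not upstream code): a visit method that
  /// has proven \c I to be dead would typically end with
  /// \code
  ///   return eraseInstFromFunction(I);
  /// \endcode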
+ virtual Instruction *eraseInstFromFunction(Instruction &I) = 0; + + void computeKnownBits(const Value *V, KnownBits &Known, unsigned Depth, + const Instruction *CxtI) const { + llvm::computeKnownBits(V, Known, DL, Depth, &AC, CxtI, &DT); + } + + KnownBits computeKnownBits(const Value *V, unsigned Depth, + const Instruction *CxtI) const { + return llvm::computeKnownBits(V, DL, Depth, &AC, CxtI, &DT); + } + + bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero = false, + unsigned Depth = 0, + const Instruction *CxtI = nullptr) { + return llvm::isKnownToBeAPowerOfTwo(V, DL, OrZero, Depth, &AC, CxtI, &DT); + } + + bool MaskedValueIsZero(const Value *V, const APInt &Mask, unsigned Depth = 0, + const Instruction *CxtI = nullptr) const { + return llvm::MaskedValueIsZero(V, Mask, DL, Depth, &AC, CxtI, &DT); + } + + unsigned ComputeNumSignBits(const Value *Op, unsigned Depth = 0, + const Instruction *CxtI = nullptr) const { + return llvm::ComputeNumSignBits(Op, DL, Depth, &AC, CxtI, &DT); + } + + OverflowResult computeOverflowForUnsignedMul(const Value *LHS, + const Value *RHS, + const Instruction *CxtI) const { + return llvm::computeOverflowForUnsignedMul(LHS, RHS, DL, &AC, CxtI, &DT); + } + + OverflowResult computeOverflowForSignedMul(const Value *LHS, const Value *RHS, + const Instruction *CxtI) const { + return llvm::computeOverflowForSignedMul(LHS, RHS, DL, &AC, CxtI, &DT); + } + + OverflowResult computeOverflowForUnsignedAdd(const Value *LHS, + const Value *RHS, + const Instruction *CxtI) const { + return llvm::computeOverflowForUnsignedAdd(LHS, RHS, DL, &AC, CxtI, &DT); + } + + OverflowResult computeOverflowForSignedAdd(const Value *LHS, const Value *RHS, + const Instruction *CxtI) const { + return llvm::computeOverflowForSignedAdd(LHS, RHS, DL, &AC, CxtI, &DT); + } + + OverflowResult computeOverflowForUnsignedSub(const Value *LHS, + const Value *RHS, + const Instruction *CxtI) const { + return llvm::computeOverflowForUnsignedSub(LHS, RHS, DL, &AC, CxtI, &DT); + } + + OverflowResult computeOverflowForSignedSub(const Value *LHS, const Value *RHS, + const Instruction *CxtI) const { + return llvm::computeOverflowForSignedSub(LHS, RHS, DL, &AC, CxtI, &DT); + } + + virtual bool SimplifyDemandedBits(Instruction *I, unsigned OpNo, + const APInt &DemandedMask, KnownBits &Known, + unsigned Depth = 0) = 0; + virtual Value * + SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, APInt &UndefElts, + unsigned Depth = 0, + bool AllowMultipleUsers = false) = 0; +}; + +} // namespace llvm + +#undef DEBUG_TYPE + +#endif diff --git a/llvm/include/llvm/Transforms/Instrumentation.h b/llvm/include/llvm/Transforms/Instrumentation.h index d4373d7b39ea..c960d5b0ab50 100644 --- a/llvm/include/llvm/Transforms/Instrumentation.h +++ b/llvm/include/llvm/Transforms/Instrumentation.h @@ -66,6 +66,9 @@ struct GCOVOptions { // Add the 'noredzone' attribute to added runtime library calls. bool NoRedZone; + // Use atomic profile counter increments. + bool Atomic = false; + // Regexes separated by a semi-colon to filter the files to instrument. 
std::string Filter; @@ -143,9 +146,8 @@ ModulePass *createInstrProfilingLegacyPass( ModulePass *createInstrOrderFilePass(); // Insert DataFlowSanitizer (dynamic data flow analysis) instrumentation -ModulePass *createDataFlowSanitizerPass( - const std::vector<std::string> &ABIListFiles = std::vector<std::string>(), - void *(*getArgTLS)() = nullptr, void *(*getRetValTLS)() = nullptr); +ModulePass *createDataFlowSanitizerLegacyPassPass( + const std::vector<std::string> &ABIListFiles = std::vector<std::string>()); // Options for sanitizer coverage instrumentation. struct SanitizerCoverageOptions { diff --git a/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizer.h b/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizer.h index fea6064042ae..53ad0cbf9968 100644 --- a/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizer.h +++ b/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizer.h @@ -102,6 +102,7 @@ public: bool Recover = false, bool UseAfterScope = false); PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + static bool isRequired() { return true; } private: bool CompileKernel; @@ -122,6 +123,7 @@ public: bool UseGlobalGC = true, bool UseOdrIndicator = false); PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); + static bool isRequired() { return true; } private: bool CompileKernel; diff --git a/llvm/include/llvm/Transforms/Instrumentation/BoundsChecking.h b/llvm/include/llvm/Transforms/Instrumentation/BoundsChecking.h index 120c6a8fb09f..8d70f1429b99 100644 --- a/llvm/include/llvm/Transforms/Instrumentation/BoundsChecking.h +++ b/llvm/include/llvm/Transforms/Instrumentation/BoundsChecking.h @@ -17,6 +17,7 @@ namespace llvm { /// stores, and other memory intrinsics. struct BoundsCheckingPass : PassInfoMixin<BoundsCheckingPass> { PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + static bool isRequired() { return true; } }; diff --git a/llvm/include/llvm/Transforms/Instrumentation/DataFlowSanitizer.h b/llvm/include/llvm/Transforms/Instrumentation/DataFlowSanitizer.h new file mode 100644 index 000000000000..9b57b1f9a9ea --- /dev/null +++ b/llvm/include/llvm/Transforms/Instrumentation/DataFlowSanitizer.h @@ -0,0 +1,32 @@ +//===- DataFlowSanitizer.h - dynamic data flow analysis -------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#ifndef LLVM_TRANSFORMS_INSTRUMENTATION_DATAFLOWSANITIZER_H +#define LLVM_TRANSFORMS_INSTRUMENTATION_DATAFLOWSANITIZER_H + +#include "llvm/IR/Module.h" +#include "llvm/IR/PassManager.h" +#include <string> +#include <vector> + +namespace llvm { + +class DataFlowSanitizerPass : public PassInfoMixin<DataFlowSanitizerPass> { +private: + std::vector<std::string> ABIListFiles; + +public: + DataFlowSanitizerPass( + const std::vector<std::string> &ABIListFiles = std::vector<std::string>()) + : ABIListFiles(ABIListFiles) {} + PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); + static bool isRequired() { return true; } +}; + +} // namespace llvm + +#endif diff --git a/llvm/include/llvm/Transforms/Instrumentation/GCOVProfiler.h b/llvm/include/llvm/Transforms/Instrumentation/GCOVProfiler.h index b3971e49754e..2766cc5e6263 100644 --- a/llvm/include/llvm/Transforms/Instrumentation/GCOVProfiler.h +++ b/llvm/include/llvm/Transforms/Instrumentation/GCOVProfiler.h @@ -26,5 +26,5 @@ private: GCOVOptions GCOVOpts; }; -} // End llvm namespace +} // namespace llvm #endif diff --git a/llvm/include/llvm/Transforms/Instrumentation/HWAddressSanitizer.h b/llvm/include/llvm/Transforms/Instrumentation/HWAddressSanitizer.h index e3104eeb1d36..68b47320f650 100644 --- a/llvm/include/llvm/Transforms/Instrumentation/HWAddressSanitizer.h +++ b/llvm/include/llvm/Transforms/Instrumentation/HWAddressSanitizer.h @@ -27,6 +27,7 @@ public: explicit HWAddressSanitizerPass(bool CompileKernel = false, bool Recover = false); PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM); + static bool isRequired() { return true; } private: bool CompileKernel; @@ -36,6 +37,24 @@ private: FunctionPass *createHWAddressSanitizerLegacyPassPass(bool CompileKernel = false, bool Recover = false); +namespace HWASanAccessInfo { + +// Bit field positions for the accessinfo parameter to +// llvm.hwasan.check.memaccess. Shared between the pass and the backend. Bits +// 0-15 are also used by the runtime. +enum { + AccessSizeShift = 0, // 4 bits + IsWriteShift = 4, + RecoverShift = 5, + MatchAllShift = 16, // 8 bits + HasMatchAllShift = 24, + CompileKernelShift = 25, +}; + +enum { RuntimeMask = 0xffff }; + +} // namespace HWASanAccessInfo + } // namespace llvm #endif diff --git a/llvm/include/llvm/Transforms/Instrumentation/InstrProfiling.h b/llvm/include/llvm/Transforms/Instrumentation/InstrProfiling.h index 263d3b629589..5ce72cd59ac2 100644 --- a/llvm/include/llvm/Transforms/Instrumentation/InstrProfiling.h +++ b/llvm/include/llvm/Transforms/Instrumentation/InstrProfiling.h @@ -68,11 +68,6 @@ private: // vector of counter load/store pairs to be register promoted. std::vector<LoadStorePair> PromotionCandidates; - // The start value of precise value profile range for memory intrinsic sizes. - int64_t MemOPSizeRangeStart; - // The end value of precise value profile range for memory intrinsic sizes. - int64_t MemOPSizeRangeLast; - int64_t TotalCountersPromoted = 0; /// Lower instrumentation intrinsics in the function. 
Returns true if there diff --git a/llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h b/llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h new file mode 100644 index 000000000000..ac6a07d299a6 --- /dev/null +++ b/llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h @@ -0,0 +1,51 @@ +//===--------- Definition of the MemProfiler class --------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the MemProfiler class. +// +//===----------------------------------------------------------------------===// +#ifndef LLVM_TRANSFORMS_INSTRUMENTATION_MEMPROFILER_H +#define LLVM_TRANSFORMS_INSTRUMENTATION_MEMPROFILER_H + +#include "llvm/IR/Function.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/PassManager.h" + +namespace llvm { + +/// Public interface to the memory profiler pass for instrumenting code to +/// profile memory accesses. +/// +/// The profiler itself is a function pass that works by inserting various +/// calls to the MemProfiler runtime library functions. The runtime library +/// essentially replaces malloc() and free() with custom implementations that +/// record data about the allocations. +class MemProfilerPass : public PassInfoMixin<MemProfilerPass> { +public: + explicit MemProfilerPass(); + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + static bool isRequired() { return true; } +}; + +/// Public interface to the memory profiler module pass for instrumenting code +/// to profile memory allocations and accesses. +class ModuleMemProfilerPass : public PassInfoMixin<ModuleMemProfilerPass> { +public: + explicit ModuleMemProfilerPass(); + PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); + static bool isRequired() { return true; } +}; + +// Insert MemProfiler instrumentation +FunctionPass *createMemProfilerFunctionPass(); +ModulePass *createModuleMemProfilerLegacyPassPass(); + +} // namespace llvm + +#endif diff --git a/llvm/include/llvm/Transforms/Instrumentation/MemorySanitizer.h b/llvm/include/llvm/Transforms/Instrumentation/MemorySanitizer.h index 01a86ee3f1fd..f5f9ec7829bd 100644 --- a/llvm/include/llvm/Transforms/Instrumentation/MemorySanitizer.h +++ b/llvm/include/llvm/Transforms/Instrumentation/MemorySanitizer.h @@ -41,6 +41,7 @@ struct MemorySanitizerPass : public PassInfoMixin<MemorySanitizerPass> { PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM); PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); + static bool isRequired() { return true; } private: MemorySanitizerOptions Options; diff --git a/llvm/include/llvm/Transforms/Instrumentation/SanitizerCoverage.h b/llvm/include/llvm/Transforms/Instrumentation/SanitizerCoverage.h index 999086a29f87..e3d268cb0781 100644 --- a/llvm/include/llvm/Transforms/Instrumentation/SanitizerCoverage.h +++ b/llvm/include/llvm/Transforms/Instrumentation/SanitizerCoverage.h @@ -46,6 +46,7 @@ public: *vfs::getRealFileSystem()); } PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); + static bool isRequired() { return true; } private: SanitizerCoverageOptions Options; diff --git a/llvm/include/llvm/Transforms/Instrumentation/ThreadSanitizer.h b/llvm/include/llvm/Transforms/Instrumentation/ThreadSanitizer.h index ce0e46745abb..f9c507624e6d 100644 --- a/llvm/include/llvm/Transforms/Instrumentation/ThreadSanitizer.h +++ 
b/llvm/include/llvm/Transforms/Instrumentation/ThreadSanitizer.h @@ -28,6 +28,7 @@ FunctionPass *createThreadSanitizerLegacyPassPass(); struct ThreadSanitizerPass : public PassInfoMixin<ThreadSanitizerPass> { PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM); PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); + static bool isRequired() { return true; } }; } // namespace llvm diff --git a/llvm/include/llvm/Transforms/ObjCARC.h b/llvm/include/llvm/Transforms/ObjCARC.h index 2f114c75e2e2..a89df95385c8 100644 --- a/llvm/include/llvm/Transforms/ObjCARC.h +++ b/llvm/include/llvm/Transforms/ObjCARC.h @@ -14,6 +14,8 @@ #ifndef LLVM_TRANSFORMS_OBJCARC_H #define LLVM_TRANSFORMS_OBJCARC_H +#include "llvm/IR/PassManager.h" + namespace llvm { class Pass; @@ -42,6 +44,22 @@ Pass *createObjCARCContractPass(); // Pass *createObjCARCOptPass(); +struct ObjCARCOptPass : public PassInfoMixin<ObjCARCOptPass> { + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); +}; + +struct ObjCARCContractPass : public PassInfoMixin<ObjCARCContractPass> { + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); +}; + +struct ObjCARCAPElimPass : public PassInfoMixin<ObjCARCAPElimPass> { + PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); +}; + +struct ObjCARCExpandPass : public PassInfoMixin<ObjCARCExpandPass> { + PreservedAnalyses run(Function &M, FunctionAnalysisManager &AM); +}; + } // End llvm namespace #endif diff --git a/llvm/include/llvm/Transforms/Scalar.h b/llvm/include/llvm/Transforms/Scalar.h index a1aacec76979..3db1613d7457 100644 --- a/llvm/include/llvm/Transforms/Scalar.h +++ b/llvm/include/llvm/Transforms/Scalar.h @@ -14,6 +14,7 @@ #ifndef LLVM_TRANSFORMS_SCALAR_H #define LLVM_TRANSFORMS_SCALAR_H +#include "llvm/Transforms/Utils/SimplifyCFGOptions.h" #include <functional> namespace llvm { @@ -25,12 +26,6 @@ class Pass; //===----------------------------------------------------------------------===// // -// ConstantPropagation - A worklist driven constant propagation pass -// -FunctionPass *createConstantPropagationPass(); - -//===----------------------------------------------------------------------===// -// // AlignmentFromAssumptions - Use assume intrinsics to set load/store // alignments. // @@ -38,16 +33,15 @@ FunctionPass *createAlignmentFromAssumptionsPass(); //===----------------------------------------------------------------------===// // -// SCCP - Sparse conditional constant propagation. +// AnnotationRemarks - Emit remarks for !annotation metadata. // -FunctionPass *createSCCPPass(); +FunctionPass *createAnnotationRemarksLegacyPass(); //===----------------------------------------------------------------------===// // -// DeadInstElimination - This pass quickly removes trivially dead instructions -// without modifying the CFG of the function. It is a FunctionPass. +// SCCP - Sparse conditional constant propagation. // -Pass *createDeadInstEliminationPass(); +FunctionPass *createSCCPPass(); //===----------------------------------------------------------------------===// // @@ -163,6 +157,12 @@ Pass *createLoopInterchangePass(); //===----------------------------------------------------------------------===// // +// LoopFlatten - This pass flattens nested loops into a single loop. 
+// +FunctionPass *createLoopFlattenPass(); + +//===----------------------------------------------------------------------===// +// // LoopStrengthReduce - This pass is strength reduces GEP instructions that use // a loop's canonical induction variable as one of their indices. // @@ -190,7 +190,8 @@ Pass *createLoopUnrollPass(int OptLevel = 2, bool OnlyWhenForced = false, int Count = -1, int AllowPartial = -1, int Runtime = -1, int UpperBound = -1, int AllowPeeling = -1); -// Create an unrolling pass for full unrolling that uses exact trip count only. +// Create an unrolling pass for full unrolling that uses exact trip count only +// and also does peeling. Pass *createSimpleLoopUnrollPass(int OptLevel = 2, bool OnlyWhenForced = false, bool ForgetAllSCEV = false); @@ -210,7 +211,7 @@ Pass *createLoopRerollPass(); // // LoopRotate - This pass is a simple loop rotating pass. // -Pass *createLoopRotatePass(int MaxHeaderSize = -1); +Pass *createLoopRotatePass(int MaxHeaderSize = -1, bool PrepareForLTO = false); //===----------------------------------------------------------------------===// // @@ -245,10 +246,12 @@ FunctionPass *createReassociatePass(); //===----------------------------------------------------------------------===// // // JumpThreading - Thread control through mult-pred/multi-succ blocks where some -// preds always go to some succ. Thresholds other than minus one override the -// internal BB duplication default threshold. +// preds always go to some succ. If FreezeSelectCond is true, unfold the +// condition of a select that unfolds to branch. Thresholds other than minus one +// override the internal BB duplication default threshold. // -FunctionPass *createJumpThreadingPass(int Threshold = -1); +FunctionPass *createJumpThreadingPass(bool FreezeSelectCond = false, + int Threshold = -1); //===----------------------------------------------------------------------===// // @@ -256,8 +259,7 @@ FunctionPass *createJumpThreadingPass(int Threshold = -1); // simplify terminator instructions, convert switches to lookup tables, etc. // FunctionPass *createCFGSimplificationPass( - unsigned Threshold = 1, bool ForwardSwitchCond = false, - bool ConvertSwitch = false, bool KeepLoops = true, bool SinkCommon = false, + SimplifyCFGOptions Options = SimplifyCFGOptions(), std::function<bool(const Function &)> Ftor = nullptr); //===----------------------------------------------------------------------===// @@ -346,6 +348,13 @@ FunctionPass *createConstantHoistingPass(); //===----------------------------------------------------------------------===// // +// ConstraintElimination - This pass eliminates conditions based on found +// constraints. +// +FunctionPass *createConstraintEliminationPass(); + +//===----------------------------------------------------------------------===// +// // Sink - Code Sinking // FunctionPass *createSinkingPass(); @@ -370,6 +379,13 @@ Pass *createLowerMatrixIntrinsicsPass(); //===----------------------------------------------------------------------===// // +// LowerMatrixIntrinsicsMinimal - Lower matrix intrinsics to vector operations +// (lightweight, does not require extra analysis) +// +Pass *createLowerMatrixIntrinsicsMinimalPass(); + +//===----------------------------------------------------------------------===// +// // LowerWidenableCondition - Lower widenable condition to i1 true. // Pass *createLowerWidenableConditionPass(); @@ -523,6 +539,21 @@ Pass *createLoopSimplifyCFGPass(); // transformations. 
// Pass *createWarnMissedTransformationsPass(); + +//===----------------------------------------------------------------------===// +// +// This pass does instruction simplification on each +// instruction in a function. +// +FunctionPass *createInstSimplifyLegacyPass(); + + +//===----------------------------------------------------------------------===// +// +// createScalarizeMaskedMemIntrinPass - Replace masked load, store, gather +// and scatter intrinsics with scalar code when target doesn't support them. +// +FunctionPass *createScalarizeMaskedMemIntrinLegacyPass(); } // End llvm namespace #endif diff --git a/llvm/include/llvm/Transforms/Scalar/AnnotationRemarks.h b/llvm/include/llvm/Transforms/Scalar/AnnotationRemarks.h new file mode 100644 index 000000000000..f1619766dcf4 --- /dev/null +++ b/llvm/include/llvm/Transforms/Scalar/AnnotationRemarks.h @@ -0,0 +1,26 @@ +//===- AnnotationRemarks.cpp - Emit remarks for !annotation MD --*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// \file +// This file defines AnnotationRemarksPass for the new pass manager. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_SCALAR_ANNOTATION_REMARKS_H +#define LLVM_TRANSFORMS_SCALAR_ANNOTATION_REMARKS_H + +#include "llvm/IR/Function.h" +#include "llvm/IR/PassManager.h" + +namespace llvm { +struct AnnotationRemarksPass : public PassInfoMixin<AnnotationRemarksPass> { + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); +}; +} // namespace llvm + +#endif // LLVM_TRANSFORMS_SCALAR_ANNOTATION_REMARKS_H diff --git a/llvm/include/llvm/Transforms/Scalar/ConstantHoisting.h b/llvm/include/llvm/Transforms/Scalar/ConstantHoisting.h index 26d4a2476a86..11379e59467f 100644 --- a/llvm/include/llvm/Transforms/Scalar/ConstantHoisting.h +++ b/llvm/include/llvm/Transforms/Scalar/ConstantHoisting.h @@ -198,7 +198,6 @@ private: // constant GEP base. bool emitBaseConstants(GlobalVariable *BaseGV); void deleteDeadCastInst() const; - bool optimizeConstants(Function &Fn); }; } // end namespace llvm diff --git a/llvm/include/llvm/Transforms/Scalar/ConstraintElimination.h b/llvm/include/llvm/Transforms/Scalar/ConstraintElimination.h new file mode 100644 index 000000000000..544a6c2eae55 --- /dev/null +++ b/llvm/include/llvm/Transforms/Scalar/ConstraintElimination.h @@ -0,0 +1,24 @@ +//===- ConstraintElimination.h - Constraint elimination pass ----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_SCALAR_CONSTRAINTELIMINATION_H +#define LLVM_TRANSFORMS_SCALAR_CONSTRAINTELIMINATION_H + +#include "llvm/IR/PassManager.h" + +namespace llvm { + +class ConstraintEliminationPass + : public PassInfoMixin<ConstraintEliminationPass> { +public: + PreservedAnalyses run(Function &F, FunctionAnalysisManager &); +}; + +} // end namespace llvm + +#endif // LLVM_TRANSFORMS_SCALAR_CONSTRAINTELIMINATION_H diff --git a/llvm/include/llvm/Transforms/Scalar/DCE.h b/llvm/include/llvm/Transforms/Scalar/DCE.h index 974e4b20d152..4d83296b1d86 100644 --- a/llvm/include/llvm/Transforms/Scalar/DCE.h +++ b/llvm/include/llvm/Transforms/Scalar/DCE.h @@ -23,6 +23,12 @@ class DCEPass : public PassInfoMixin<DCEPass> { public: PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); }; + +class RedundantDbgInstEliminationPass + : public PassInfoMixin<RedundantDbgInstEliminationPass> { +public: + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); +}; } #endif // LLVM_TRANSFORMS_SCALAR_DCE_H diff --git a/llvm/include/llvm/Transforms/Scalar/GVN.h b/llvm/include/llvm/Transforms/Scalar/GVN.h index f2818c6b792e..d6b3c8ca7219 100644 --- a/llvm/include/llvm/Transforms/Scalar/GVN.h +++ b/llvm/include/llvm/Transforms/Scalar/GVN.h @@ -46,11 +46,12 @@ class FunctionPass; class IntrinsicInst; class LoadInst; class LoopInfo; +class MemorySSA; +class MemorySSAUpdater; class OptimizationRemarkEmitter; class PHINode; class TargetLibraryInfo; class Value; - /// A private "module" namespace for types and utilities used by GVN. These /// are implementation details and should not be used by clients. namespace gvn LLVM_LIBRARY_VISIBILITY { @@ -72,6 +73,7 @@ struct GVNOptions { Optional<bool> AllowPRE = None; Optional<bool> AllowLoadPRE = None; Optional<bool> AllowLoadInLoopPRE = None; + Optional<bool> AllowLoadPRESplitBackedge = None; Optional<bool> AllowMemDep = None; GVNOptions() = default; @@ -93,6 +95,12 @@ struct GVNOptions { return *this; } + /// Enables or disables PRE of loads in GVN. + GVNOptions &setLoadPRESplitBackedge(bool LoadPRESplitBackedge) { + AllowLoadPRESplitBackedge = LoadPRESplitBackedge; + return *this; + } + /// Enables or disables use of MemDepAnalysis. GVNOptions &setMemDep(bool MemDep) { AllowMemDep = MemDep; @@ -129,6 +137,7 @@ public: bool isPREEnabled() const; bool isLoadPREEnabled() const; bool isLoadInLoopPREEnabled() const; + bool isLoadPRESplitBackedgeEnabled() const; bool isMemDepEnabled() const; /// This class holds the mapping between values and value numbers. It is used @@ -211,6 +220,7 @@ private: OptimizationRemarkEmitter *ORE = nullptr; ImplicitControlFlowTracking *ICF = nullptr; LoopInfo *LI = nullptr; + MemorySSAUpdater *MSSAU = nullptr; ValueTable VN; @@ -246,7 +256,7 @@ private: bool runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT, const TargetLibraryInfo &RunTLI, AAResults &RunAA, MemoryDependenceResults *RunMD, LoopInfo *LI, - OptimizationRemarkEmitter *ORE); + OptimizationRemarkEmitter *ORE, MemorySSA *MSSA = nullptr); /// Push a new Value to the LeaderTable onto the list for its value number. 
void addToLeaderTable(uint32_t N, Value *V, const BasicBlock *BB) { @@ -328,7 +338,6 @@ private: BasicBlock *Curr, unsigned int ValNo); Value *findLeader(const BasicBlock *BB, uint32_t num); void cleanupGlobalSets(); - void fillImplicitControlFlowInfo(BasicBlock *BB); void verifyRemoved(const Instruction *I) const; bool splitCriticalEdges(); BasicBlock *splitCriticalEdges(BasicBlock *Pred, BasicBlock *Succ); diff --git a/llvm/include/llvm/Transforms/Scalar/IndVarSimplify.h b/llvm/include/llvm/Transforms/Scalar/IndVarSimplify.h index 3c20537ab76a..b5d544f1149c 100644 --- a/llvm/include/llvm/Transforms/Scalar/IndVarSimplify.h +++ b/llvm/include/llvm/Transforms/Scalar/IndVarSimplify.h @@ -23,7 +23,11 @@ class Loop; class LPMUpdater; class IndVarSimplifyPass : public PassInfoMixin<IndVarSimplifyPass> { + /// Perform IV widening during the pass. + bool WidenIndVars; + public: + IndVarSimplifyPass(bool WidenIndVars = true) : WidenIndVars(WidenIndVars) {} PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U); }; diff --git a/llvm/include/llvm/Transforms/Scalar/InferAddressSpaces.h b/llvm/include/llvm/Transforms/Scalar/InferAddressSpaces.h new file mode 100644 index 000000000000..9a56b073f1c6 --- /dev/null +++ b/llvm/include/llvm/Transforms/Scalar/InferAddressSpaces.h @@ -0,0 +1,27 @@ +//===- InferAddressSpace.h - ----------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_SCALAR_INFERADDRESSSPACES_H +#define LLVM_TRANSFORMS_SCALAR_INFERADDRESSSPACES_H + +#include "llvm/IR/PassManager.h" + +namespace llvm { + +struct InferAddressSpacesPass : PassInfoMixin<InferAddressSpacesPass> { + InferAddressSpacesPass(); + InferAddressSpacesPass(unsigned AddressSpace); + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + +private: + unsigned FlatAddrSpace = 0; +}; + +} // end namespace llvm + +#endif // LLVM_TRANSFORMS_SCALAR_INFERADDRESSSPACES_H diff --git a/llvm/include/llvm/Transforms/Scalar/InstSimplifyPass.h b/llvm/include/llvm/Transforms/Scalar/InstSimplifyPass.h index 0c30b6260536..f36695a8c2b7 100644 --- a/llvm/include/llvm/Transforms/Scalar/InstSimplifyPass.h +++ b/llvm/include/llvm/Transforms/Scalar/InstSimplifyPass.h @@ -36,10 +36,6 @@ public: PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); }; -/// Create a legacy pass that does instruction simplification on each -/// instruction in a function. 
-FunctionPass *createInstSimplifyLegacyPass(); - } // end namespace llvm #endif // LLVM_TRANSFORMS_UTILS_INSTSIMPLIFYPASS_H diff --git a/llvm/include/llvm/Transforms/Scalar/JumpThreading.h b/llvm/include/llvm/Transforms/Scalar/JumpThreading.h index 327bf6d00c47..951f4e487753 100644 --- a/llvm/include/llvm/Transforms/Scalar/JumpThreading.h +++ b/llvm/include/llvm/Transforms/Scalar/JumpThreading.h @@ -19,7 +19,6 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/DomTreeUpdater.h" @@ -29,6 +28,7 @@ namespace llvm { +class AAResults; class BasicBlock; class BinaryOperator; class BranchInst; @@ -41,6 +41,8 @@ class IntrinsicInst; class LazyValueInfo; class LoadInst; class PHINode; +class SelectInst; +class SwitchInst; class TargetLibraryInfo; class Value; @@ -77,7 +79,7 @@ enum ConstantPreference { WantInteger, WantBlockAddress }; class JumpThreadingPass : public PassInfoMixin<JumpThreadingPass> { TargetLibraryInfo *TLI; LazyValueInfo *LVI; - AliasAnalysis *AA; + AAResults *AA; DomTreeUpdater *DTU; std::unique_ptr<BlockFrequencyInfo> BFI; std::unique_ptr<BranchProbabilityInfo> BPI; @@ -91,15 +93,16 @@ class JumpThreadingPass : public PassInfoMixin<JumpThreadingPass> { unsigned BBDupThreshold; unsigned DefaultBBDupThreshold; + bool InsertFreezeWhenUnfoldingSelect; public: - JumpThreadingPass(int T = -1); + JumpThreadingPass(bool InsertFreezeWhenUnfoldingSelect = false, int T = -1); // Glue for old PM. - bool runImpl(Function &F, TargetLibraryInfo *TLI_, LazyValueInfo *LVI_, - AliasAnalysis *AA_, DomTreeUpdater *DTU_, bool HasProfileData_, - std::unique_ptr<BlockFrequencyInfo> BFI_, - std::unique_ptr<BranchProbabilityInfo> BPI_); + bool runImpl(Function &F, TargetLibraryInfo *TLI, LazyValueInfo *LVI, + AAResults *AA, DomTreeUpdater *DTU, bool HasProfileData, + std::unique_ptr<BlockFrequencyInfo> BFI, + std::unique_ptr<BranchProbabilityInfo> BPI); PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); @@ -108,65 +111,65 @@ public: BPI.reset(); } - void FindLoopHeaders(Function &F); - bool ProcessBlock(BasicBlock *BB); - bool MaybeMergeBasicBlockIntoOnlyPred(BasicBlock *BB); - void UpdateSSA(BasicBlock *BB, BasicBlock *NewBB, + void findLoopHeaders(Function &F); + bool processBlock(BasicBlock *BB); + bool maybeMergeBasicBlockIntoOnlyPred(BasicBlock *BB); + void updateSSA(BasicBlock *BB, BasicBlock *NewBB, DenseMap<Instruction *, Value *> &ValueMapping); - DenseMap<Instruction *, Value *> CloneInstructions(BasicBlock::iterator BI, + DenseMap<Instruction *, Value *> cloneInstructions(BasicBlock::iterator BI, BasicBlock::iterator BE, BasicBlock *NewBB, BasicBlock *PredBB); - bool TryThreadEdge(BasicBlock *BB, + bool tryThreadEdge(BasicBlock *BB, const SmallVectorImpl<BasicBlock *> &PredBBs, BasicBlock *SuccBB); - void ThreadEdge(BasicBlock *BB, const SmallVectorImpl<BasicBlock *> &PredBBs, + void threadEdge(BasicBlock *BB, const SmallVectorImpl<BasicBlock *> &PredBBs, BasicBlock *SuccBB); - bool DuplicateCondBranchOnPHIIntoPred( + bool duplicateCondBranchOnPHIIntoPred( BasicBlock *BB, const SmallVectorImpl<BasicBlock *> &PredBBs); - bool ComputeValueKnownInPredecessorsImpl( + bool computeValueKnownInPredecessorsImpl( Value *V, BasicBlock *BB, jumpthreading::PredValueInfo &Result, jumpthreading::ConstantPreference Preference, DenseSet<Value *> &RecursionSet, Instruction *CxtI = 
nullptr); bool - ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, + computeValueKnownInPredecessors(Value *V, BasicBlock *BB, jumpthreading::PredValueInfo &Result, jumpthreading::ConstantPreference Preference, Instruction *CxtI = nullptr) { DenseSet<Value *> RecursionSet; - return ComputeValueKnownInPredecessorsImpl(V, BB, Result, Preference, + return computeValueKnownInPredecessorsImpl(V, BB, Result, Preference, RecursionSet, CxtI); } - Constant *EvaluateOnPredecessorEdge(BasicBlock *BB, BasicBlock *PredPredBB, + Constant *evaluateOnPredecessorEdge(BasicBlock *BB, BasicBlock *PredPredBB, Value *cond); - bool MaybeThreadThroughTwoBasicBlocks(BasicBlock *BB, Value *Cond); - void ThreadThroughTwoBasicBlocks(BasicBlock *PredPredBB, BasicBlock *PredBB, + bool maybethreadThroughTwoBasicBlocks(BasicBlock *BB, Value *Cond); + void threadThroughTwoBasicBlocks(BasicBlock *PredPredBB, BasicBlock *PredBB, BasicBlock *BB, BasicBlock *SuccBB); - bool ProcessThreadableEdges(Value *Cond, BasicBlock *BB, + bool processThreadableEdges(Value *Cond, BasicBlock *BB, jumpthreading::ConstantPreference Preference, Instruction *CxtI = nullptr); - bool ProcessBranchOnPHI(PHINode *PN); - bool ProcessBranchOnXOR(BinaryOperator *BO); - bool ProcessImpliedCondition(BasicBlock *BB); + bool processBranchOnPHI(PHINode *PN); + bool processBranchOnXOR(BinaryOperator *BO); + bool processImpliedCondition(BasicBlock *BB); - bool SimplifyPartiallyRedundantLoad(LoadInst *LI); - void UnfoldSelectInstr(BasicBlock *Pred, BasicBlock *BB, SelectInst *SI, + bool simplifyPartiallyRedundantLoad(LoadInst *LI); + void unfoldSelectInstr(BasicBlock *Pred, BasicBlock *BB, SelectInst *SI, PHINode *SIUse, unsigned Idx); - bool TryToUnfoldSelect(CmpInst *CondCmp, BasicBlock *BB); - bool TryToUnfoldSelect(SwitchInst *SI, BasicBlock *BB); - bool TryToUnfoldSelectInCurrBB(BasicBlock *BB); + bool tryToUnfoldSelect(CmpInst *CondCmp, BasicBlock *BB); + bool tryToUnfoldSelect(SwitchInst *SI, BasicBlock *BB); + bool tryToUnfoldSelectInCurrBB(BasicBlock *BB); - bool ProcessGuards(BasicBlock *BB); - bool ThreadGuard(BasicBlock *BB, IntrinsicInst *Guard, BranchInst *BI); + bool processGuards(BasicBlock *BB); + bool threadGuard(BasicBlock *BB, IntrinsicInst *Guard, BranchInst *BI); private: - BasicBlock *SplitBlockPreds(BasicBlock *BB, ArrayRef<BasicBlock *> Preds, + BasicBlock *splitBlockPreds(BasicBlock *BB, ArrayRef<BasicBlock *> Preds, const char *Suffix); - void UpdateBlockFreqAndEdgeWeight(BasicBlock *PredBB, BasicBlock *BB, + void updateBlockFreqAndEdgeWeight(BasicBlock *PredBB, BasicBlock *BB, BasicBlock *NewBB, BasicBlock *SuccBB); /// Check if the block has profile metadata for its outgoing edges. bool doesBlockHaveProfileData(BasicBlock *BB); diff --git a/llvm/include/llvm/Transforms/Scalar/LoopFlatten.h b/llvm/include/llvm/Transforms/Scalar/LoopFlatten.h new file mode 100644 index 000000000000..41f91f090013 --- /dev/null +++ b/llvm/include/llvm/Transforms/Scalar/LoopFlatten.h @@ -0,0 +1,32 @@ +//===- LoopFlatten.h - Loop Flatten ---------------- -----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file provides the interface for the Loop Flatten Pass. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_SCALAR_LOOPFLATTEN_H +#define LLVM_TRANSFORMS_SCALAR_LOOPFLATTEN_H + +#include "llvm/Analysis/LoopAnalysisManager.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/IR/PassManager.h" +#include "llvm/Transforms/Scalar/LoopPassManager.h" + +namespace llvm { + +class LoopFlattenPass : public PassInfoMixin<LoopFlattenPass> { +public: + LoopFlattenPass() = default; + + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); +}; + +} // end namespace llvm + +#endif // LLVM_TRANSFORMS_SCALAR_LOOPFLATTEN_H diff --git a/llvm/include/llvm/Transforms/Scalar/LoopIdiomRecognize.h b/llvm/include/llvm/Transforms/Scalar/LoopIdiomRecognize.h index d2fff8bb5743..0c6406d86185 100644 --- a/llvm/include/llvm/Transforms/Scalar/LoopIdiomRecognize.h +++ b/llvm/include/llvm/Transforms/Scalar/LoopIdiomRecognize.h @@ -23,6 +23,19 @@ namespace llvm { class Loop; class LPMUpdater; +/// Options to disable Loop Idiom Recognize, which can be shared with other +/// passes. +struct DisableLIRP { + /// When true, the entire pass is disabled. + static bool All; + + /// When true, Memset is disabled. + static bool Memset; + + /// When true, Memcpy is disabled. + static bool Memcpy; +}; + /// Performs Loop Idiom Recognize Pass. class LoopIdiomRecognizePass : public PassInfoMixin<LoopIdiomRecognizePass> { public: diff --git a/llvm/include/llvm/Transforms/Scalar/LoopInterchange.h b/llvm/include/llvm/Transforms/Scalar/LoopInterchange.h new file mode 100644 index 000000000000..9f50fc5a4127 --- /dev/null +++ b/llvm/include/llvm/Transforms/Scalar/LoopInterchange.h @@ -0,0 +1,24 @@ +//===- LoopInterchange.h - Loop interchange pass --------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_SCALAR_LOOPINTERCHANGE_H +#define LLVM_TRANSFORMS_SCALAR_LOOPINTERCHANGE_H + +#include "llvm/IR/PassManager.h" +#include "llvm/Transforms/Scalar/LoopPassManager.h" + +namespace llvm { + +struct LoopInterchangePass : public PassInfoMixin<LoopInterchangePass> { + PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR, LPMUpdater &U); +}; + +} // end namespace llvm + +#endif // LLVM_TRANSFORMS_SCALAR_LOOPINTERCHANGE_H diff --git a/llvm/include/llvm/Transforms/Scalar/LoopPassManager.h b/llvm/include/llvm/Transforms/Scalar/LoopPassManager.h index 9b2f0fcab95b..2a342fcda3c2 100644 --- a/llvm/include/llvm/Transforms/Scalar/LoopPassManager.h +++ b/llvm/include/llvm/Transforms/Scalar/LoopPassManager.h @@ -36,41 +36,163 @@ #ifndef LLVM_TRANSFORMS_SCALAR_LOOPPASSMANAGER_H #define LLVM_TRANSFORMS_SCALAR_LOOPPASSMANAGER_H -#include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/PriorityWorklist.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/BasicAliasAnalysis.h" -#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/LoopAnalysisManager.h" #include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/MemorySSA.h" -#include "llvm/Analysis/ScalarEvolution.h" -#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" -#include "llvm/Analysis/TargetLibraryInfo.h" -#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/LoopNestAnalysis.h" #include "llvm/IR/Dominators.h" +#include "llvm/IR/PassInstrumentation.h" #include "llvm/IR/PassManager.h" #include "llvm/Transforms/Utils/LCSSA.h" #include "llvm/Transforms/Utils/LoopSimplify.h" #include "llvm/Transforms/Utils/LoopUtils.h" +#include <memory> namespace llvm { // Forward declarations of an update tracking API used in the pass manager. class LPMUpdater; +namespace { + +template <typename PassT> +using HasRunOnLoopT = decltype(std::declval<PassT>().run( + std::declval<Loop &>(), std::declval<LoopAnalysisManager &>(), + std::declval<LoopStandardAnalysisResults &>(), + std::declval<LPMUpdater &>())); + +} // namespace + // Explicit specialization and instantiation declarations for the pass manager. // See the comments on the definition of the specialization for details on how // it differs from the primary template. template <> -PreservedAnalyses -PassManager<Loop, LoopAnalysisManager, LoopStandardAnalysisResults &, - LPMUpdater &>::run(Loop &InitialL, LoopAnalysisManager &AM, - LoopStandardAnalysisResults &AnalysisResults, - LPMUpdater &U); -extern template class PassManager<Loop, LoopAnalysisManager, - LoopStandardAnalysisResults &, LPMUpdater &>; +class PassManager<Loop, LoopAnalysisManager, LoopStandardAnalysisResults &, + LPMUpdater &> + : public PassInfoMixin< + PassManager<Loop, LoopAnalysisManager, LoopStandardAnalysisResults &, + LPMUpdater &>> { +public: + /// Construct a pass manager. + /// + /// If \p DebugLogging is true, we'll log our progress to llvm::dbgs(). + explicit PassManager(bool DebugLogging = false) + : DebugLogging(DebugLogging) {} + + // FIXME: These are equivalent to the default move constructor/move + // assignment. However, using = default triggers linker errors due to the + // explicit instantiations below. Find a way to use the default and remove the + // duplicated code here. 
+ PassManager(PassManager &&Arg) + : IsLoopNestPass(std::move(Arg.IsLoopNestPass)), + LoopPasses(std::move(Arg.LoopPasses)), + LoopNestPasses(std::move(Arg.LoopNestPasses)), + DebugLogging(std::move(Arg.DebugLogging)) {} + + PassManager &operator=(PassManager &&RHS) { + IsLoopNestPass = std::move(RHS.IsLoopNestPass); + LoopPasses = std::move(RHS.LoopPasses); + LoopNestPasses = std::move(RHS.LoopNestPasses); + DebugLogging = std::move(RHS.DebugLogging); + return *this; + } + + PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR, LPMUpdater &U); + + /// Add either a loop pass or a loop-nest pass to the pass manager. Append \p + /// Pass to the list of loop passes if it has a dedicated \fn run() method for + /// loops and to the list of loop-nest passes if the \fn run() method is for + /// loop-nests instead. Also append whether \p Pass is loop-nest pass or not + /// to the end of \var IsLoopNestPass so we can easily identify the types of + /// passes in the pass manager later. + template <typename PassT> + std::enable_if_t<is_detected<HasRunOnLoopT, PassT>::value> + addPass(PassT Pass) { + using LoopPassModelT = + detail::PassModel<Loop, PassT, PreservedAnalyses, LoopAnalysisManager, + LoopStandardAnalysisResults &, LPMUpdater &>; + IsLoopNestPass.push_back(false); + LoopPasses.emplace_back(new LoopPassModelT(std::move(Pass))); + } + + template <typename PassT> + std::enable_if_t<!is_detected<HasRunOnLoopT, PassT>::value> + addPass(PassT Pass) { + using LoopNestPassModelT = + detail::PassModel<LoopNest, PassT, PreservedAnalyses, + LoopAnalysisManager, LoopStandardAnalysisResults &, + LPMUpdater &>; + IsLoopNestPass.push_back(true); + LoopNestPasses.emplace_back(new LoopNestPassModelT(std::move(Pass))); + } + + // Specializations of `addPass` for `RepeatedPass`. These are necessary since + // `RepeatedPass` has a templated `run` method that will result in incorrect + // detection of `HasRunOnLoopT`. + template <typename PassT> + std::enable_if_t<is_detected<HasRunOnLoopT, PassT>::value> + addPass(RepeatedPass<PassT> Pass) { + using RepeatedLoopPassModelT = + detail::PassModel<Loop, RepeatedPass<PassT>, PreservedAnalyses, + LoopAnalysisManager, LoopStandardAnalysisResults &, + LPMUpdater &>; + IsLoopNestPass.push_back(false); + LoopPasses.emplace_back(new RepeatedLoopPassModelT(std::move(Pass))); + } + + template <typename PassT> + std::enable_if_t<!is_detected<HasRunOnLoopT, PassT>::value> + addPass(RepeatedPass<PassT> Pass) { + using RepeatedLoopNestPassModelT = + detail::PassModel<LoopNest, RepeatedPass<PassT>, PreservedAnalyses, + LoopAnalysisManager, LoopStandardAnalysisResults &, + LPMUpdater &>; + IsLoopNestPass.push_back(true); + LoopNestPasses.emplace_back( + new RepeatedLoopNestPassModelT(std::move(Pass))); + } + + bool isEmpty() const { return LoopPasses.empty() && LoopNestPasses.empty(); } + + static bool isRequired() { return true; } + + size_t getNumLoopPasses() const { return LoopPasses.size(); } + size_t getNumLoopNestPasses() const { return LoopNestPasses.size(); } + +protected: + using LoopPassConceptT = + detail::PassConcept<Loop, LoopAnalysisManager, + LoopStandardAnalysisResults &, LPMUpdater &>; + using LoopNestPassConceptT = + detail::PassConcept<LoopNest, LoopAnalysisManager, + LoopStandardAnalysisResults &, LPMUpdater &>; + + // BitVector that identifies whether the passes are loop passes or loop-nest + // passes (true for loop-nest passes). 
+ BitVector IsLoopNestPass; + std::vector<std::unique_ptr<LoopPassConceptT>> LoopPasses; + std::vector<std::unique_ptr<LoopNestPassConceptT>> LoopNestPasses; + + /// Flag indicating whether we should do debug logging. + bool DebugLogging; + + /// Run either a loop pass or a loop-nest pass. Returns `None` if + /// PassInstrumentation's BeforePass returns false. Otherwise, returns the + /// preserved analyses of the pass. + template <typename IRUnitT, typename PassT> + Optional<PreservedAnalyses> + runSinglePass(IRUnitT &IR, PassT &Pass, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR, LPMUpdater &U, + PassInstrumentation &PI); + + PreservedAnalyses runWithLoopNestPasses(Loop &L, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR, + LPMUpdater &U); + PreservedAnalyses runWithoutLoopNestPasses(Loop &L, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR, + LPMUpdater &U); +}; /// The Loop pass manager. /// @@ -103,7 +225,7 @@ using RequireAnalysisLoopPass = RequireAnalysisPass<AnalysisT, Loop, LoopAnalysisManager, LoopStandardAnalysisResults &, LPMUpdater &>; -template <typename LoopPassT> class FunctionToLoopPassAdaptor; +class FunctionToLoopPassAdaptor; /// This class provides an interface for updating the loop pass manager based /// on mutations to the loop nest. @@ -111,6 +233,13 @@ template <typename LoopPassT> class FunctionToLoopPassAdaptor; /// A reference to an instance of this class is passed as an argument to each /// Loop pass, and Loop passes should use it to update LPM infrastructure if /// they modify the loop nest structure. +/// +/// \c LPMUpdater comes with two modes: the loop mode and the loop-nest mode. In +/// loop mode, all the loops in the function will be pushed into the worklist +/// and when new loops are added to the pipeline, their subloops are also +/// inserted recursively. On the other hand, in loop-nest mode, only top-level +/// loops are contained in the worklist and the addition of new (top-level) +/// loops will not trigger the addition of their subloops. class LPMUpdater { public: /// This can be queried by loop passes which run other loop passes (like pass @@ -132,6 +261,8 @@ public: /// state, this routine will mark that the current loop should be skipped by /// the rest of the pass management infrastructure. void markLoopAsDeleted(Loop &L, llvm::StringRef Name) { + assert((!LoopNestMode || L.isOutermost()) && + "L should be a top-level loop in loop-nest mode."); LAM.clear(L, Name); assert((&L == CurrentL || CurrentL->contains(&L)) && "Cannot delete a loop outside of the " @@ -147,6 +278,8 @@ public: /// loops within them will be visited in postorder as usual for the loop pass /// manager. void addChildLoops(ArrayRef<Loop *> NewChildLoops) { + assert(!LoopNestMode && + "Child loops should not be pushed in loop-nest mode."); // Insert ourselves back into the worklist first, as this loop should be // revisited after all the children have been processed. Worklist.insert(CurrentL); @@ -178,7 +311,10 @@ public: "All of the new loops must be siblings of the current loop!"); #endif - appendLoopsToWorklist(NewSibLoops, Worklist); + if (LoopNestMode) + Worklist.insert(NewSibLoops); + else + appendLoopsToWorklist(NewSibLoops, Worklist); // No need to skip the current loop or revisit it, as sibling loops // shouldn't impact anything. 
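The assertions above pin down the contract between a loop pass and the LPMUpdater in the two modes. As a rough illustration (the pass below is hypothetical, and only its interaction with the updater is meant to be representative of the API added here), a loop pass that changes the loop structure would report that back roughly as follows:

#include "llvm/ADT/SmallVector.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"

using namespace llvm;

// Hypothetical loop pass; the transformation itself is elided.
struct HypotheticalLoopSplitPass : PassInfoMixin<HypotheticalLoopSplitPass> {
  PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM,
                        LoopStandardAnalysisResults &AR, LPMUpdater &U) {
    SmallVector<Loop *, 4> NewSubLoops;
    bool ErasedL = false;

    // ... transform L here; the transformation may create NewSubLoops or
    // delete L entirely ...

    if (ErasedL) {
      // The current loop is gone: clear its cached analyses and tell the
      // driver to skip it. In loop-nest mode this is only legal for
      // top-level loops, per the assertion above.
      U.markLoopAsDeleted(L, "hypothetical-loop-split");
      return getLoopPassPreservedAnalyses();
    }

    if (!NewSubLoops.empty())
      // Newly created children are handed back so the worklist revisits
      // them (and L itself) in the usual postorder.
      U.addChildLoops(NewSubLoops);

    return NewSubLoops.empty() ? PreservedAnalyses::all()
                               : getLoopPassPreservedAnalyses();
  }
};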
@@ -198,7 +334,7 @@ public: } private: - template <typename LoopPassT> friend class llvm::FunctionToLoopPassAdaptor; + friend class llvm::FunctionToLoopPassAdaptor; /// The \c FunctionToLoopPassAdaptor's worklist of loops to process. SmallPriorityWorklist<Loop *, 4> &Worklist; @@ -208,6 +344,7 @@ private: Loop *CurrentL; bool SkipCurrentLoop; + const bool LoopNestMode; #ifndef NDEBUG // In debug builds we also track the parent loop to implement asserts even in @@ -216,10 +353,33 @@ private: #endif LPMUpdater(SmallPriorityWorklist<Loop *, 4> &Worklist, - LoopAnalysisManager &LAM) - : Worklist(Worklist), LAM(LAM) {} + LoopAnalysisManager &LAM, bool LoopNestMode = false) + : Worklist(Worklist), LAM(LAM), LoopNestMode(LoopNestMode) {} }; +template <typename IRUnitT, typename PassT> +Optional<PreservedAnalyses> LoopPassManager::runSinglePass( + IRUnitT &IR, PassT &Pass, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR, LPMUpdater &U, PassInstrumentation &PI) { + // Check the PassInstrumentation's BeforePass callbacks before running the + // pass, skip its execution completely if asked to (callback returns false). + if (!PI.runBeforePass<IRUnitT>(*Pass, IR)) + return None; + + PreservedAnalyses PA; + { + TimeTraceScope TimeScope(Pass->name(), IR.getName()); + PA = Pass->run(IR, AM, AR, U); + } + + // do not pass deleted Loop into the instrumentation + if (U.skipCurrentLoop()) + PI.runAfterPassInvalidated<IRUnitT>(*Pass, PA); + else + PI.runAfterPass<IRUnitT>(*Pass, IR, PA); + return PA; +} + /// Adaptor that maps from a function to its loops. /// /// Designed to allow composition of a LoopPass(Manager) and a @@ -227,161 +387,107 @@ private: /// FunctionAnalysisManager it will run the \c LoopAnalysisManagerFunctionProxy /// analysis prior to running the loop passes over the function to enable a \c /// LoopAnalysisManager to be used within this run safely. -template <typename LoopPassT> +/// +/// The adaptor comes with two modes: the loop mode and the loop-nest mode, and +/// the worklist updater living inside will be in the same mode as the adaptor +/// (refer to the documentation of \c LPMUpdater for a more detailed explanation). +/// Specifically, in loop mode, all loops in the function will be pushed into +/// the worklist and processed by \p Pass, while only top-level loops are +/// processed in loop-nest mode. Please refer to the various specializations of +/// \fn createFunctionToLoopPassAdaptor to see when loop mode and loop-nest +/// mode are used. class FunctionToLoopPassAdaptor - : public PassInfoMixin<FunctionToLoopPassAdaptor<LoopPassT>> { + : public PassInfoMixin<FunctionToLoopPassAdaptor> { public: - explicit FunctionToLoopPassAdaptor(LoopPassT Pass, bool UseMemorySSA = false, - bool DebugLogging = false) + using PassConceptT = + detail::PassConcept<Loop, LoopAnalysisManager, + LoopStandardAnalysisResults &, LPMUpdater &>; + + explicit FunctionToLoopPassAdaptor(std::unique_ptr<PassConceptT> Pass, + bool UseMemorySSA = false, + bool UseBlockFrequencyInfo = false, + bool DebugLogging = false, + bool LoopNestMode = false) : Pass(std::move(Pass)), LoopCanonicalizationFPM(DebugLogging), - UseMemorySSA(UseMemorySSA) { + UseMemorySSA(UseMemorySSA), + UseBlockFrequencyInfo(UseBlockFrequencyInfo), + LoopNestMode(LoopNestMode) { LoopCanonicalizationFPM.addPass(LoopSimplifyPass()); LoopCanonicalizationFPM.addPass(LCSSAPass()); } /// Runs the loop passes across every loop in the function.
- PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM) { - // Before we even compute any loop analyses, first run a miniature function - // pass pipeline to put loops into their canonical form. Note that we can - // directly build up function analyses after this as the function pass - // manager handles all the invalidation at that layer. - PassInstrumentation PI = AM.getResult<PassInstrumentationAnalysis>(F); - - PreservedAnalyses PA = PreservedAnalyses::all(); - // Check the PassInstrumentation's BeforePass callbacks before running the - // canonicalization pipeline. - if (PI.runBeforePass<Function>(LoopCanonicalizationFPM, F)) { - PA = LoopCanonicalizationFPM.run(F, AM); - PI.runAfterPass<Function>(LoopCanonicalizationFPM, F); - } - - // Get the loop structure for this function - LoopInfo &LI = AM.getResult<LoopAnalysis>(F); - - // If there are no loops, there is nothing to do here. - if (LI.empty()) - return PA; - - // Get the analysis results needed by loop passes. - MemorySSA *MSSA = UseMemorySSA - ? (&AM.getResult<MemorySSAAnalysis>(F).getMSSA()) - : nullptr; - LoopStandardAnalysisResults LAR = {AM.getResult<AAManager>(F), - AM.getResult<AssumptionAnalysis>(F), - AM.getResult<DominatorTreeAnalysis>(F), - AM.getResult<LoopAnalysis>(F), - AM.getResult<ScalarEvolutionAnalysis>(F), - AM.getResult<TargetLibraryAnalysis>(F), - AM.getResult<TargetIRAnalysis>(F), - MSSA}; - - // Setup the loop analysis manager from its proxy. It is important that - // this is only done when there are loops to process and we have built the - // LoopStandardAnalysisResults object. The loop analyses cached in this - // manager have access to those analysis results and so it must invalidate - // itself when they go away. - auto &LAMFP = AM.getResult<LoopAnalysisManagerFunctionProxy>(F); - if (UseMemorySSA) - LAMFP.markMSSAUsed(); - LoopAnalysisManager &LAM = LAMFP.getManager(); - - // A postorder worklist of loops to process. - SmallPriorityWorklist<Loop *, 4> Worklist; - - // Register the worklist and loop analysis manager so that loop passes can - // update them when they mutate the loop nest structure. - LPMUpdater Updater(Worklist, LAM); - - // Add the loop nests in the reverse order of LoopInfo. See method - // declaration. - appendLoopsToWorklist(LI, Worklist); - - do { - Loop *L = Worklist.pop_back_val(); - - // Reset the update structure for this loop. - Updater.CurrentL = L; - Updater.SkipCurrentLoop = false; + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); -#ifndef NDEBUG - // Save a parent loop pointer for asserts. - Updater.ParentL = L->getParentLoop(); + static bool isRequired() { return true; } - // Verify the loop structure and LCSSA form before visiting the loop. - L->verifyLoop(); - assert(L->isRecursivelyLCSSAForm(LAR.DT, LI) && - "Loops must remain in LCSSA form!"); -#endif - // Check the PassInstrumentation's BeforePass callbacks before running the - // pass, skip its execution completely if asked to (callback returns - // false). - if (!PI.runBeforePass<Loop>(Pass, *L)) - continue; - - PreservedAnalyses PassPA; - { - TimeTraceScope TimeScope(Pass.name()); - PassPA = Pass.run(*L, LAM, LAR, Updater); - } - - // Do not pass deleted Loop into the instrumentation. - if (Updater.skipCurrentLoop()) - PI.runAfterPassInvalidated<Loop>(Pass); - else - PI.runAfterPass<Loop>(Pass, *L); - - // FIXME: We should verify the set of analyses relevant to Loop passes - // are preserved. - - // If the loop hasn't been deleted, we need to handle invalidation here. 
- if (!Updater.skipCurrentLoop()) - // We know that the loop pass couldn't have invalidated any other - // loop's analyses (that's the contract of a loop pass), so directly - // handle the loop analysis manager's invalidation here. - LAM.invalidate(*L, PassPA); - - // Then intersect the preserved set so that invalidation of module - // analyses will eventually occur when the module pass completes. - PA.intersect(std::move(PassPA)); - } while (!Worklist.empty()); - - // By definition we preserve the proxy. We also preserve all analyses on - // Loops. This precludes *any* invalidation of loop analyses by the proxy, - // but that's OK because we've taken care to invalidate analyses in the - // loop analysis manager incrementally above. - PA.preserveSet<AllAnalysesOn<Loop>>(); - PA.preserve<LoopAnalysisManagerFunctionProxy>(); - // We also preserve the set of standard analyses. - PA.preserve<DominatorTreeAnalysis>(); - PA.preserve<LoopAnalysis>(); - PA.preserve<ScalarEvolutionAnalysis>(); - if (UseMemorySSA) - PA.preserve<MemorySSAAnalysis>(); - // FIXME: What we really want to do here is preserve an AA category, but - // that concept doesn't exist yet. - PA.preserve<AAManager>(); - PA.preserve<BasicAA>(); - PA.preserve<GlobalsAA>(); - PA.preserve<SCEVAA>(); - return PA; - } + bool isLoopNestMode() const { return LoopNestMode; } private: - LoopPassT Pass; + std::unique_ptr<PassConceptT> Pass; FunctionPassManager LoopCanonicalizationFPM; bool UseMemorySSA = false; + bool UseBlockFrequencyInfo = false; + const bool LoopNestMode; }; /// A function to deduce a loop pass type and wrap it in the templated /// adaptor. +/// +/// If \p Pass is a loop pass, the returned adaptor will be in loop mode. template <typename LoopPassT> -FunctionToLoopPassAdaptor<LoopPassT> +inline std::enable_if_t<is_detected<HasRunOnLoopT, LoopPassT>::value, + FunctionToLoopPassAdaptor> createFunctionToLoopPassAdaptor(LoopPassT Pass, bool UseMemorySSA = false, + bool UseBlockFrequencyInfo = false, + bool DebugLogging = false) { + using PassModelT = + detail::PassModel<Loop, LoopPassT, PreservedAnalyses, LoopAnalysisManager, + LoopStandardAnalysisResults &, LPMUpdater &>; + return FunctionToLoopPassAdaptor( + std::make_unique<PassModelT>(std::move(Pass)), UseMemorySSA, + UseBlockFrequencyInfo, DebugLogging, false); +} + +/// If \p Pass is a loop-nest pass, \p Pass will first be wrapped into a +/// \c LoopPassManager and the returned adaptor will be in loop-nest mode. +template <typename LoopNestPassT> +inline std::enable_if_t<!is_detected<HasRunOnLoopT, LoopNestPassT>::value, + FunctionToLoopPassAdaptor> +createFunctionToLoopPassAdaptor(LoopNestPassT Pass, bool UseMemorySSA = false, + bool UseBlockFrequencyInfo = false, bool DebugLogging = false) { - return FunctionToLoopPassAdaptor<LoopPassT>(std::move(Pass), UseMemorySSA, - DebugLogging); + LoopPassManager LPM(DebugLogging); + LPM.addPass(std::move(Pass)); + using PassModelT = + detail::PassModel<Loop, LoopPassManager, PreservedAnalyses, + LoopAnalysisManager, LoopStandardAnalysisResults &, + LPMUpdater &>; + return FunctionToLoopPassAdaptor(std::make_unique<PassModelT>(std::move(LPM)), + UseMemorySSA, UseBlockFrequencyInfo, + DebugLogging, true); +} + +/// If \p Pass is an instance of \c LoopPassManager, the returned adaptor will +/// be in loop-nest mode if the pass manager contains only loop-nest passes. 
+template <> +inline FunctionToLoopPassAdaptor +createFunctionToLoopPassAdaptor<LoopPassManager>(LoopPassManager LPM, + bool UseMemorySSA, + bool UseBlockFrequencyInfo, + bool DebugLogging) { + // Check if LPM contains any loop pass and if it does not, returns an adaptor + // in loop-nest mode. + using PassModelT = + detail::PassModel<Loop, LoopPassManager, PreservedAnalyses, + LoopAnalysisManager, LoopStandardAnalysisResults &, + LPMUpdater &>; + bool LoopNestMode = (LPM.getNumLoopPasses() == 0); + return FunctionToLoopPassAdaptor(std::make_unique<PassModelT>(std::move(LPM)), + UseMemorySSA, UseBlockFrequencyInfo, + DebugLogging, LoopNestMode); } /// Pass for printing a loop's contents as textual IR. diff --git a/llvm/include/llvm/Transforms/Scalar/LoopReroll.h b/llvm/include/llvm/Transforms/Scalar/LoopReroll.h new file mode 100644 index 000000000000..6ae309e48a28 --- /dev/null +++ b/llvm/include/llvm/Transforms/Scalar/LoopReroll.h @@ -0,0 +1,27 @@ +//===- LoopReroll.h - Loop rerolling pass ---------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_SCALAR_LOOPREROLL_H +#define LLVM_TRANSFORMS_SCALAR_LOOPREROLL_H + +#include "llvm/IR/PassManager.h" +#include "llvm/Transforms/Scalar/LoopPassManager.h" + +namespace llvm { + +class Function; + +class LoopRerollPass : public PassInfoMixin<LoopRerollPass> { +public: + PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR, LPMUpdater &U); +}; + +} // end namespace llvm + +#endif // LLVM_TRANSFORMS_SCALAR_LOOPREROLL_H diff --git a/llvm/include/llvm/Transforms/Scalar/LoopRotation.h b/llvm/include/llvm/Transforms/Scalar/LoopRotation.h index 254e6072906a..f68ac70da324 100644 --- a/llvm/include/llvm/Transforms/Scalar/LoopRotation.h +++ b/llvm/include/llvm/Transforms/Scalar/LoopRotation.h @@ -22,12 +22,14 @@ namespace llvm { /// A simple loop rotation transformation. class LoopRotatePass : public PassInfoMixin<LoopRotatePass> { public: - LoopRotatePass(bool EnableHeaderDuplication = true); + LoopRotatePass(bool EnableHeaderDuplication = true, + bool PrepareForLTO = false); PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U); private: const bool EnableHeaderDuplication; + const bool PrepareForLTO; }; } diff --git a/llvm/include/llvm/Transforms/Scalar/LoopUnrollPass.h b/llvm/include/llvm/Transforms/Scalar/LoopUnrollPass.h index 7b049bdc8ad1..30cc08cb42ae 100644 --- a/llvm/include/llvm/Transforms/Scalar/LoopUnrollPass.h +++ b/llvm/include/llvm/Transforms/Scalar/LoopUnrollPass.h @@ -22,7 +22,7 @@ class Function; class Loop; class LPMUpdater; -/// Loop unroll pass that only does full loop unrolling. +/// Loop unroll pass that only does full loop unrolling and peeling. 
class LoopFullUnrollPass : public PassInfoMixin<LoopFullUnrollPass> { const int OptLevel; diff --git a/llvm/include/llvm/Transforms/Scalar/LoopVersioningLICM.h b/llvm/include/llvm/Transforms/Scalar/LoopVersioningLICM.h new file mode 100644 index 000000000000..87d6d6759db2 --- /dev/null +++ b/llvm/include/llvm/Transforms/Scalar/LoopVersioningLICM.h @@ -0,0 +1,25 @@ +//===- LoopVersioningLICM.h - LICM Loop Versioning ------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_SCALAR_LOOPVERSIONINGLICM_H +#define LLVM_TRANSFORMS_SCALAR_LOOPVERSIONINGLICM_H + +#include "llvm/IR/PassManager.h" +#include "llvm/Transforms/Scalar/LoopPassManager.h" + +namespace llvm { + +class LoopVersioningLICMPass : public PassInfoMixin<LoopVersioningLICMPass> { +public: + PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &LAR, LPMUpdater &U); +}; + +} // namespace llvm + +#endif // LLVM_TRANSFORMS_SCALAR_LOOPVERSIONINGLICM_H diff --git a/llvm/include/llvm/Transforms/Scalar/LowerAtomic.h b/llvm/include/llvm/Transforms/Scalar/LowerAtomic.h index 40f8ca571f19..1d5550829f93 100644 --- a/llvm/include/llvm/Transforms/Scalar/LowerAtomic.h +++ b/llvm/include/llvm/Transforms/Scalar/LowerAtomic.h @@ -22,6 +22,7 @@ namespace llvm { class LowerAtomicPass : public PassInfoMixin<LowerAtomicPass> { public: PreservedAnalyses run(Function &F, FunctionAnalysisManager &); + static bool isRequired() { return true; } }; } diff --git a/llvm/include/llvm/Transforms/Scalar/LowerExpectIntrinsic.h b/llvm/include/llvm/Transforms/Scalar/LowerExpectIntrinsic.h index 4e47ff70d557..22b2e649e4d4 100644 --- a/llvm/include/llvm/Transforms/Scalar/LowerExpectIntrinsic.h +++ b/llvm/include/llvm/Transforms/Scalar/LowerExpectIntrinsic.h @@ -17,6 +17,7 @@ #include "llvm/IR/Function.h" #include "llvm/IR/PassManager.h" +#include "llvm/Support/CommandLine.h" namespace llvm { @@ -31,6 +32,8 @@ struct LowerExpectIntrinsicPass : PassInfoMixin<LowerExpectIntrinsicPass> { PreservedAnalyses run(Function &F, FunctionAnalysisManager &); }; +extern cl::opt<uint32_t> LikelyBranchWeight; +extern cl::opt<uint32_t> UnlikelyBranchWeight; } #endif diff --git a/llvm/include/llvm/Transforms/Scalar/LowerMatrixIntrinsics.h b/llvm/include/llvm/Transforms/Scalar/LowerMatrixIntrinsics.h index 2f75cd5017aa..a2a31d302ccb 100644 --- a/llvm/include/llvm/Transforms/Scalar/LowerMatrixIntrinsics.h +++ b/llvm/include/llvm/Transforms/Scalar/LowerMatrixIntrinsics.h @@ -16,8 +16,14 @@ #include "llvm/IR/PassManager.h" namespace llvm { -struct LowerMatrixIntrinsicsPass : PassInfoMixin<LowerMatrixIntrinsicsPass> { +class LowerMatrixIntrinsicsPass + : public PassInfoMixin<LowerMatrixIntrinsicsPass> { + bool Minimal; + +public: + LowerMatrixIntrinsicsPass(bool Minimal = false) : Minimal(Minimal) {} PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + static bool isRequired() { return true; } }; } // namespace llvm diff --git a/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h b/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h index 8fc6c23e6944..635b706d0bef 100644 --- a/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h +++ b/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h @@ -14,7 +14,6 @@ #ifndef 
LLVM_TRANSFORMS_SCALAR_MEMCPYOPTIMIZER_H #define LLVM_TRANSFORMS_SCALAR_MEMCPYOPTIMIZER_H -#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/PassManager.h" #include <cstdint> @@ -22,14 +21,19 @@ namespace llvm { +class AAResults; class AssumptionCache; +class CallBase; class CallInst; class DominatorTree; class Function; class Instruction; +class LoadInst; class MemCpyInst; class MemMoveInst; class MemoryDependenceResults; +class MemorySSA; +class MemorySSAUpdater; class MemSetInst; class StoreInst; class TargetLibraryInfo; @@ -38,9 +42,11 @@ class Value; class MemCpyOptPass : public PassInfoMixin<MemCpyOptPass> { MemoryDependenceResults *MD = nullptr; TargetLibraryInfo *TLI = nullptr; - std::function<AliasAnalysis &()> LookupAliasAnalysis; - std::function<AssumptionCache &()> LookupAssumptionCache; - std::function<DominatorTree &()> LookupDomTree; + AAResults *AA = nullptr; + AssumptionCache *AC = nullptr; + DominatorTree *DT = nullptr; + MemorySSA *MSSA = nullptr; + MemorySSAUpdater *MSSAU = nullptr; public: MemCpyOptPass() = default; @@ -48,11 +54,9 @@ public: PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); // Glue for the old PM. - bool runImpl(Function &F, MemoryDependenceResults *MD_, - TargetLibraryInfo *TLI_, - std::function<AliasAnalysis &()> LookupAliasAnalysis_, - std::function<AssumptionCache &()> LookupAssumptionCache_, - std::function<DominatorTree &()> LookupDomTree_); + bool runImpl(Function &F, MemoryDependenceResults *MD, TargetLibraryInfo *TLI, + AAResults *AA, AssumptionCache *AC, DominatorTree *DT, + MemorySSA *MSSA); private: // Helper functions @@ -60,15 +64,18 @@ private: bool processMemSet(MemSetInst *SI, BasicBlock::iterator &BBI); bool processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI); bool processMemMove(MemMoveInst *M); - bool performCallSlotOptzn(Instruction *cpy, Value *cpyDst, Value *cpySrc, - uint64_t cpyLen, Align cpyAlign, CallInst *C); + bool performCallSlotOptzn(Instruction *cpyLoad, Instruction *cpyStore, + Value *cpyDst, Value *cpySrc, uint64_t cpyLen, + Align cpyAlign, CallInst *C); bool processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep); bool processMemSetMemCpyDependence(MemCpyInst *MemCpy, MemSetInst *MemSet); bool performMemCpyToMemSetOptzn(MemCpyInst *MemCpy, MemSetInst *MemSet); bool processByValArgument(CallBase &CB, unsigned ArgNo); Instruction *tryMergingIntoMemset(Instruction *I, Value *StartPtr, Value *ByteVal); + bool moveUp(StoreInst *SI, Instruction *P, const LoadInst *LI); + void eraseInstruction(Instruction *I); bool iterateOnFunction(Function &F); }; diff --git a/llvm/include/llvm/Transforms/Scalar/NaryReassociate.h b/llvm/include/llvm/Transforms/Scalar/NaryReassociate.h index 26f5fe185dd5..5fa7427b2603 100644 --- a/llvm/include/llvm/Transforms/Scalar/NaryReassociate.h +++ b/llvm/include/llvm/Transforms/Scalar/NaryReassociate.h @@ -114,7 +114,7 @@ private: bool doOneIteration(Function &F); // Reassociates I for better CSE. - Instruction *tryReassociate(Instruction *I); + Instruction *tryReassociate(Instruction *I, const SCEV *&OrigSCEV); // Reassociate GEP for better CSE. 
Instruction *tryReassociateGEP(GetElementPtrInst *GEP); diff --git a/llvm/include/llvm/Transforms/Scalar/Reg2Mem.h b/llvm/include/llvm/Transforms/Scalar/Reg2Mem.h new file mode 100644 index 000000000000..25f6563d7dcf --- /dev/null +++ b/llvm/include/llvm/Transforms/Scalar/Reg2Mem.h @@ -0,0 +1,27 @@ +//===- Reg2Mem.h - Convert registers to allocas -----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file provides the interface for the RegToMem Pass. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_SCALAR_REG2MEM_H +#define LLVM_TRANSFORMS_SCALAR_REG2MEM_H + +#include "llvm/IR/PassManager.h" + +namespace llvm { + +class RegToMemPass : public PassInfoMixin<RegToMemPass> { +public: + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); +}; + +} // end namespace llvm + +#endif // LLVM_TRANSFORMS_SCALAR_REG2MEM_H diff --git a/llvm/include/llvm/Transforms/Scalar/SROA.h b/llvm/include/llvm/Transforms/Scalar/SROA.h index 864a0cbd9db1..6ef7c6b22c0b 100644 --- a/llvm/include/llvm/Transforms/Scalar/SROA.h +++ b/llvm/include/llvm/Transforms/Scalar/SROA.h @@ -18,6 +18,7 @@ #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/IR/PassManager.h" +#include "llvm/IR/ValueHandle.h" #include <vector> namespace llvm { @@ -77,8 +78,8 @@ class SROA : public PassInfoMixin<SROA> { /// A collection of instructions to delete. /// We try to batch deletions to simplify code and make things a bit more - /// efficient. - SetVector<Instruction *, SmallVector<Instruction *, 8>> DeadInsts; + /// efficient. We also make sure there is no dangling pointers. + SmallVector<WeakVH, 8> DeadInsts; /// Post-promotion worklist. /// diff --git a/llvm/include/llvm/Transforms/Scalar/ScalarizeMaskedMemIntrin.h b/llvm/include/llvm/Transforms/Scalar/ScalarizeMaskedMemIntrin.h new file mode 100644 index 000000000000..19339ca13242 --- /dev/null +++ b/llvm/include/llvm/Transforms/Scalar/ScalarizeMaskedMemIntrin.h @@ -0,0 +1,29 @@ +//===- ScalarizeMaskedMemIntrin.h - Scalarize unsupported masked mem ----===// +// instrinsics +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass replaces masked memory intrinsics - when unsupported by the target +// - with a chain of basic blocks, that deal with the elements one-by-one if the +// appropriate mask bit is set. 
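A brief sketch of the behaviour behind the SROA.h hunk above, where the DeadInsts container becomes SmallVector<WeakVH> so batched deletions cannot leave dangling pointers; deleteBatch is a made-up helper name:

#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/ValueHandle.h"
using namespace llvm;

void deleteBatch(SmallVectorImpl<WeakVH> &DeadInsts) {
  for (WeakVH &VH : DeadInsts) {
    // A handle whose instruction was already erased elsewhere reads as null
    // rather than dangling, so it can simply be skipped here.
    if (auto *I = dyn_cast_or_null<Instruction>(VH))
      I->eraseFromParent();
  }
  DeadInsts.clear();
}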
+// +//===----------------------------------------------------------------------===// +// +#ifndef LLVM_TRANSFORMS_SCALAR_SCALARIZE_MASKED_MEMINTRIN_H +#define LLVM_TRANSFORMS_SCALAR_SCALARIZE_MASKED_MEMINTRIN_H + +#include "llvm/IR/PassManager.h" + +namespace llvm { + +struct ScalarizeMaskedMemIntrinPass + : public PassInfoMixin<ScalarizeMaskedMemIntrinPass> { + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); +}; +} // end namespace llvm + +#endif diff --git a/llvm/include/llvm/Transforms/Scalar/SeparateConstOffsetFromGEP.h b/llvm/include/llvm/Transforms/Scalar/SeparateConstOffsetFromGEP.h new file mode 100644 index 000000000000..5bd6ce164dc3 --- /dev/null +++ b/llvm/include/llvm/Transforms/Scalar/SeparateConstOffsetFromGEP.h @@ -0,0 +1,27 @@ +//===- SeparateConstOffsetFromGEP.h ---------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_SCALAR_SEPARATECONSTOFFSETFROMGEP_H +#define LLVM_TRANSFORMS_SCALAR_SEPARATECONSTOFFSETFROMGEP_H + +#include "llvm/IR/PassManager.h" + +namespace llvm { + +class SeparateConstOffsetFromGEPPass + : public PassInfoMixin<SeparateConstOffsetFromGEPPass> { + bool LowerGEP; + +public: + SeparateConstOffsetFromGEPPass(bool LowerGEP = false) : LowerGEP(LowerGEP) {} + PreservedAnalyses run(Function &F, FunctionAnalysisManager &); +}; + +} // end namespace llvm + +#endif // LLVM_TRANSFORMS_SCALAR_SEPARATECONSTOFFSETFROMGEP_H diff --git a/llvm/include/llvm/Transforms/Scalar/SimplifyCFG.h b/llvm/include/llvm/Transforms/Scalar/SimplifyCFG.h index f9792d38bbe6..7c5393851ae6 100644 --- a/llvm/include/llvm/Transforms/Scalar/SimplifyCFG.h +++ b/llvm/include/llvm/Transforms/Scalar/SimplifyCFG.h @@ -14,9 +14,9 @@ #ifndef LLVM_TRANSFORMS_SCALAR_SIMPLIFYCFG_H #define LLVM_TRANSFORMS_SCALAR_SIMPLIFYCFG_H -#include "llvm/Transforms/Utils/Local.h" #include "llvm/IR/Function.h" #include "llvm/IR/PassManager.h" +#include "llvm/Transforms/Utils/SimplifyCFGOptions.h" namespace llvm { @@ -34,13 +34,7 @@ public: /// rather than optimal IR. That is, by default we bypass transformations that /// are likely to improve performance but make analysis for other passes more /// difficult. - SimplifyCFGPass() - : SimplifyCFGPass(SimplifyCFGOptions() - .forwardSwitchCondToPhi(false) - .convertSwitchToLookupTable(false) - .needCanonicalLoops(true) - .sinkCommonInsts(false)) {} - + SimplifyCFGPass(); /// Construct a pass with optional optimizations. SimplifyCFGPass(const SimplifyCFGOptions &PassOptions); diff --git a/llvm/include/llvm/Transforms/Scalar/StraightLineStrengthReduce.h b/llvm/include/llvm/Transforms/Scalar/StraightLineStrengthReduce.h new file mode 100644 index 000000000000..11233cc65efa --- /dev/null +++ b/llvm/include/llvm/Transforms/Scalar/StraightLineStrengthReduce.h @@ -0,0 +1,24 @@ +//===- StraightLineStrengthReduce.h - -----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_SCALAR_STRAIGHTLINESTRENGTHREDUCE_H +#define LLVM_TRANSFORMS_SCALAR_STRAIGHTLINESTRENGTHREDUCE_H + +#include "llvm/IR/PassManager.h" + +namespace llvm { + +class StraightLineStrengthReducePass + : public PassInfoMixin<StraightLineStrengthReducePass> { +public: + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); +}; + +} // namespace llvm + +#endif // LLVM_TRANSFORMS_SCALAR_STRAIGHTLINESTRENGTHREDUCE_H diff --git a/llvm/include/llvm/Transforms/Scalar/StructurizeCFG.h b/llvm/include/llvm/Transforms/Scalar/StructurizeCFG.h new file mode 100644 index 000000000000..50d41acd529e --- /dev/null +++ b/llvm/include/llvm/Transforms/Scalar/StructurizeCFG.h @@ -0,0 +1,20 @@ +//===- StructurizeCFG.h ---------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_SCALAR_STRUCTURIZECFG_H +#define LLVM_TRANSFORMS_SCALAR_STRUCTURIZECFG_H + +#include "llvm/IR/PassManager.h" + +namespace llvm { +struct StructurizeCFGPass : PassInfoMixin<StructurizeCFGPass> { + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); +}; +} // namespace llvm + +#endif // LLVM_TRANSFORMS_SCALAR_STRUCTURIZECFG_H diff --git a/llvm/include/llvm/Transforms/Utils.h b/llvm/include/llvm/Transforms/Utils.h index 75edefac1cbd..9162a86183db 100644 --- a/llvm/include/llvm/Transforms/Utils.h +++ b/llvm/include/llvm/Transforms/Utils.h @@ -117,7 +117,7 @@ extern char &LoopSimplifyID; /// This function returns a new pass that downgrades the debug info in the /// module to line tables only. -ModulePass *createStripNonLineTableDebugInfoPass(); +ModulePass *createStripNonLineTableDebugLegacyPass(); //===----------------------------------------------------------------------===// // diff --git a/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h b/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h index 0a63654feb98..1dda73913826 100644 --- a/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h +++ b/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h @@ -74,7 +74,7 @@ bool EliminateUnreachableBlocks(Function &F, DomTreeUpdater *DTU = nullptr, /// in it, fold them away. This handles the case when all entries to the PHI /// nodes in a block are guaranteed equal, such as when the block has exactly /// one predecessor. -void FoldSingleEntryPHINodes(BasicBlock *BB, +bool FoldSingleEntryPHINodes(BasicBlock *BB, MemoryDependenceResults *MemDep = nullptr); /// Examine each PHI in the given block and delete it if it is dead. Also @@ -196,7 +196,8 @@ struct CriticalEdgeSplittingOptions { /// to. BasicBlock *SplitCriticalEdge(Instruction *TI, unsigned SuccNum, const CriticalEdgeSplittingOptions &Options = - CriticalEdgeSplittingOptions()); + CriticalEdgeSplittingOptions(), + const Twine &BBName = ""); inline BasicBlock * SplitCriticalEdge(BasicBlock *BB, succ_iterator SI, @@ -244,19 +245,71 @@ unsigned SplitAllCriticalEdges(Function &F, const CriticalEdgeSplittingOptions &Options = CriticalEdgeSplittingOptions()); -/// Split the edge connecting specified block. 
+/// Split the edge connecting the specified blocks, and return the newly created +/// basic block between \p From and \p To. BasicBlock *SplitEdge(BasicBlock *From, BasicBlock *To, DominatorTree *DT = nullptr, LoopInfo *LI = nullptr, - MemorySSAUpdater *MSSAU = nullptr); + MemorySSAUpdater *MSSAU = nullptr, + const Twine &BBName = ""); -/// Split the specified block at the specified instruction - everything before -/// SplitPt stays in Old and everything starting with SplitPt moves to a new -/// block. The two blocks are joined by an unconditional branch and the loop -/// info is updated. +/// Split the specified block at the specified instruction. +/// +/// If \p Before is true, splitBlockBefore handles the block +/// splitting. Otherwise, execution proceeds as described below. +/// +/// Everything before \p SplitPt stays in \p Old and everything starting with \p +/// SplitPt moves to a new block. The two blocks are joined by an unconditional +/// branch. The new block with name \p BBName is returned. +/// +/// FIXME: deprecated, switch to the DomTreeUpdater-based one. +BasicBlock *SplitBlock(BasicBlock *Old, Instruction *SplitPt, DominatorTree *DT, + LoopInfo *LI = nullptr, + MemorySSAUpdater *MSSAU = nullptr, + const Twine &BBName = "", bool Before = false); + +/// Split the specified block at the specified instruction. +/// +/// If \p Before is true, splitBlockBefore handles the block +/// splitting. Otherwise, execution proceeds as described below. +/// +/// Everything before \p SplitPt stays in \p Old and everything starting with \p +/// SplitPt moves to a new block. The two blocks are joined by an unconditional +/// branch. The new block with name \p BBName is returned. BasicBlock *SplitBlock(BasicBlock *Old, Instruction *SplitPt, - DominatorTree *DT = nullptr, LoopInfo *LI = nullptr, + DomTreeUpdater *DTU = nullptr, LoopInfo *LI = nullptr, MemorySSAUpdater *MSSAU = nullptr, - const Twine &BBName = ""); + const Twine &BBName = "", bool Before = false); + +/// Split the specified block at the specified instruction \p SplitPt. +/// All instructions before \p SplitPt are moved to a new block and all +/// instructions after \p SplitPt stay in the old block. The new block and the +/// old block are joined by inserting an unconditional branch to the end of the +/// new block. The new block with name \p BBName is returned. +BasicBlock *splitBlockBefore(BasicBlock *Old, Instruction *SplitPt, + DomTreeUpdater *DTU, LoopInfo *LI, + MemorySSAUpdater *MSSAU, const Twine &BBName = ""); + +/// This method introduces at least one new basic block into the function and +/// moves some of the predecessors of BB to be predecessors of the new block. +/// The new predecessors are indicated by the Preds array. The new block is +/// given a suffix of 'Suffix'. Returns new basic block to which predecessors +/// from Preds are now pointing. +/// +/// If BB is a landingpad block then additional basicblock might be introduced. +/// It will have Suffix+".split_lp". See SplitLandingPadPredecessors for more +/// details on this case. +/// +/// This currently updates the LLVM IR, DominatorTree, LoopInfo, and LCCSA but +/// no other analyses. In particular, it does not preserve LoopSimplify +/// (because it's complicated to handle the case where one of the edges being +/// split is an exit of a loop with other exits). +/// +/// FIXME: deprecated, switch to the DomTreeUpdater-based one. 
+BasicBlock *SplitBlockPredecessors(BasicBlock *BB, ArrayRef<BasicBlock *> Preds, + const char *Suffix, DominatorTree *DT, + LoopInfo *LI = nullptr, + MemorySSAUpdater *MSSAU = nullptr, + bool PreserveLCSSA = false); /// This method introduces at least one new basic block into the function and /// moves some of the predecessors of BB to be predecessors of the new block. @@ -274,7 +327,7 @@ BasicBlock *SplitBlock(BasicBlock *Old, Instruction *SplitPt, /// split is an exit of a loop with other exits). BasicBlock *SplitBlockPredecessors(BasicBlock *BB, ArrayRef<BasicBlock *> Preds, const char *Suffix, - DominatorTree *DT = nullptr, + DomTreeUpdater *DTU = nullptr, LoopInfo *LI = nullptr, MemorySSAUpdater *MSSAU = nullptr, bool PreserveLCSSA = false); @@ -290,10 +343,31 @@ BasicBlock *SplitBlockPredecessors(BasicBlock *BB, ArrayRef<BasicBlock *> Preds, /// no other analyses. In particular, it does not preserve LoopSimplify /// (because it's complicated to handle the case where one of the edges being /// split is an exit of a loop with other exits). +/// +/// FIXME: deprecated, switch to the DomTreeUpdater-based one. +void SplitLandingPadPredecessors(BasicBlock *OrigBB, + ArrayRef<BasicBlock *> Preds, + const char *Suffix, const char *Suffix2, + SmallVectorImpl<BasicBlock *> &NewBBs, + DominatorTree *DT, LoopInfo *LI = nullptr, + MemorySSAUpdater *MSSAU = nullptr, + bool PreserveLCSSA = false); + +/// This method transforms the landing pad, OrigBB, by introducing two new basic +/// blocks into the function. One of those new basic blocks gets the +/// predecessors listed in Preds. The other basic block gets the remaining +/// predecessors of OrigBB. The landingpad instruction OrigBB is clone into both +/// of the new basic blocks. The new blocks are given the suffixes 'Suffix1' and +/// 'Suffix2', and are returned in the NewBBs vector. +/// +/// This currently updates the LLVM IR, DominatorTree, LoopInfo, and LCCSA but +/// no other analyses. In particular, it does not preserve LoopSimplify +/// (because it's complicated to handle the case where one of the edges being +/// split is an exit of a loop with other exits). void SplitLandingPadPredecessors( BasicBlock *OrigBB, ArrayRef<BasicBlock *> Preds, const char *Suffix, const char *Suffix2, SmallVectorImpl<BasicBlock *> &NewBBs, - DominatorTree *DT = nullptr, LoopInfo *LI = nullptr, + DomTreeUpdater *DTU = nullptr, LoopInfo *LI = nullptr, MemorySSAUpdater *MSSAU = nullptr, bool PreserveLCSSA = false); /// This method duplicates the specified return instruction into a predecessor @@ -325,10 +399,39 @@ ReturnInst *FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB, /// Returns the NewBasicBlock's terminator. /// /// Updates DT and LI if given. +/// +/// FIXME: deprecated, switch to the DomTreeUpdater-based one. +Instruction *SplitBlockAndInsertIfThen(Value *Cond, Instruction *SplitBefore, + bool Unreachable, MDNode *BranchWeights, + DominatorTree *DT, + LoopInfo *LI = nullptr, + BasicBlock *ThenBlock = nullptr); + +/// Split the containing block at the specified instruction - everything before +/// SplitBefore stays in the old basic block, and the rest of the instructions +/// in the BB are moved to a new block. The two blocks are connected by a +/// conditional branch (with value of Cmp being the condition). +/// Before: +/// Head +/// SplitBefore +/// Tail +/// After: +/// Head +/// if (Cond) +/// ThenBlock +/// SplitBefore +/// Tail +/// +/// If \p ThenBlock is not specified, a new block will be created for it. 
+/// If \p Unreachable is true, the newly created block will end with +/// UnreachableInst, otherwise it branches to Tail. +/// Returns the NewBasicBlock's terminator. +/// +/// Updates DT and LI if given. Instruction *SplitBlockAndInsertIfThen(Value *Cond, Instruction *SplitBefore, bool Unreachable, MDNode *BranchWeights = nullptr, - DominatorTree *DT = nullptr, + DomTreeUpdater *DTU = nullptr, LoopInfo *LI = nullptr, BasicBlock *ThenBlock = nullptr); diff --git a/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h b/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h index 90517e806e02..e7d41933a6c9 100644 --- a/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h +++ b/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h @@ -96,6 +96,10 @@ namespace llvm { IRBuilderBase &B, const DataLayout &DL, const TargetLibraryInfo *TLI); + /// Emit a call to the mempcpy function. + Value *emitMemPCpy(Value *Dst, Value *Src, Value *Len, IRBuilderBase &B, + const DataLayout &DL, const TargetLibraryInfo *TLI); + /// Emit a call to the memchr function. This assumes that Ptr is a pointer, /// Val is an i32 value, and Len is an 'intptr_t' value. Value *emitMemChr(Value *Ptr, Value *Val, Value *Len, IRBuilderBase &B, diff --git a/llvm/include/llvm/Transforms/Utils/CallGraphUpdater.h b/llvm/include/llvm/Transforms/Utils/CallGraphUpdater.h index 22954b469186..f8211d60938e 100644 --- a/llvm/include/llvm/Transforms/Utils/CallGraphUpdater.h +++ b/llvm/include/llvm/Transforms/Utils/CallGraphUpdater.h @@ -87,7 +87,7 @@ public: /// If a new function was created by outlining, this method can be called /// to update the call graph for the new function. Note that the old one /// still needs to be re-analyzed or manually updated. - void registerOutlinedFunction(Function &NewFn); + void registerOutlinedFunction(Function &OriginalFn, Function &NewFn); /// Replace \p OldFn in the call graph (and SCC) with \p NewFn. The uses /// outside the call graph and the function \p OldFn are not modified. diff --git a/llvm/include/llvm/Transforms/Utils/Cloning.h b/llvm/include/llvm/Transforms/Utils/Cloning.h index dffb7801bc8e..56aaa5d48e2a 100644 --- a/llvm/include/llvm/Transforms/Utils/Cloning.h +++ b/llvm/include/llvm/Transforms/Utils/Cloning.h @@ -268,6 +268,42 @@ void updateProfileCallee( Function *Callee, int64_t entryDelta, const ValueMap<const Value *, WeakTrackingVH> *VMap = nullptr); +/// Find the 'llvm.experimental.noalias.scope.decl' intrinsics in the specified +/// basic blocks and extract their scope. These are candidates for duplication +/// when cloning. +void identifyNoAliasScopesToClone( + ArrayRef<BasicBlock *> BBs, SmallVectorImpl<MDNode *> &NoAliasDeclScopes); + +/// Duplicate the specified list of noalias decl scopes. +/// The 'Ext' string is added as an extension to the name. +/// Afterwards, the ClonedScopes contains the mapping of the original scope +/// MDNode onto the cloned scope. +/// Be aware that the cloned scopes are still part of the original scope domain. +void cloneNoAliasScopes( + ArrayRef<MDNode *> NoAliasDeclScopes, + DenseMap<MDNode *, MDNode *> &ClonedScopes, + StringRef Ext, LLVMContext &Context); + +/// Adapt the metadata for the specified instruction according to the +/// provided mapping. This is normally used after cloning an instruction, when +/// some noalias scopes needed to be cloned. +void adaptNoAliasScopes( + llvm::Instruction *I, const DenseMap<MDNode *, MDNode *> &ClonedScopes, + LLVMContext &Context); + +/// Clone the specified noalias decl scopes. 
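A hedged sketch of the DomTreeUpdater-based SplitBlockAndInsertIfThen overload added in the BasicBlockUtils.h hunks above; guardWithCondition is a made-up helper name and the surrounding setup is assumed:

#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
using namespace llvm;

void guardWithCondition(Instruction *SplitBefore, Value *Cond,
                        DominatorTree &DT, LoopInfo *LI) {
  DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
  // Split the block at SplitBefore and insert a conditional "then" block;
  // CFG changes are queued on the DomTreeUpdater rather than applied to the
  // DominatorTree directly.
  Instruction *ThenTerm = SplitBlockAndInsertIfThen(
      Cond, SplitBefore, /*Unreachable=*/false, /*BranchWeights=*/nullptr,
      &DTU, LI);
  // Code that should run only when Cond is true is inserted before ThenTerm.
  (void)ThenTerm;
  DTU.flush();
}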
Then adapt all instructions in the +/// NewBlocks basicblocks to the cloned versions. +/// 'Ext' will be added to the duplicate scope names. +void cloneAndAdaptNoAliasScopes(ArrayRef<MDNode *> NoAliasDeclScopes, + ArrayRef<BasicBlock *> NewBlocks, + LLVMContext &Context, StringRef Ext); + +/// Clone the specified noalias decl scopes. Then adapt all instructions in the +/// [IStart, IEnd] (IEnd included !) range to the cloned versions. 'Ext' will be +/// added to the duplicate scope names. +void cloneAndAdaptNoAliasScopes(ArrayRef<MDNode *> NoAliasDeclScopes, + Instruction *IStart, Instruction *IEnd, + LLVMContext &Context, StringRef Ext); } // end namespace llvm #endif // LLVM_TRANSFORMS_UTILS_CLONING_H diff --git a/llvm/include/llvm/Transforms/Utils/Debugify.h b/llvm/include/llvm/Transforms/Utils/Debugify.h index 6f11d0a7d062..30e7d8e87adf 100644 --- a/llvm/include/llvm/Transforms/Utils/Debugify.h +++ b/llvm/include/llvm/Transforms/Utils/Debugify.h @@ -13,8 +13,11 @@ #ifndef LLVM_TRANSFORM_UTILS_DEBUGIFY_H #define LLVM_TRANSFORM_UTILS_DEBUGIFY_H -#include "llvm/ADT/StringRef.h" #include "llvm/ADT/MapVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Bitcode/BitcodeWriterPass.h" +#include "llvm/IR/IRPrintingPasses.h" +#include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/PassManager.h" namespace llvm { @@ -37,8 +40,6 @@ bool applyDebugifyMetadata( /// Returns true if any change was made. bool stripDebugifyMetadata(Module &M); -} // namespace llvm - llvm::ModulePass *createDebugifyModulePass(); llvm::FunctionPass *createDebugifyFunctionPass(); @@ -74,6 +75,8 @@ struct DebugifyStatistics { /// Map pass names to a per-pass DebugifyStatistics instance. using DebugifyStatsMap = llvm::MapVector<llvm::StringRef, DebugifyStatistics>; +void exportDebugifyStats(StringRef Path, const DebugifyStatsMap &Map); + llvm::ModulePass * createCheckDebugifyModulePass(bool Strip = false, llvm::StringRef NameOfWrappedPass = "", @@ -89,4 +92,60 @@ struct NewPMCheckDebugifyPass llvm::PreservedAnalyses run(llvm::Module &M, llvm::ModuleAnalysisManager &AM); }; +struct DebugifyEachInstrumentation { + DebugifyStatsMap StatsMap; + + void registerCallbacks(PassInstrumentationCallbacks &PIC); +}; + +/// DebugifyCustomPassManager wraps each pass with the debugify passes if +/// needed. +/// NOTE: We support legacy custom pass manager only. +/// TODO: Add New PM support for custom pass manager. +class DebugifyCustomPassManager : public legacy::PassManager { + DebugifyStatsMap DIStatsMap; + bool EnableDebugifyEach = false; + +public: + using super = legacy::PassManager; + + void add(Pass *P) override { + // Wrap each pass with (-check)-debugify passes if requested, making + // exceptions for passes which shouldn't see -debugify instrumentation. + bool WrapWithDebugify = EnableDebugifyEach && !P->getAsImmutablePass() && + !isIRPrintingPass(P) && !isBitcodeWriterPass(P); + if (!WrapWithDebugify) { + super::add(P); + return; + } + + // Apply -debugify/-check-debugify before/after each pass and collect + // debug info loss statistics. + PassKind Kind = P->getPassKind(); + StringRef Name = P->getPassName(); + + // TODO: Implement Debugify for LoopPass. 
+ switch (Kind) { + case PT_Function: + super::add(createDebugifyFunctionPass()); + super::add(P); + super::add(createCheckDebugifyFunctionPass(true, Name, &DIStatsMap)); + break; + case PT_Module: + super::add(createDebugifyModulePass()); + super::add(P); + super::add(createCheckDebugifyModulePass(true, Name, &DIStatsMap)); + break; + default: + super::add(P); + break; + } + } + + void enableDebugifyEach() { EnableDebugifyEach = true; } + + const DebugifyStatsMap &getDebugifyStatsMap() const { return DIStatsMap; } +}; +} // namespace llvm + #endif // LLVM_TRANSFORM_UTILS_DEBUGIFY_H diff --git a/llvm/include/llvm/Transforms/Utils/FixIrreducible.h b/llvm/include/llvm/Transforms/Utils/FixIrreducible.h new file mode 100644 index 000000000000..0c00b7bdbaf9 --- /dev/null +++ b/llvm/include/llvm/Transforms/Utils/FixIrreducible.h @@ -0,0 +1,20 @@ +//===- FixIrreducible.h - Convert irreducible control-flow into loops -----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_UTILS_FIXIRREDUCIBLE_H +#define LLVM_TRANSFORMS_UTILS_FIXIRREDUCIBLE_H + +#include "llvm/IR/PassManager.h" + +namespace llvm { +struct FixIrreduciblePass : PassInfoMixin<FixIrreduciblePass> { + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); +}; +} // namespace llvm + +#endif // LLVM_TRANSFORMS_UTILS_FIXIRREDUCIBLE_H diff --git a/llvm/include/llvm/Transforms/Utils/InstructionNamer.h b/llvm/include/llvm/Transforms/Utils/InstructionNamer.h new file mode 100644 index 000000000000..4f4cc2666f10 --- /dev/null +++ b/llvm/include/llvm/Transforms/Utils/InstructionNamer.h @@ -0,0 +1,20 @@ +//===- InstructionNamer.h - Give anonymous instructions names -------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_UTILS_INSTRUCTIONNAMER_H +#define LLVM_TRANSFORMS_UTILS_INSTRUCTIONNAMER_H + +#include "llvm/IR/PassManager.h" + +namespace llvm { +struct InstructionNamerPass : PassInfoMixin<InstructionNamerPass> { + PreservedAnalyses run(Function &, FunctionAnalysisManager &); +}; +} // namespace llvm + +#endif // LLVM_TRANSFORMS_UTILS_INSTRUCTIONNAMER_H diff --git a/llvm/include/llvm/Transforms/Utils/Local.h b/llvm/include/llvm/Transforms/Utils/Local.h index f55e336f1f6a..c712dda483e4 100644 --- a/llvm/include/llvm/Transforms/Utils/Local.h +++ b/llvm/include/llvm/Transforms/Utils/Local.h @@ -16,7 +16,6 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/TinyPtrVector.h" #include "llvm/Analysis/Utils/Local.h" @@ -30,6 +29,8 @@ #include "llvm/IR/Value.h" #include "llvm/IR/ValueHandle.h" #include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Transforms/Utils/SimplifyCFGOptions.h" #include <cstdint> #include <limits> @@ -58,73 +59,6 @@ class StoreInst; class TargetLibraryInfo; class TargetTransformInfo; -/// A set of parameters used to control the transforms in the SimplifyCFG pass. 
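A minimal sketch of how the DebugifyCustomPassManager declared above might be driven; runWithDebugify and the stats file name are made-up, and the single pass added is only an example:

#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/Debugify.h"
using namespace llvm;

void runWithDebugify(Module &M) {
  DebugifyCustomPassManager PM;
  PM.enableDebugifyEach();
  // Each added module/function pass is bracketed by -debugify /
  // -check-debugify, and debug-info loss is tallied per pass.
  PM.add(createPromoteMemoryToRegisterPass());
  PM.run(M);
  const DebugifyStatsMap &Stats = PM.getDebugifyStatsMap();
  (void)Stats; // could be written out, e.g. exportDebugifyStats(Path, Stats)
}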
-/// Options may change depending on the position in the optimization pipeline. -/// For example, canonical form that includes switches and branches may later be -/// replaced by lookup tables and selects. -struct SimplifyCFGOptions { - int BonusInstThreshold; - bool ForwardSwitchCondToPhi; - bool ConvertSwitchToLookupTable; - bool NeedCanonicalLoop; - bool SinkCommonInsts; - bool SimplifyCondBranch; - bool FoldTwoEntryPHINode; - - AssumptionCache *AC; - - SimplifyCFGOptions(unsigned BonusThreshold = 1, - bool ForwardSwitchCond = false, - bool SwitchToLookup = false, bool CanonicalLoops = true, - bool SinkCommon = false, - AssumptionCache *AssumpCache = nullptr, - bool SimplifyCondBranch = true, - bool FoldTwoEntryPHINode = true) - : BonusInstThreshold(BonusThreshold), - ForwardSwitchCondToPhi(ForwardSwitchCond), - ConvertSwitchToLookupTable(SwitchToLookup), - NeedCanonicalLoop(CanonicalLoops), - SinkCommonInsts(SinkCommon), - SimplifyCondBranch(SimplifyCondBranch), - FoldTwoEntryPHINode(FoldTwoEntryPHINode), - AC(AssumpCache) {} - - // Support 'builder' pattern to set members by name at construction time. - SimplifyCFGOptions &bonusInstThreshold(int I) { - BonusInstThreshold = I; - return *this; - } - SimplifyCFGOptions &forwardSwitchCondToPhi(bool B) { - ForwardSwitchCondToPhi = B; - return *this; - } - SimplifyCFGOptions &convertSwitchToLookupTable(bool B) { - ConvertSwitchToLookupTable = B; - return *this; - } - SimplifyCFGOptions &needCanonicalLoops(bool B) { - NeedCanonicalLoop = B; - return *this; - } - SimplifyCFGOptions &sinkCommonInsts(bool B) { - SinkCommonInsts = B; - return *this; - } - SimplifyCFGOptions &setAssumptionCache(AssumptionCache *Cache) { - AC = Cache; - return *this; - } - SimplifyCFGOptions &setSimplifyCondBranch(bool B) { - SimplifyCondBranch = B; - return *this; - } - - SimplifyCFGOptions &setFoldTwoEntryPHINode(bool B) { - FoldTwoEntryPHINode = B; - return *this; - } -}; - //===----------------------------------------------------------------------===// // Local constant propagation. // @@ -160,7 +94,9 @@ bool wouldInstructionBeTriviallyDead(Instruction *I, /// recursively. Return true if any instructions were deleted. bool RecursivelyDeleteTriviallyDeadInstructions( Value *V, const TargetLibraryInfo *TLI = nullptr, - MemorySSAUpdater *MSSAU = nullptr); + MemorySSAUpdater *MSSAU = nullptr, + std::function<void(Value *)> AboutToDeleteCallback = + std::function<void(Value *)>()); /// Delete all of the instructions in `DeadInsts`, and all other instructions /// that deleting these in turn causes to be trivially dead. @@ -172,7 +108,9 @@ bool RecursivelyDeleteTriviallyDeadInstructions( /// empty afterward. void RecursivelyDeleteTriviallyDeadInstructions( SmallVectorImpl<WeakTrackingVH> &DeadInsts, - const TargetLibraryInfo *TLI = nullptr, MemorySSAUpdater *MSSAU = nullptr); + const TargetLibraryInfo *TLI = nullptr, MemorySSAUpdater *MSSAU = nullptr, + std::function<void(Value *)> AboutToDeleteCallback = + std::function<void(Value *)>()); /// Same functionality as RecursivelyDeleteTriviallyDeadInstructions, but allow /// instructions that are not trivially dead. These will be ignored. @@ -180,7 +118,9 @@ void RecursivelyDeleteTriviallyDeadInstructions( /// were found and deleted. 
bool RecursivelyDeleteTriviallyDeadInstructionsPermissive( SmallVectorImpl<WeakTrackingVH> &DeadInsts, - const TargetLibraryInfo *TLI = nullptr, MemorySSAUpdater *MSSAU = nullptr); + const TargetLibraryInfo *TLI = nullptr, MemorySSAUpdater *MSSAU = nullptr, + std::function<void(Value *)> AboutToDeleteCallback = + std::function<void(Value *)>()); /// If the specified value is an effectively dead PHI node, due to being a /// def-use chain of single-use nodes that either forms a cycle or is terminated @@ -209,20 +149,6 @@ bool replaceDbgUsesWithUndef(Instruction *I); // Control Flow Graph Restructuring. // -/// Like BasicBlock::removePredecessor, this method is called when we're about -/// to delete Pred as a predecessor of BB. If BB contains any PHI nodes, this -/// drops the entries in the PHI nodes for Pred. -/// -/// Unlike the removePredecessor method, this attempts to simplify uses of PHI -/// nodes that collapse into identity values. For example, if we have: -/// x = phi(1, 0, 0, 0) -/// y = and x, z -/// -/// .. and delete the predecessor corresponding to the '1', this will attempt to -/// recursively fold the 'and' to 0. -void RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred, - DomTreeUpdater *DTU = nullptr); - /// BB is a block with one predecessor and its predecessor is known to have one /// successor (BB!). Eliminate the edge between them, moving the instructions in /// the predecessor into BB. This deletes the predecessor block. @@ -246,9 +172,11 @@ bool EliminateDuplicatePHINodes(BasicBlock *BB); /// It returns true if a modification was made, possibly deleting the basic /// block that was pointed to. LoopHeaders is an optional input parameter /// providing the set of loop headers that SimplifyCFG should not eliminate. +extern cl::opt<bool> RequireAndPreserveDomTree; bool simplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI, + DomTreeUpdater *DTU = nullptr, const SimplifyCFGOptions &Options = {}, - SmallPtrSetImpl<BasicBlock *> *LoopHeaders = nullptr); + ArrayRef<WeakVH> LoopHeaders = {}); /// This function is used to flatten a CFG. For example, it uses parallel-and /// and parallel-or mode to collapse if-conditions and merge if-regions with @@ -258,7 +186,9 @@ bool FlattenCFG(BasicBlock *BB, AAResults *AA = nullptr); /// If this basic block is ONLY a setcc and a branch, and if a predecessor /// branches to us and one of our successors, fold the setcc into the /// predecessor and use logical operations to pick the right destination. -bool FoldBranchToCommonDest(BranchInst *BI, MemorySSAUpdater *MSSAU = nullptr, +bool FoldBranchToCommonDest(BranchInst *BI, llvm::DomTreeUpdater *DTU = nullptr, + MemorySSAUpdater *MSSAU = nullptr, + const TargetTransformInfo *TTI = nullptr, unsigned BonusInstThreshold = 1); /// This function takes a virtual register computed by an Instruction and @@ -365,10 +295,6 @@ bool replaceDbgDeclare(Value *Address, Value *NewAddress, DIBuilder &Builder, void replaceDbgValueForAlloca(AllocaInst *AI, Value *NewAllocaAddress, DIBuilder &Builder, int Offset = 0); -/// Finds alloca where the value comes from. -AllocaInst *findAllocaForValue(Value *V, - DenseMap<Value *, AllocaInst *> &AllocaForValue); - /// Assuming the instruction \p I is going to be deleted, attempt to salvage /// debug users of \p I by writing the effect of \p I in a DIExpression. If it /// cannot be salvaged changes its debug uses to undef. 
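A short sketch of the new AboutToDeleteCallback parameter on the dead-instruction deletion helpers above, as I read it: the callback is invoked for each instruction just before it is erased. deleteIfTriviallyDead is a made-up wrapper name:

#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;

bool deleteIfTriviallyDead(Value *V, const TargetLibraryInfo *TLI) {
  return RecursivelyDeleteTriviallyDeadInstructions(
      V, TLI, /*MSSAU=*/nullptr, [](Value *Dead) {
        // Useful for keeping side tables or statistics in sync with the
        // instructions that are about to disappear.
        errs() << "about to delete: " << *Dead << "\n";
      });
}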
@@ -406,9 +332,13 @@ DIExpression *salvageDebugInfoImpl(Instruction &I, DIExpression *DIExpr, bool replaceAllDbgUsesWith(Instruction &From, Value &To, Instruction &DomPoint, DominatorTree &DT); -/// Remove all instructions from a basic block other than it's terminator -/// and any present EH pad instructions. -unsigned removeAllNonTerminatorAndEHPadInstructions(BasicBlock *BB); +/// Remove all instructions from a basic block other than its terminator +/// and any present EH pad instructions. Returns a pair where the first element +/// is the number of instructions (excluding debug info instrinsics) that have +/// been removed, and the second element is the number of debug info intrinsics +/// that have been removed. +std::pair<unsigned, unsigned> +removeAllNonTerminatorAndEHPadInstructions(BasicBlock *BB); /// Insert an unreachable instruction before the specified /// instruction, making it and the rest of the code in the block dead. diff --git a/llvm/include/llvm/Transforms/Utils/LoopPeel.h b/llvm/include/llvm/Transforms/Utils/LoopPeel.h new file mode 100644 index 000000000000..8f857e1e5c21 --- /dev/null +++ b/llvm/include/llvm/Transforms/Utils/LoopPeel.h @@ -0,0 +1,40 @@ +//===- llvm/Transforms/Utils/LoopPeel.h ----- Peeling utilities -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines some loop peeling utilities. It does not define any +// actual pass or policy. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_UTILS_LOOPPEEL_H +#define LLVM_TRANSFORMS_UTILS_LOOPPEEL_H + +#include "llvm/Analysis/TargetTransformInfo.h" + +namespace llvm { + +bool canPeel(Loop *L); + +bool peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, ScalarEvolution *SE, + DominatorTree *DT, AssumptionCache *AC, bool PreserveLCSSA); + +TargetTransformInfo::PeelingPreferences +gatherPeelingPreferences(Loop *L, ScalarEvolution &SE, + const TargetTransformInfo &TTI, + Optional<bool> UserAllowPeeling, + Optional<bool> UserAllowProfileBasedPeeling, + bool UnrollingSpecficValues = false); + +void computePeelCount(Loop *L, unsigned LoopSize, + TargetTransformInfo::PeelingPreferences &PP, + unsigned &TripCount, ScalarEvolution &SE, + unsigned Threshold = UINT_MAX); + +} // end namespace llvm + +#endif // LLVM_TRANSFORMS_UTILS_LOOPPEEL_H diff --git a/llvm/include/llvm/Transforms/Utils/LoopRotationUtils.h b/llvm/include/llvm/Transforms/Utils/LoopRotationUtils.h index 1e80722ed8b8..61bf93b74a15 100644 --- a/llvm/include/llvm/Transforms/Utils/LoopRotationUtils.h +++ b/llvm/include/llvm/Transforms/Utils/LoopRotationUtils.h @@ -33,7 +33,8 @@ class TargetTransformInfo; bool LoopRotation(Loop *L, LoopInfo *LI, const TargetTransformInfo *TTI, AssumptionCache *AC, DominatorTree *DT, ScalarEvolution *SE, MemorySSAUpdater *MSSAU, const SimplifyQuery &SQ, - bool RotationOnly, unsigned Threshold, bool IsUtilMode); + bool RotationOnly, unsigned Threshold, bool IsUtilMode, + bool PrepareForLTO = false); } // namespace llvm diff --git a/llvm/include/llvm/Transforms/Utils/LoopUtils.h b/llvm/include/llvm/Transforms/Utils/LoopUtils.h index 60446bca5317..951660bbab28 100644 --- a/llvm/include/llvm/Transforms/Utils/LoopUtils.h +++ b/llvm/include/llvm/Transforms/Utils/LoopUtils.h @@ -26,6 +26,8 @@ class 
AAResults; class AliasSet; class AliasSetTracker; class BasicBlock; +class BlockFrequencyInfo; +class ICFLoopSafetyInfo; class IRBuilderBase; class Loop; class LoopInfo; @@ -38,7 +40,6 @@ class ScalarEvolution; class SCEV; class SCEVExpander; class TargetLibraryInfo; -class TargetTransformInfo; class LPPassManager; class Instruction; struct RuntimeCheckingPtrGroup; @@ -74,9 +75,14 @@ bool formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI, /// changes to CFG, preserved. /// /// Returns true if any modifications are made. -bool formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist, - const DominatorTree &DT, const LoopInfo &LI, - ScalarEvolution *SE); +/// +/// This function may introduce unused PHI nodes. If \p PHIsToRemove is not +/// nullptr, those are added to it (before removing, the caller has to check if +/// they still do not have any uses). Otherwise the PHIs are directly removed. +bool formLCSSAForInstructions( + SmallVectorImpl<Instruction *> &Worklist, const DominatorTree &DT, + const LoopInfo &LI, ScalarEvolution *SE, IRBuilderBase &Builder, + SmallVectorImpl<PHINode *> *PHIsToRemove = nullptr); /// Put loop into LCSSA form. /// @@ -105,9 +111,28 @@ bool formLCSSA(Loop &L, const DominatorTree &DT, const LoopInfo *LI, bool formLCSSARecursively(Loop &L, const DominatorTree &DT, const LoopInfo *LI, ScalarEvolution *SE); -struct SinkAndHoistLICMFlags { - bool NoOfMemAccTooLarge; - unsigned LicmMssaOptCounter; +/// Flags controlling how much is checked when sinking or hoisting +/// instructions. The number of memory access in the loop (and whether there +/// are too many) is determined in the constructors when using MemorySSA. +class SinkAndHoistLICMFlags { +public: + // Explicitly set limits. + SinkAndHoistLICMFlags(unsigned LicmMssaOptCap, + unsigned LicmMssaNoAccForPromotionCap, bool IsSink, + Loop *L = nullptr, MemorySSA *MSSA = nullptr); + // Use default limits. + SinkAndHoistLICMFlags(bool IsSink, Loop *L = nullptr, + MemorySSA *MSSA = nullptr); + + void setIsSink(bool B) { IsSink = B; } + bool getIsSink() { return IsSink; } + bool tooManyMemoryAccesses() { return NoOfMemAccTooLarge; } + bool tooManyClobberingCalls() { return LicmMssaOptCounter >= LicmMssaOptCap; } + void incrementClobberingCalls() { ++LicmMssaOptCounter; } + +protected: + bool NoOfMemAccTooLarge = false; + unsigned LicmMssaOptCounter = 0; unsigned LicmMssaOptCap; unsigned LicmMssaNoAccForPromotionCap; bool IsSink; @@ -118,12 +143,13 @@ struct SinkAndHoistLICMFlags { /// reverse depth first order w.r.t the DominatorTree. This allows us to visit /// uses before definitions, allowing us to sink a loop body in one pass without /// iteration. Takes DomTreeNode, AAResults, LoopInfo, DominatorTree, -/// TargetLibraryInfo, Loop, AliasSet information for all +/// BlockFrequencyInfo, TargetLibraryInfo, Loop, AliasSet information for all /// instructions of the loop and loop safety information as /// arguments. Diagnostics is emitted via \p ORE. It returns changed status. 
bool sinkRegion(DomTreeNode *, AAResults *, LoopInfo *, DominatorTree *, - TargetLibraryInfo *, TargetTransformInfo *, Loop *, - AliasSetTracker *, MemorySSAUpdater *, ICFLoopSafetyInfo *, + BlockFrequencyInfo *, TargetLibraryInfo *, + TargetTransformInfo *, Loop *, AliasSetTracker *, + MemorySSAUpdater *, ICFLoopSafetyInfo *, SinkAndHoistLICMFlags &, OptimizationRemarkEmitter *); /// Walk the specified region of the CFG (defined by all blocks @@ -131,13 +157,14 @@ bool sinkRegion(DomTreeNode *, AAResults *, LoopInfo *, DominatorTree *, /// first order w.r.t the DominatorTree. This allows us to visit definitions /// before uses, allowing us to hoist a loop body in one pass without iteration. /// Takes DomTreeNode, AAResults, LoopInfo, DominatorTree, -/// TargetLibraryInfo, Loop, AliasSet information for all instructions of the -/// loop and loop safety information as arguments. Diagnostics is emitted via \p -/// ORE. It returns changed status. +/// BlockFrequencyInfo, TargetLibraryInfo, Loop, AliasSet information for all +/// instructions of the loop and loop safety information as arguments. +/// Diagnostics is emitted via \p ORE. It returns changed status. bool hoistRegion(DomTreeNode *, AAResults *, LoopInfo *, DominatorTree *, - TargetLibraryInfo *, Loop *, AliasSetTracker *, - MemorySSAUpdater *, ScalarEvolution *, ICFLoopSafetyInfo *, - SinkAndHoistLICMFlags &, OptimizationRemarkEmitter *); + BlockFrequencyInfo *, TargetLibraryInfo *, Loop *, + AliasSetTracker *, MemorySSAUpdater *, ScalarEvolution *, + ICFLoopSafetyInfo *, SinkAndHoistLICMFlags &, + OptimizationRemarkEmitter *); /// This function deletes dead loops. The caller of this function needs to /// guarantee that the loop is infact dead. @@ -153,6 +180,12 @@ bool hoistRegion(DomTreeNode *, AAResults *, LoopInfo *, DominatorTree *, void deleteDeadLoop(Loop *L, DominatorTree *DT, ScalarEvolution *SE, LoopInfo *LI, MemorySSA *MSSA = nullptr); +/// Remove the backedge of the specified loop. Handles loop nests and general +/// loop structures subject to the precondition that the loop has no parent +/// loop and has a single latch block. Preserves all listed analyses. +void breakLoopBackedge(Loop *L, DominatorTree &DT, ScalarEvolution &SE, + LoopInfo &LI, MemorySSA *MSSA); + /// Try to promote memory values to scalars by sinking stores out of /// the loop and moving loads to before the loop. We do this by looping over /// the stores in the loop, looking for stores to Must pointers which are @@ -187,6 +220,13 @@ Optional<const MDOperand *> findStringMetadataForLoop(const Loop *TheLoop, /// Find named metadata for a loop with an integer value. llvm::Optional<int> getOptionalIntLoopAttribute(Loop *TheLoop, StringRef Name); +/// Find a combination of metadata ("llvm.loop.vectorize.width" and +/// "llvm.loop.vectorize.scalable.enable") for a loop and use it to construct a +/// ElementCount. If the metadata "llvm.loop.vectorize.width" cannot be found +/// then None is returned. +Optional<ElementCount> +getOptionalElementCountLoopAttribute(Loop *TheLoop); + /// Create a new loop identifier for a loop created from a loop transformation. /// /// @param OrigLoopID The loop ID of the loop before the transformation. @@ -222,6 +262,9 @@ bool hasDisableAllTransformsHint(const Loop *L); /// Look for the loop attribute that disables the LICM transformation heuristics. bool hasDisableLICMTransformsHint(const Loop *L); +/// Look for the loop attribute that requires progress within the loop. 
+bool hasMustProgress(const Loop *L); + /// The mode sets how eager a transformation should be applied. enum TransformationMode { /// The pass can use heuristics to determine whether a transformation should @@ -264,6 +307,9 @@ TransformationMode hasLICMVersioningTransformation(Loop *L); void addStringMetadataToLoop(Loop *TheLoop, const char *MDString, unsigned V = 0); +/// Returns true if Name is applied to TheLoop and enabled. +bool getBooleanLoopAttribute(const Loop *TheLoop, StringRef Name); + /// Returns a loop's estimated trip count based on branch weight metadata. /// In addition if \p EstimatedLoopInvocationWeight is not null it is /// initialized with weight of loop's latch leading to the exit. @@ -309,35 +355,29 @@ bool canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT, OptimizationRemarkEmitter *ORE = nullptr); /// Returns a Min/Max operation corresponding to MinMaxRecurrenceKind. -Value *createMinMaxOp(IRBuilderBase &Builder, - RecurrenceDescriptor::MinMaxRecurrenceKind RK, - Value *Left, Value *Right); +Value *createMinMaxOp(IRBuilderBase &Builder, RecurKind RK, Value *Left, + Value *Right); /// Generates an ordered vector reduction using extracts to reduce the value. -Value * -getOrderedReduction(IRBuilderBase &Builder, Value *Acc, Value *Src, unsigned Op, - RecurrenceDescriptor::MinMaxRecurrenceKind MinMaxKind = - RecurrenceDescriptor::MRK_Invalid, - ArrayRef<Value *> RedOps = None); +Value *getOrderedReduction(IRBuilderBase &Builder, Value *Acc, Value *Src, + unsigned Op, RecurKind MinMaxKind = RecurKind::None, + ArrayRef<Value *> RedOps = None); /// Generates a vector reduction using shufflevectors to reduce the value. /// Fast-math-flags are propagated using the IRBuilder's setting. Value *getShuffleReduction(IRBuilderBase &Builder, Value *Src, unsigned Op, - RecurrenceDescriptor::MinMaxRecurrenceKind - MinMaxKind = RecurrenceDescriptor::MRK_Invalid, + RecurKind MinMaxKind = RecurKind::None, ArrayRef<Value *> RedOps = None); /// Create a target reduction of the given vector. The reduction operation /// is described by the \p Opcode parameter. min/max reductions require -/// additional information supplied in \p Flags. +/// additional information supplied in \p RdxKind. /// The target is queried to determine if intrinsics or shuffle sequences are /// required to implement the reduction. /// Fast-math-flags are propagated using the IRBuilder's setting. Value *createSimpleTargetReduction(IRBuilderBase &B, - const TargetTransformInfo *TTI, - unsigned Opcode, Value *Src, - TargetTransformInfo::ReductionFlags Flags = - TargetTransformInfo::ReductionFlags(), + const TargetTransformInfo *TTI, Value *Src, + RecurKind RdxKind, ArrayRef<Value *> RedOps = None); /// Create a generic target reduction using a recurrence descriptor \p Desc @@ -345,8 +385,7 @@ Value *createSimpleTargetReduction(IRBuilderBase &B, /// required to implement the reduction. /// Fast-math-flags are propagated using the RecurrenceDescriptor. Value *createTargetReduction(IRBuilderBase &B, const TargetTransformInfo *TTI, - RecurrenceDescriptor &Desc, Value *Src, - bool NoNaN = false); + RecurrenceDescriptor &Desc, Value *Src); /// Get the intersection (logical and) of all of the potential IR flags /// of each scalar operation (VL) that will be converted into a vector (I). 
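A hedged sketch of the RecurKind-based reduction API shown in the LoopUtils.h hunk above; emitAddReduction is a made-up helper name and an integer add reduction is assumed:

#include "llvm/Analysis/IVDescriptors.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
using namespace llvm;

Value *emitAddReduction(IRBuilderBase &B, const TargetTransformInfo *TTI,
                        Value *Vec) {
  // RecurKind::Add replaces the old opcode + ReductionFlags pair for a
  // straightforward integer add reduction over the vector Vec.
  return createSimpleTargetReduction(B, TTI, Vec, RecurKind::Add);
}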
diff --git a/llvm/include/llvm/Transforms/Utils/LoopVersioning.h b/llvm/include/llvm/Transforms/Utils/LoopVersioning.h index 1efdcc65b39a..4a8831ed45b2 100644 --- a/llvm/include/llvm/Transforms/Utils/LoopVersioning.h +++ b/llvm/include/llvm/Transforms/Utils/LoopVersioning.h @@ -16,6 +16,7 @@ #define LLVM_TRANSFORMS_UTILS_LOOPVERSIONING_H #include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/IR/PassManager.h" #include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Utils/ValueMapper.h" @@ -24,7 +25,6 @@ namespace llvm { class Loop; class LoopAccessInfo; class LoopInfo; -class ScalarEvolution; struct RuntimeCheckingPtrGroup; typedef std::pair<const RuntimeCheckingPtrGroup *, const RuntimeCheckingPtrGroup *> @@ -43,9 +43,9 @@ public: /// It uses runtime check provided by the user. If \p UseLAIChecks is true, /// we will retain the default checks made by LAI. Otherwise, construct an /// object having no checks and we expect the user to add them. - LoopVersioning(const LoopAccessInfo &LAI, Loop *L, LoopInfo *LI, - DominatorTree *DT, ScalarEvolution *SE, - bool UseLAIChecks = true); + LoopVersioning(const LoopAccessInfo &LAI, + ArrayRef<RuntimePointerCheck> Checks, Loop *L, LoopInfo *LI, + DominatorTree *DT, ScalarEvolution *SE); /// Performs the CFG manipulation part of versioning the loop including /// the DominatorTree and LoopInfo updates. @@ -75,12 +75,6 @@ public: /// loop may alias (i.e. one of the memchecks failed). Loop *getNonVersionedLoop() { return NonVersionedLoop; } - /// Sets the runtime alias checks for versioning the loop. - void setAliasChecks(ArrayRef<RuntimePointerCheck> Checks); - - /// Sets the runtime SCEV checks for versioning the loop. - void setSCEVChecks(SCEVUnionPredicate Check); - /// Annotate memory instructions in the versioned loop with no-alias /// metadata based on the memchecks issued. /// @@ -129,7 +123,7 @@ private: SmallVector<RuntimePointerCheck, 4> AliasChecks; /// The set of SCEV checks that we are versioning for. - SCEVUnionPredicate Preds; + const SCEVUnionPredicate &Preds; /// Maps a pointer to the pointer checking group that the pointer /// belongs to. @@ -148,6 +142,14 @@ private: DominatorTree *DT; ScalarEvolution *SE; }; + +/// Expose LoopVersioning as a pass. Currently this is only used for +/// unit-testing. It adds all memchecks necessary to remove all may-aliasing +/// array accesses from the loop. +class LoopVersioningPass : public PassInfoMixin<LoopVersioningPass> { +public: + PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM); +}; } #endif diff --git a/llvm/include/llvm/Transforms/Utils/LowerSwitch.h b/llvm/include/llvm/Transforms/Utils/LowerSwitch.h new file mode 100644 index 000000000000..97086987ffcb --- /dev/null +++ b/llvm/include/llvm/Transforms/Utils/LowerSwitch.h @@ -0,0 +1,26 @@ +//===- LowerSwitch.h - Eliminate Switch instructions ----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// The LowerSwitch transformation rewrites switch instructions with a sequence +// of branches, which allows targets to get away with not implementing the +// switch instruction until it is convenient. 
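A rough sketch of the revised LoopVersioning constructor above, which now takes the runtime pointer checks explicitly instead of the removed setAliasChecks/setSCEVChecks setters; versionForRuntimeChecks is a made-up helper name:

#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Transforms/Utils/LoopVersioning.h"
using namespace llvm;

void versionForRuntimeChecks(const LoopAccessInfo &LAI, Loop *L, LoopInfo *LI,
                             DominatorTree *DT, ScalarEvolution *SE) {
  // Hand over all memchecks computed by LoopAccessInfo up front.
  LoopVersioning LVer(LAI, LAI.getRuntimePointerChecking()->getChecks(), L, LI,
                      DT, SE);
  LVer.versionLoop();
  LVer.annotateLoopWithNoAlias();
}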
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_UTILS_LOWERSWITCH_H +#define LLVM_TRANSFORMS_UTILS_LOWERSWITCH_H + +#include "llvm/IR/PassManager.h" + +namespace llvm { +struct LowerSwitchPass : public PassInfoMixin<LowerSwitchPass> { + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); +}; +} // namespace llvm + +#endif // LLVM_TRANSFORMS_UTILS_LOWERSWITCH_H diff --git a/llvm/include/llvm/Transforms/Utils/MatrixUtils.h b/llvm/include/llvm/Transforms/Utils/MatrixUtils.h new file mode 100644 index 000000000000..39a0d4bf40cc --- /dev/null +++ b/llvm/include/llvm/Transforms/Utils/MatrixUtils.h @@ -0,0 +1,94 @@ +//===- MatrixUtils.h - Utilities to lower matrix intrinsics -----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Utilities for generating tiled loops for matrix operations. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_UTILS_MATRIXUTILS_H +#define LLVM_TRANSFORMS_UTILS_MATRIXUTILS_H + +#include "llvm/ADT/StringRef.h" + +namespace llvm { +class DomTreeUpdater; +class BasicBlock; +class Value; +class Loop; +class LoopInfo; +class IRBuilderBase; + +/// A helper struct to create IR loop nests for tiling in IR of the following +/// form: +/// for CurrentColumn = 0..NumColumns +/// for CurrentRow = 0..NumRows +/// for CurrentInner = 0..NumInner +struct TileInfo { + /// Number of rows of the matrix. + unsigned NumRows; + + /// Number of columns of the matrix. + unsigned NumColumns; + + /// Number of columns of the first matrix of a multiply / + /// number of rows of the second matrix of a multiply. + unsigned NumInner; + + /// Number of rows/columns in a tile. + unsigned TileSize = -1; + + /// Start row of the current tile to compute. + Value *CurrentRow; + + /// Start column of the current tile to compute. + Value *CurrentCol; + + /// Current tile offset during the tile computation. + Value *CurrentK; + + /// Header of the outermost loop iterating from 0..NumColumns. + BasicBlock *ColumnLoopHeader = nullptr; + + /// Header of the second loop iterating from 0..NumRows. + BasicBlock *RowLoopHeader = nullptr; + /// Latch of the second loop iterating from 0..NumRows. + BasicBlock *RowLoopLatch = nullptr; + /// Header of the innermost loop iterating from 0..NumInner. + BasicBlock *InnerLoopHeader = nullptr; + /// Latch of the innermost loop iterating from 0..NumInner. + BasicBlock *InnerLoopLatch = nullptr; + + TileInfo(unsigned NumRows, unsigned NumColumns, unsigned NumInner, + unsigned TileSize) + : NumRows(NumRows), NumColumns(NumColumns), NumInner(NumInner), + TileSize(TileSize) {} + + /// Creates an IR loop nests for tiling of the form below. Returns the block + /// for the inner loop body and sets {Column,Row,Inner}LoopHeader/Latch + /// fields. + /// + /// for CurrentColumn = 0..NumColumns + /// for CurrentRow = 0..NumRows + /// for CurrentInner = 0..NumInner + BasicBlock *CreateTiledLoops(BasicBlock *Start, BasicBlock *End, + IRBuilderBase &B, DomTreeUpdater &DTU, + LoopInfo &LI); + +private: + /// Creates a new loop with header, body and latch blocks that iterates from + /// [0, Bound). Updates \p Preheader to branch to the new header and uses \p + /// Exit as exit block. 
Adds the new loop blocks to \L and applies dominator + /// tree updates to \p DTU. + static BasicBlock *CreateLoop(BasicBlock *Preheader, BasicBlock *Exit, + Value *Bound, Value *Step, StringRef Name, + IRBuilderBase &B, DomTreeUpdater &DTU, Loop *L, + LoopInfo &LI); +}; +} // namespace llvm + +#endif diff --git a/llvm/include/llvm/Transforms/Utils/MetaRenamer.h b/llvm/include/llvm/Transforms/Utils/MetaRenamer.h new file mode 100644 index 000000000000..fff3dff75837 --- /dev/null +++ b/llvm/include/llvm/Transforms/Utils/MetaRenamer.h @@ -0,0 +1,26 @@ +//===- MetaRenamer.h - Rename everything with metasyntatic names ----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass renames everything with metasyntatic names. The intent is to use +// this pass after bugpoint reduction to conceal the nature of the original +// program. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_UTILS_METARENAMER_H +#define LLVM_TRANSFORMS_UTILS_METARENAMER_H + +#include "llvm/IR/PassManager.h" + +namespace llvm { +struct MetaRenamerPass : PassInfoMixin<MetaRenamerPass> { + PreservedAnalyses run(Module &, ModuleAnalysisManager &); +}; +} // namespace llvm + +#endif // LLVM_TRANSFORMS_UTILS_METARENAMER_H diff --git a/llvm/include/llvm/Transforms/Utils/MisExpect.h b/llvm/include/llvm/Transforms/Utils/MisExpect.h deleted file mode 100644 index 1dbe8cb95936..000000000000 --- a/llvm/include/llvm/Transforms/Utils/MisExpect.h +++ /dev/null @@ -1,43 +0,0 @@ -//===--- MisExpect.h - Check the use of llvm.expect with PGO data ---------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This contains code to emit warnings for potentially incorrect usage of the -// llvm.expect intrinsic. This utility extracts the threshold values from -// metadata associated with the instrumented Branch or Switch instruction. The -// threshold values are then used to determine if a warning should be emmited. -// -//===----------------------------------------------------------------------===// - -#include "llvm/ADT/SmallVector.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/LLVMContext.h" - -namespace llvm { -namespace misexpect { - -/// verifyMisExpect - compares PGO counters to the thresholds used for -/// llvm.expect and warns if the PGO counters are outside of the expected -/// range. -/// \param I The Instruction being checked -/// \param Weights A vector of profile weights for each target block -/// \param Ctx The current LLVM context -void verifyMisExpect(llvm::Instruction *I, - const llvm::SmallVector<uint32_t, 4> &Weights, - llvm::LLVMContext &Ctx); - -/// checkClangInstrumentation - verify if llvm.expect matches PGO profile -/// This function checks the frontend instrumentation in the backend when -/// lowering llvm.expect intrinsics. It checks for existing metadata, and -/// then validates the use of llvm.expect against the assigned branch weights. 
-// -/// \param I the Instruction being checked -void checkFrontendInstrumentation(Instruction &I); - -} // namespace misexpect -} // namespace llvm diff --git a/llvm/include/llvm/Transforms/Utils/PredicateInfo.h b/llvm/include/llvm/Transforms/Utils/PredicateInfo.h index 657b97c67a8b..c922476ac79d 100644 --- a/llvm/include/llvm/Transforms/Utils/PredicateInfo.h +++ b/llvm/include/llvm/Transforms/Utils/PredicateInfo.h @@ -70,6 +70,13 @@ class raw_ostream; enum PredicateType { PT_Branch, PT_Assume, PT_Switch }; +/// Constraint for a predicate of the form "cmp Pred Op, OtherOp", where Op +/// is the value the constraint applies to (the ssa.copy result). +struct PredicateConstraint { + CmpInst::Predicate Predicate; + Value *OtherOp; +}; + // Base class for all predicate information we provide. // All of our predicate information has at least a comparison. class PredicateBase : public ilist_node<PredicateBase> { @@ -83,37 +90,34 @@ public: // predicates, this is different to OriginalOp which refers to the initial // operand. Value *RenamedOp; + // The condition associated with this predicate. + Value *Condition; + PredicateBase(const PredicateBase &) = delete; PredicateBase &operator=(const PredicateBase &) = delete; PredicateBase() = delete; virtual ~PredicateBase() = default; - -protected: - PredicateBase(PredicateType PT, Value *Op) : Type(PT), OriginalOp(Op) {} -}; - -class PredicateWithCondition : public PredicateBase { -public: - Value *Condition; static bool classof(const PredicateBase *PB) { return PB->Type == PT_Assume || PB->Type == PT_Branch || PB->Type == PT_Switch; } + /// Fetch condition in the form of PredicateConstraint, if possible. + Optional<PredicateConstraint> getConstraint() const; + protected: - PredicateWithCondition(PredicateType PT, Value *Op, Value *Condition) - : PredicateBase(PT, Op), Condition(Condition) {} + PredicateBase(PredicateType PT, Value *Op, Value *Condition) + : Type(PT), OriginalOp(Op), Condition(Condition) {} }; // Provides predicate information for assumes. Since assumes are always true, // we simply provide the assume instruction, so you can tell your relative // position to it. -class PredicateAssume : public PredicateWithCondition { +class PredicateAssume : public PredicateBase { public: IntrinsicInst *AssumeInst; PredicateAssume(Value *Op, IntrinsicInst *AssumeInst, Value *Condition) - : PredicateWithCondition(PT_Assume, Op, Condition), - AssumeInst(AssumeInst) {} + : PredicateBase(PT_Assume, Op, Condition), AssumeInst(AssumeInst) {} PredicateAssume() = delete; static bool classof(const PredicateBase *PB) { return PB->Type == PT_Assume; @@ -123,7 +127,7 @@ public: // Mixin class for edge predicates. The FROM block is the block where the // predicate originates, and the TO block is the block where the predicate is // valid. -class PredicateWithEdge : public PredicateWithCondition { +class PredicateWithEdge : public PredicateBase { public: BasicBlock *From; BasicBlock *To; @@ -135,7 +139,7 @@ public: protected: PredicateWithEdge(PredicateType PType, Value *Op, BasicBlock *From, BasicBlock *To, Value *Cond) - : PredicateWithCondition(PType, Op, Cond), From(From), To(To) {} + : PredicateBase(PType, Op, Cond), From(From), To(To) {} }; // Provides predicate information for branches. 
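With the condition now stored on PredicateBase itself, the new getConstraint() accessor returns the comparison in "cmp Pred Op, OtherOp" form when it can be recovered. Below is a minimal sketch of a consumer, assuming PB was obtained from a PredicateInfo query for an ssa.copy result; the helper name describeConstraint is illustrative only and not part of the patch.

#include "llvm/ADT/Optional.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/PredicateInfo.h"

using namespace llvm;

// Illustrative consumer: report the constraint implied by a predicate copy,
// if it can be expressed as a single comparison against another operand.
static void describeConstraint(const PredicateBase *PB) {
  if (Optional<PredicateConstraint> C = PB->getConstraint())
    errs() << "constrained by " << CmpInst::getPredicateName(C->Predicate)
           << " against " << *C->OtherOp << "\n";
  else
    errs() << "no single-comparison constraint recovered\n";
}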
diff --git a/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h b/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h index 0c88f9f79e76..547245cfb963 100644 --- a/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h +++ b/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h @@ -26,410 +26,486 @@ #include "llvm/Support/CommandLine.h" namespace llvm { - extern cl::opt<unsigned> SCEVCheapExpansionBudget; +extern cl::opt<unsigned> SCEVCheapExpansionBudget; + +/// Return true if the given expression is safe to expand in the sense that +/// all materialized values are safe to speculate anywhere their operands are +/// defined. +bool isSafeToExpand(const SCEV *S, ScalarEvolution &SE); + +/// Return true if the given expression is safe to expand in the sense that +/// all materialized values are defined and safe to speculate at the specified +/// location and their operands are defined at this location. +bool isSafeToExpandAt(const SCEV *S, const Instruction *InsertionPoint, + ScalarEvolution &SE); + +/// struct for holding enough information to help calculate the cost of the +/// given SCEV when expanded into IR. +struct SCEVOperand { + explicit SCEVOperand(unsigned Opc, int Idx, const SCEV *S) : + ParentOpcode(Opc), OperandIdx(Idx), S(S) { } + /// LLVM instruction opcode that uses the operand. + unsigned ParentOpcode; + /// The use index of an expanded instruction. + int OperandIdx; + /// The SCEV operand to be costed. + const SCEV* S; +}; + +/// This class uses information about analyze scalars to rewrite expressions +/// in canonical form. +/// +/// Clients should create an instance of this class when rewriting is needed, +/// and destroy it when finished to allow the release of the associated +/// memory. +class SCEVExpander : public SCEVVisitor<SCEVExpander, Value *> { + ScalarEvolution &SE; + const DataLayout &DL; + + // New instructions receive a name to identify them with the current pass. + const char *IVName; + + /// Indicates whether LCSSA phis should be created for inserted values. + bool PreserveLCSSA; + + // InsertedExpressions caches Values for reuse, so must track RAUW. + DenseMap<std::pair<const SCEV *, Instruction *>, TrackingVH<Value>> + InsertedExpressions; + + // InsertedValues only flags inserted instructions so needs no RAUW. + DenseSet<AssertingVH<Value>> InsertedValues; + DenseSet<AssertingVH<Value>> InsertedPostIncValues; + + /// Keep track of the existing IR values re-used during expansion. + /// FIXME: Ideally re-used instructions would not be added to + /// InsertedValues/InsertedPostIncValues. + SmallPtrSet<Value *, 16> ReusedValues; + + /// A memoization of the "relevant" loop for a given SCEV. + DenseMap<const SCEV *, const Loop *> RelevantLoops; + + /// Addrecs referring to any of the given loops are expanded in post-inc + /// mode. For example, expanding {1,+,1}<L> in post-inc mode returns the add + /// instruction that adds one to the phi for {0,+,1}<L>, as opposed to a new + /// phi starting at 1. This is only supported in non-canonical mode. + PostIncLoopSet PostIncLoops; + + /// When this is non-null, addrecs expanded in the loop it indicates should + /// be inserted with increments at IVIncInsertPos. + const Loop *IVIncInsertLoop; + + /// When expanding addrecs in the IVIncInsertLoop loop, insert the IV + /// increment at this position. + Instruction *IVIncInsertPos; + + /// Phis that complete an IV chain. 
Reuse + DenseSet<AssertingVH<PHINode>> ChainedPhis; + + /// When true, SCEVExpander tries to expand expressions in "canonical" form. + /// When false, expressions are expanded in a more literal form. + /// + /// In "canonical" form addrecs are expanded as arithmetic based on a + /// canonical induction variable. Note that CanonicalMode doesn't guarantee + /// that all expressions are expanded in "canonical" form. For some + /// expressions literal mode can be preferred. + bool CanonicalMode; + + /// When invoked from LSR, the expander is in "strength reduction" mode. The + /// only difference is that phi's are only reused if they are already in + /// "expanded" form. + bool LSRMode; + + typedef IRBuilder<TargetFolder, IRBuilderCallbackInserter> BuilderType; + BuilderType Builder; + + // RAII object that stores the current insertion point and restores it when + // the object is destroyed. This includes the debug location. Duplicated + // from InsertPointGuard to add SetInsertPoint() which is used to updated + // InsertPointGuards stack when insert points are moved during SCEV + // expansion. + class SCEVInsertPointGuard { + IRBuilderBase &Builder; + AssertingVH<BasicBlock> Block; + BasicBlock::iterator Point; + DebugLoc DbgLoc; + SCEVExpander *SE; + + SCEVInsertPointGuard(const SCEVInsertPointGuard &) = delete; + SCEVInsertPointGuard &operator=(const SCEVInsertPointGuard &) = delete; - /// Return true if the given expression is safe to expand in the sense that - /// all materialized values are safe to speculate anywhere their operands are - /// defined. - bool isSafeToExpand(const SCEV *S, ScalarEvolution &SE); + public: + SCEVInsertPointGuard(IRBuilderBase &B, SCEVExpander *SE) + : Builder(B), Block(B.GetInsertBlock()), Point(B.GetInsertPoint()), + DbgLoc(B.getCurrentDebugLocation()), SE(SE) { + SE->InsertPointGuards.push_back(this); + } - /// Return true if the given expression is safe to expand in the sense that - /// all materialized values are defined and safe to speculate at the specified - /// location and their operands are defined at this location. - bool isSafeToExpandAt(const SCEV *S, const Instruction *InsertionPoint, - ScalarEvolution &SE); + ~SCEVInsertPointGuard() { + // These guards should always created/destroyed in FIFO order since they + // are used to guard lexically scoped blocks of code in + // ScalarEvolutionExpander. + assert(SE->InsertPointGuards.back() == this); + SE->InsertPointGuards.pop_back(); + Builder.restoreIP(IRBuilderBase::InsertPoint(Block, Point)); + Builder.SetCurrentDebugLocation(DbgLoc); + } - /// This class uses information about analyze scalars to rewrite expressions - /// in canonical form. - /// - /// Clients should create an instance of this class when rewriting is needed, - /// and destroy it when finished to allow the release of the associated - /// memory. - class SCEVExpander : public SCEVVisitor<SCEVExpander, Value*> { - ScalarEvolution &SE; - const DataLayout &DL; - - // New instructions receive a name to identify them with the current pass. - const char* IVName; - - // InsertedExpressions caches Values for reuse, so must track RAUW. - DenseMap<std::pair<const SCEV *, Instruction *>, TrackingVH<Value>> - InsertedExpressions; - - // InsertedValues only flags inserted instructions so needs no RAUW. - DenseSet<AssertingVH<Value>> InsertedValues; - DenseSet<AssertingVH<Value>> InsertedPostIncValues; - - /// A memoization of the "relevant" loop for a given SCEV. 
- DenseMap<const SCEV *, const Loop *> RelevantLoops; - - /// Addrecs referring to any of the given loops are expanded in post-inc - /// mode. For example, expanding {1,+,1}<L> in post-inc mode returns the add - /// instruction that adds one to the phi for {0,+,1}<L>, as opposed to a new - /// phi starting at 1. This is only supported in non-canonical mode. - PostIncLoopSet PostIncLoops; - - /// When this is non-null, addrecs expanded in the loop it indicates should - /// be inserted with increments at IVIncInsertPos. - const Loop *IVIncInsertLoop; - - /// When expanding addrecs in the IVIncInsertLoop loop, insert the IV - /// increment at this position. - Instruction *IVIncInsertPos; - - /// Phis that complete an IV chain. Reuse - DenseSet<AssertingVH<PHINode>> ChainedPhis; - - /// When true, SCEVExpander tries to expand expressions in "canonical" form. - /// When false, expressions are expanded in a more literal form. - /// - /// In "canonical" form addrecs are expanded as arithmetic based on a - /// canonical induction variable. Note that CanonicalMode doesn't guarantee - /// that all expressions are expanded in "canonical" form. For some - /// expressions literal mode can be preferred. - bool CanonicalMode; - - /// When invoked from LSR, the expander is in "strength reduction" mode. The - /// only difference is that phi's are only reused if they are already in - /// "expanded" form. - bool LSRMode; - - typedef IRBuilder<TargetFolder> BuilderType; - BuilderType Builder; - - // RAII object that stores the current insertion point and restores it when - // the object is destroyed. This includes the debug location. Duplicated - // from InsertPointGuard to add SetInsertPoint() which is used to updated - // InsertPointGuards stack when insert points are moved during SCEV - // expansion. - class SCEVInsertPointGuard { - IRBuilderBase &Builder; - AssertingVH<BasicBlock> Block; - BasicBlock::iterator Point; - DebugLoc DbgLoc; - SCEVExpander *SE; - - SCEVInsertPointGuard(const SCEVInsertPointGuard &) = delete; - SCEVInsertPointGuard &operator=(const SCEVInsertPointGuard &) = delete; - - public: - SCEVInsertPointGuard(IRBuilderBase &B, SCEVExpander *SE) - : Builder(B), Block(B.GetInsertBlock()), Point(B.GetInsertPoint()), - DbgLoc(B.getCurrentDebugLocation()), SE(SE) { - SE->InsertPointGuards.push_back(this); - } - - ~SCEVInsertPointGuard() { - // These guards should always created/destroyed in FIFO order since they - // are used to guard lexically scoped blocks of code in - // ScalarEvolutionExpander. - assert(SE->InsertPointGuards.back() == this); - SE->InsertPointGuards.pop_back(); - Builder.restoreIP(IRBuilderBase::InsertPoint(Block, Point)); - Builder.SetCurrentDebugLocation(DbgLoc); - } - - BasicBlock::iterator GetInsertPoint() const { return Point; } - void SetInsertPoint(BasicBlock::iterator I) { Point = I; } - }; - - /// Stack of pointers to saved insert points, used to keep insert points - /// consistent when instructions are moved. - SmallVector<SCEVInsertPointGuard *, 8> InsertPointGuards; + BasicBlock::iterator GetInsertPoint() const { return Point; } + void SetInsertPoint(BasicBlock::iterator I) { Point = I; } + }; + + /// Stack of pointers to saved insert points, used to keep insert points + /// consistent when instructions are moved. + SmallVector<SCEVInsertPointGuard *, 8> InsertPointGuards; #ifndef NDEBUG - const char *DebugType; + const char *DebugType; #endif - friend struct SCEVVisitor<SCEVExpander, Value*>; - - public: - /// Construct a SCEVExpander in "canonical" mode. 
- explicit SCEVExpander(ScalarEvolution &se, const DataLayout &DL, - const char *name) - : SE(se), DL(DL), IVName(name), IVIncInsertLoop(nullptr), - IVIncInsertPos(nullptr), CanonicalMode(true), LSRMode(false), - Builder(se.getContext(), TargetFolder(DL)) { + friend struct SCEVVisitor<SCEVExpander, Value *>; + +public: + /// Construct a SCEVExpander in "canonical" mode. + explicit SCEVExpander(ScalarEvolution &se, const DataLayout &DL, + const char *name, bool PreserveLCSSA = true) + : SE(se), DL(DL), IVName(name), PreserveLCSSA(PreserveLCSSA), + IVIncInsertLoop(nullptr), IVIncInsertPos(nullptr), CanonicalMode(true), + LSRMode(false), + Builder(se.getContext(), TargetFolder(DL), + IRBuilderCallbackInserter( + [this](Instruction *I) { rememberInstruction(I); })) { #ifndef NDEBUG - DebugType = ""; + DebugType = ""; #endif - } + } - ~SCEVExpander() { - // Make sure the insert point guard stack is consistent. - assert(InsertPointGuards.empty()); - } + ~SCEVExpander() { + // Make sure the insert point guard stack is consistent. + assert(InsertPointGuards.empty()); + } #ifndef NDEBUG - void setDebugType(const char* s) { DebugType = s; } + void setDebugType(const char *s) { DebugType = s; } #endif - /// Erase the contents of the InsertedExpressions map so that users trying - /// to expand the same expression into multiple BasicBlocks or different - /// places within the same BasicBlock can do so. - void clear() { - InsertedExpressions.clear(); - InsertedValues.clear(); - InsertedPostIncValues.clear(); - ChainedPhis.clear(); + /// Erase the contents of the InsertedExpressions map so that users trying + /// to expand the same expression into multiple BasicBlocks or different + /// places within the same BasicBlock can do so. + void clear() { + InsertedExpressions.clear(); + InsertedValues.clear(); + InsertedPostIncValues.clear(); + ReusedValues.clear(); + ChainedPhis.clear(); + } + + /// Return a vector containing all instructions inserted during expansion. + SmallVector<Instruction *, 32> getAllInsertedInstructions() const { + SmallVector<Instruction *, 32> Result; + for (auto &VH : InsertedValues) { + Value *V = VH; + if (ReusedValues.contains(V)) + continue; + if (auto *Inst = dyn_cast<Instruction>(V)) + Result.push_back(Inst); } - - /// Return true for expressions that can't be evaluated at runtime - /// within given \b Budget. - /// - /// At is a parameter which specifies point in code where user is going to - /// expand this expression. Sometimes this knowledge can lead to - /// a less pessimistic cost estimation. - bool isHighCostExpansion(const SCEV *Expr, Loop *L, unsigned Budget, - const TargetTransformInfo *TTI, - const Instruction *At) { - assert(TTI && "This function requires TTI to be provided."); - assert(At && "This function requires At instruction to be provided."); - if (!TTI) // In assert-less builds, avoid crashing - return true; // by always claiming to be high-cost. 
- SmallVector<const SCEV *, 8> Worklist; - SmallPtrSet<const SCEV *, 8> Processed; - int BudgetRemaining = Budget * TargetTransformInfo::TCC_Basic; - Worklist.emplace_back(Expr); - while (!Worklist.empty()) { - const SCEV *S = Worklist.pop_back_val(); - if (isHighCostExpansionHelper(S, L, *At, BudgetRemaining, *TTI, - Processed, Worklist)) - return true; - } - assert(BudgetRemaining >= 0 && "Should have returned from inner loop."); - return false; + for (auto &VH : InsertedPostIncValues) { + Value *V = VH; + if (ReusedValues.contains(V)) + continue; + if (auto *Inst = dyn_cast<Instruction>(V)) + Result.push_back(Inst); } - /// This method returns the canonical induction variable of the specified - /// type for the specified loop (inserting one if there is none). A - /// canonical induction variable starts at zero and steps by one on each - /// iteration. - PHINode *getOrInsertCanonicalInductionVariable(const Loop *L, Type *Ty); - - /// Return the induction variable increment's IV operand. - Instruction *getIVIncOperand(Instruction *IncV, Instruction *InsertPos, - bool allowScale); - - /// Utility for hoisting an IV increment. - bool hoistIVInc(Instruction *IncV, Instruction *InsertPos); - - /// replace congruent phis with their most canonical representative. Return - /// the number of phis eliminated. - unsigned replaceCongruentIVs(Loop *L, const DominatorTree *DT, - SmallVectorImpl<WeakTrackingVH> &DeadInsts, - const TargetTransformInfo *TTI = nullptr); - - /// Insert code to directly compute the specified SCEV expression into the - /// program. The inserted code is inserted into the specified block. - Value *expandCodeFor(const SCEV *SH, Type *Ty, Instruction *I); - - /// Insert code to directly compute the specified SCEV expression into the - /// program. The inserted code is inserted into the SCEVExpander's current - /// insertion point. If a type is specified, the result will be expanded to - /// have that type, with a cast if necessary. - Value *expandCodeFor(const SCEV *SH, Type *Ty = nullptr); - - - /// Generates a code sequence that evaluates this predicate. The inserted - /// instructions will be at position \p Loc. The result will be of type i1 - /// and will have a value of 0 when the predicate is false and 1 otherwise. - Value *expandCodeForPredicate(const SCEVPredicate *Pred, Instruction *Loc); - - /// A specialized variant of expandCodeForPredicate, handling the case when - /// we are expanding code for a SCEVEqualPredicate. - Value *expandEqualPredicate(const SCEVEqualPredicate *Pred, - Instruction *Loc); - - /// Generates code that evaluates if the \p AR expression will overflow. - Value *generateOverflowCheck(const SCEVAddRecExpr *AR, Instruction *Loc, - bool Signed); - - /// A specialized variant of expandCodeForPredicate, handling the case when - /// we are expanding code for a SCEVWrapPredicate. - Value *expandWrapPredicate(const SCEVWrapPredicate *P, Instruction *Loc); - - /// A specialized variant of expandCodeForPredicate, handling the case when - /// we are expanding code for a SCEVUnionPredicate. - Value *expandUnionPredicate(const SCEVUnionPredicate *Pred, - Instruction *Loc); - - /// Set the current IV increment loop and position. - void setIVIncInsertPos(const Loop *L, Instruction *Pos) { - assert(!CanonicalMode && - "IV increment positions are not supported in CanonicalMode"); - IVIncInsertLoop = L; - IVIncInsertPos = Pos; - } + return Result; + } - /// Enable post-inc expansion for addrecs referring to the given - /// loops. 
Post-inc expansion is only supported in non-canonical mode. - void setPostInc(const PostIncLoopSet &L) { - assert(!CanonicalMode && - "Post-inc expansion is not supported in CanonicalMode"); - PostIncLoops = L; + /// Return true for expressions that can't be evaluated at runtime + /// within given \b Budget. + /// + /// At is a parameter which specifies point in code where user is going to + /// expand this expression. Sometimes this knowledge can lead to + /// a less pessimistic cost estimation. + bool isHighCostExpansion(const SCEV *Expr, Loop *L, unsigned Budget, + const TargetTransformInfo *TTI, + const Instruction *At) { + assert(TTI && "This function requires TTI to be provided."); + assert(At && "This function requires At instruction to be provided."); + if (!TTI) // In assert-less builds, avoid crashing + return true; // by always claiming to be high-cost. + SmallVector<SCEVOperand, 8> Worklist; + SmallPtrSet<const SCEV *, 8> Processed; + int BudgetRemaining = Budget * TargetTransformInfo::TCC_Basic; + Worklist.emplace_back(-1, -1, Expr); + while (!Worklist.empty()) { + const SCEVOperand WorkItem = Worklist.pop_back_val(); + if (isHighCostExpansionHelper(WorkItem, L, *At, BudgetRemaining, + *TTI, Processed, Worklist)) + return true; } + assert(BudgetRemaining >= 0 && "Should have returned from inner loop."); + return false; + } + + /// Return the induction variable increment's IV operand. + Instruction *getIVIncOperand(Instruction *IncV, Instruction *InsertPos, + bool allowScale); + + /// Utility for hoisting an IV increment. + bool hoistIVInc(Instruction *IncV, Instruction *InsertPos); + + /// replace congruent phis with their most canonical representative. Return + /// the number of phis eliminated. + unsigned replaceCongruentIVs(Loop *L, const DominatorTree *DT, + SmallVectorImpl<WeakTrackingVH> &DeadInsts, + const TargetTransformInfo *TTI = nullptr); + + /// Insert code to directly compute the specified SCEV expression into the + /// program. The code is inserted into the specified block. + Value *expandCodeFor(const SCEV *SH, Type *Ty, Instruction *I) { + return expandCodeForImpl(SH, Ty, I, true); + } + + /// Insert code to directly compute the specified SCEV expression into the + /// program. The code is inserted into the SCEVExpander's current + /// insertion point. If a type is specified, the result will be expanded to + /// have that type, with a cast if necessary. + Value *expandCodeFor(const SCEV *SH, Type *Ty = nullptr) { + return expandCodeForImpl(SH, Ty, true); + } + + /// Generates a code sequence that evaluates this predicate. The inserted + /// instructions will be at position \p Loc. The result will be of type i1 + /// and will have a value of 0 when the predicate is false and 1 otherwise. + Value *expandCodeForPredicate(const SCEVPredicate *Pred, Instruction *Loc); + + /// A specialized variant of expandCodeForPredicate, handling the case when + /// we are expanding code for a SCEVEqualPredicate. + Value *expandEqualPredicate(const SCEVEqualPredicate *Pred, Instruction *Loc); + + /// Generates code that evaluates if the \p AR expression will overflow. + Value *generateOverflowCheck(const SCEVAddRecExpr *AR, Instruction *Loc, + bool Signed); + + /// A specialized variant of expandCodeForPredicate, handling the case when + /// we are expanding code for a SCEVWrapPredicate. 
+ Value *expandWrapPredicate(const SCEVWrapPredicate *P, Instruction *Loc); + + /// A specialized variant of expandCodeForPredicate, handling the case when + /// we are expanding code for a SCEVUnionPredicate. + Value *expandUnionPredicate(const SCEVUnionPredicate *Pred, Instruction *Loc); + + /// Set the current IV increment loop and position. + void setIVIncInsertPos(const Loop *L, Instruction *Pos) { + assert(!CanonicalMode && + "IV increment positions are not supported in CanonicalMode"); + IVIncInsertLoop = L; + IVIncInsertPos = Pos; + } + + /// Enable post-inc expansion for addrecs referring to the given + /// loops. Post-inc expansion is only supported in non-canonical mode. + void setPostInc(const PostIncLoopSet &L) { + assert(!CanonicalMode && + "Post-inc expansion is not supported in CanonicalMode"); + PostIncLoops = L; + } + + /// Disable all post-inc expansion. + void clearPostInc() { + PostIncLoops.clear(); + + // When we change the post-inc loop set, cached expansions may no + // longer be valid. + InsertedPostIncValues.clear(); + } + + /// Disable the behavior of expanding expressions in canonical form rather + /// than in a more literal form. Non-canonical mode is useful for late + /// optimization passes. + void disableCanonicalMode() { CanonicalMode = false; } + + void enableLSRMode() { LSRMode = true; } + + /// Set the current insertion point. This is useful if multiple calls to + /// expandCodeFor() are going to be made with the same insert point and the + /// insert point may be moved during one of the expansions (e.g. if the + /// insert point is not a block terminator). + void setInsertPoint(Instruction *IP) { + assert(IP); + Builder.SetInsertPoint(IP); + } + + /// Clear the current insertion point. This is useful if the instruction + /// that had been serving as the insertion point may have been deleted. + void clearInsertPoint() { Builder.ClearInsertionPoint(); } + + /// Set location information used by debugging information. + void SetCurrentDebugLocation(DebugLoc L) { + Builder.SetCurrentDebugLocation(std::move(L)); + } + + /// Get location information used by debugging information. + DebugLoc getCurrentDebugLocation() const { + return Builder.getCurrentDebugLocation(); + } + + /// Return true if the specified instruction was inserted by the code + /// rewriter. If so, the client should not modify the instruction. Note that + /// this also includes instructions re-used during expansion. + bool isInsertedInstruction(Instruction *I) const { + return InsertedValues.count(I) || InsertedPostIncValues.count(I); + } + + void setChainedPhi(PHINode *PN) { ChainedPhis.insert(PN); } + + /// Try to find the ValueOffsetPair for S. The function is mainly used to + /// check whether S can be expanded cheaply. If this returns a non-None + /// value, we know we can codegen the `ValueOffsetPair` into a suitable + /// expansion identical with S so that S can be expanded cheaply. + /// + /// L is a hint which tells in which loop to look for the suitable value. + /// On success return value which is equivalent to the expanded S at point + /// At. Return nullptr if value was not found. + /// + /// Note that this function does not perform an exhaustive search. I.e if it + /// didn't find any value it does not mean that there is no such value. + /// + Optional<ScalarEvolution::ValueOffsetPair> + getRelatedExistingExpansion(const SCEV *S, const Instruction *At, Loop *L); - /// Disable all post-inc expansion. 
- void clearPostInc() { - PostIncLoops.clear(); + /// Returns a suitable insert point after \p I, that dominates \p + /// MustDominate. Skips instructions inserted by the expander. + BasicBlock::iterator findInsertPointAfter(Instruction *I, + Instruction *MustDominate); - // When we change the post-inc loop set, cached expansions may no - // longer be valid. - InsertedPostIncValues.clear(); - } +private: + LLVMContext &getContext() const { return SE.getContext(); } - /// Disable the behavior of expanding expressions in canonical form rather - /// than in a more literal form. Non-canonical mode is useful for late - /// optimization passes. - void disableCanonicalMode() { CanonicalMode = false; } + /// Insert code to directly compute the specified SCEV expression into the + /// program. The code is inserted into the SCEVExpander's current + /// insertion point. If a type is specified, the result will be expanded to + /// have that type, with a cast if necessary. If \p Root is true, this + /// indicates that \p SH is the top-level expression to expand passed from + /// an external client call. + Value *expandCodeForImpl(const SCEV *SH, Type *Ty, bool Root); - void enableLSRMode() { LSRMode = true; } + /// Insert code to directly compute the specified SCEV expression into the + /// program. The code is inserted into the specified block. If \p + /// Root is true, this indicates that \p SH is the top-level expression to + /// expand passed from an external client call. + Value *expandCodeForImpl(const SCEV *SH, Type *Ty, Instruction *I, bool Root); - /// Set the current insertion point. This is useful if multiple calls to - /// expandCodeFor() are going to be made with the same insert point and the - /// insert point may be moved during one of the expansions (e.g. if the - /// insert point is not a block terminator). - void setInsertPoint(Instruction *IP) { - assert(IP); - Builder.SetInsertPoint(IP); - } + /// Recursive helper function for isHighCostExpansion. + bool isHighCostExpansionHelper( + const SCEVOperand &WorkItem, Loop *L, const Instruction &At, + int &BudgetRemaining, const TargetTransformInfo &TTI, + SmallPtrSetImpl<const SCEV *> &Processed, + SmallVectorImpl<SCEVOperand> &Worklist); - /// Clear the current insertion point. This is useful if the instruction - /// that had been serving as the insertion point may have been deleted. - void clearInsertPoint() { Builder.ClearInsertionPoint(); } + /// Insert the specified binary operator, doing a small amount of work to + /// avoid inserting an obviously redundant operation, and hoisting to an + /// outer loop when the opportunity is there and it is safe. + Value *InsertBinop(Instruction::BinaryOps Opcode, Value *LHS, Value *RHS, + SCEV::NoWrapFlags Flags, bool IsSafeToHoist); - /// Set location information used by debugging information. - void SetCurrentDebugLocation(DebugLoc L) { - Builder.SetCurrentDebugLocation(std::move(L)); - } + /// Arrange for there to be a cast of V to Ty at IP, reusing an existing + /// cast if a suitable one exists, moving an existing cast if a suitable one + /// exists but isn't in the right place, or creating a new one. + Value *ReuseOrCreateCast(Value *V, Type *Ty, Instruction::CastOps Op, + BasicBlock::iterator IP); - /// Get location information used by debugging information. - const DebugLoc &getCurrentDebugLocation() const { - return Builder.getCurrentDebugLocation(); - } + /// Insert a cast of V to the specified type, which must be possible with a + /// noop cast, doing what we can to share the casts. 
+ Value *InsertNoopCastOfTo(Value *V, Type *Ty); - /// Return true if the specified instruction was inserted by the code - /// rewriter. If so, the client should not modify the instruction. - bool isInsertedInstruction(Instruction *I) const { - return InsertedValues.count(I) || InsertedPostIncValues.count(I); - } + /// Expand a SCEVAddExpr with a pointer type into a GEP instead of using + /// ptrtoint+arithmetic+inttoptr. + Value *expandAddToGEP(const SCEV *const *op_begin, const SCEV *const *op_end, + PointerType *PTy, Type *Ty, Value *V); + Value *expandAddToGEP(const SCEV *Op, PointerType *PTy, Type *Ty, Value *V); - void setChainedPhi(PHINode *PN) { ChainedPhis.insert(PN); } - - /// Try to find existing LLVM IR value for S available at the point At. - Value *getExactExistingExpansion(const SCEV *S, const Instruction *At, - Loop *L); - - /// Try to find the ValueOffsetPair for S. The function is mainly used to - /// check whether S can be expanded cheaply. If this returns a non-None - /// value, we know we can codegen the `ValueOffsetPair` into a suitable - /// expansion identical with S so that S can be expanded cheaply. - /// - /// L is a hint which tells in which loop to look for the suitable value. - /// On success return value which is equivalent to the expanded S at point - /// At. Return nullptr if value was not found. - /// - /// Note that this function does not perform an exhaustive search. I.e if it - /// didn't find any value it does not mean that there is no such value. - /// - Optional<ScalarEvolution::ValueOffsetPair> - getRelatedExistingExpansion(const SCEV *S, const Instruction *At, Loop *L); - - private: - LLVMContext &getContext() const { return SE.getContext(); } - - /// Recursive helper function for isHighCostExpansion. - bool isHighCostExpansionHelper(const SCEV *S, Loop *L, - const Instruction &At, int &BudgetRemaining, - const TargetTransformInfo &TTI, - SmallPtrSetImpl<const SCEV *> &Processed, - SmallVectorImpl<const SCEV *> &Worklist); - - /// Insert the specified binary operator, doing a small amount of work to - /// avoid inserting an obviously redundant operation, and hoisting to an - /// outer loop when the opportunity is there and it is safe. - Value *InsertBinop(Instruction::BinaryOps Opcode, Value *LHS, Value *RHS, - SCEV::NoWrapFlags Flags, bool IsSafeToHoist); - - /// Arrange for there to be a cast of V to Ty at IP, reusing an existing - /// cast if a suitable one exists, moving an existing cast if a suitable one - /// exists but isn't in the right place, or creating a new one. - Value *ReuseOrCreateCast(Value *V, Type *Ty, - Instruction::CastOps Op, - BasicBlock::iterator IP); - - /// Insert a cast of V to the specified type, which must be possible with a - /// noop cast, doing what we can to share the casts. - Value *InsertNoopCastOfTo(Value *V, Type *Ty); - - /// Expand a SCEVAddExpr with a pointer type into a GEP instead of using - /// ptrtoint+arithmetic+inttoptr. - Value *expandAddToGEP(const SCEV *const *op_begin, - const SCEV *const *op_end, - PointerType *PTy, Type *Ty, Value *V); - Value *expandAddToGEP(const SCEV *Op, PointerType *PTy, Type *Ty, Value *V); - - /// Find a previous Value in ExprValueMap for expand. - ScalarEvolution::ValueOffsetPair - FindValueInExprValueMap(const SCEV *S, const Instruction *InsertPt); - - Value *expand(const SCEV *S); - - /// Determine the most "relevant" loop for the given SCEV. 
- const Loop *getRelevantLoop(const SCEV *); - - Value *visitConstant(const SCEVConstant *S) { - return S->getValue(); - } + /// Find a previous Value in ExprValueMap for expand. + ScalarEvolution::ValueOffsetPair + FindValueInExprValueMap(const SCEV *S, const Instruction *InsertPt); - Value *visitTruncateExpr(const SCEVTruncateExpr *S); + Value *expand(const SCEV *S); - Value *visitZeroExtendExpr(const SCEVZeroExtendExpr *S); + /// Determine the most "relevant" loop for the given SCEV. + const Loop *getRelevantLoop(const SCEV *); - Value *visitSignExtendExpr(const SCEVSignExtendExpr *S); + Value *visitConstant(const SCEVConstant *S) { return S->getValue(); } - Value *visitAddExpr(const SCEVAddExpr *S); + Value *visitPtrToIntExpr(const SCEVPtrToIntExpr *S); - Value *visitMulExpr(const SCEVMulExpr *S); + Value *visitTruncateExpr(const SCEVTruncateExpr *S); - Value *visitUDivExpr(const SCEVUDivExpr *S); + Value *visitZeroExtendExpr(const SCEVZeroExtendExpr *S); - Value *visitAddRecExpr(const SCEVAddRecExpr *S); + Value *visitSignExtendExpr(const SCEVSignExtendExpr *S); - Value *visitSMaxExpr(const SCEVSMaxExpr *S); + Value *visitAddExpr(const SCEVAddExpr *S); - Value *visitUMaxExpr(const SCEVUMaxExpr *S); + Value *visitMulExpr(const SCEVMulExpr *S); - Value *visitSMinExpr(const SCEVSMinExpr *S); + Value *visitUDivExpr(const SCEVUDivExpr *S); - Value *visitUMinExpr(const SCEVUMinExpr *S); + Value *visitAddRecExpr(const SCEVAddRecExpr *S); - Value *visitUnknown(const SCEVUnknown *S) { - return S->getValue(); - } + Value *visitSMaxExpr(const SCEVSMaxExpr *S); - void rememberInstruction(Value *I); + Value *visitUMaxExpr(const SCEVUMaxExpr *S); - bool isNormalAddRecExprPHI(PHINode *PN, Instruction *IncV, const Loop *L); + Value *visitSMinExpr(const SCEVSMinExpr *S); - bool isExpandedAddRecExprPHI(PHINode *PN, Instruction *IncV, const Loop *L); + Value *visitUMinExpr(const SCEVUMinExpr *S); - Value *expandAddRecExprLiterally(const SCEVAddRecExpr *); - PHINode *getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized, - const Loop *L, - Type *ExpandTy, - Type *IntTy, - Type *&TruncTy, - bool &InvertStep); - Value *expandIVInc(PHINode *PN, Value *StepV, const Loop *L, - Type *ExpandTy, Type *IntTy, bool useSubtract); + Value *visitUnknown(const SCEVUnknown *S) { return S->getValue(); } - void hoistBeforePos(DominatorTree *DT, Instruction *InstToHoist, - Instruction *Pos, PHINode *LoopPhi); + void rememberInstruction(Value *I); - void fixupInsertPoints(Instruction *I); - }; -} + bool isNormalAddRecExprPHI(PHINode *PN, Instruction *IncV, const Loop *L); + + bool isExpandedAddRecExprPHI(PHINode *PN, Instruction *IncV, const Loop *L); + + Value *expandAddRecExprLiterally(const SCEVAddRecExpr *); + PHINode *getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized, + const Loop *L, Type *ExpandTy, Type *IntTy, + Type *&TruncTy, bool &InvertStep); + Value *expandIVInc(PHINode *PN, Value *StepV, const Loop *L, Type *ExpandTy, + Type *IntTy, bool useSubtract); + + void hoistBeforePos(DominatorTree *DT, Instruction *InstToHoist, + Instruction *Pos, PHINode *LoopPhi); + + void fixupInsertPoints(Instruction *I); + + /// If required, create LCSSA PHIs for \p Users' operand \p OpIdx. If new + /// LCSSA PHIs have been created, return the LCSSA PHI available at \p User. + /// If no PHIs have been created, return the unchanged operand \p OpIdx. 
+ Value *fixupLCSSAFormFor(Instruction *User, unsigned OpIdx); +}; + +/// Helper to remove instructions inserted during SCEV expansion, unless they +/// are marked as used. +class SCEVExpanderCleaner { + SCEVExpander &Expander; + + DominatorTree &DT; + + /// Indicates whether the result of the expansion is used. If false, the + /// instructions added during expansion are removed. + bool ResultUsed; + +public: + SCEVExpanderCleaner(SCEVExpander &Expander, DominatorTree &DT) + : Expander(Expander), DT(DT), ResultUsed(false) {} + + ~SCEVExpanderCleaner(); + + /// Indicate that the result of the expansion is used. + void markResultUsed() { ResultUsed = true; } +}; +} // namespace llvm #endif diff --git a/llvm/include/llvm/Transforms/Utils/SimplifyCFGOptions.h b/llvm/include/llvm/Transforms/Utils/SimplifyCFGOptions.h new file mode 100644 index 000000000000..fb3a7490346f --- /dev/null +++ b/llvm/include/llvm/Transforms/Utils/SimplifyCFGOptions.h @@ -0,0 +1,77 @@ +//===- SimplifyCFGOptions.h - Control structure for SimplifyCFG -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// A set of parameters used to control the transforms in the SimplifyCFG pass. +// Options may change depending on the position in the optimization pipeline. +// For example, canonical form that includes switches and branches may later be +// replaced by lookup tables and selects. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_UTILS_SIMPLIFYCFGOPTIONS_H +#define LLVM_TRANSFORMS_UTILS_SIMPLIFYCFGOPTIONS_H + +namespace llvm { + +class AssumptionCache; + +struct SimplifyCFGOptions { + int BonusInstThreshold = 1; + bool ForwardSwitchCondToPhi = false; + bool ConvertSwitchToLookupTable = false; + bool NeedCanonicalLoop = true; + bool HoistCommonInsts = false; + bool SinkCommonInsts = false; + bool SimplifyCondBranch = true; + bool FoldTwoEntryPHINode = true; + + AssumptionCache *AC = nullptr; + + // Support 'builder' pattern to set members by name at construction time. 
+ SimplifyCFGOptions &bonusInstThreshold(int I) { + BonusInstThreshold = I; + return *this; + } + SimplifyCFGOptions &forwardSwitchCondToPhi(bool B) { + ForwardSwitchCondToPhi = B; + return *this; + } + SimplifyCFGOptions &convertSwitchToLookupTable(bool B) { + ConvertSwitchToLookupTable = B; + return *this; + } + SimplifyCFGOptions &needCanonicalLoops(bool B) { + NeedCanonicalLoop = B; + return *this; + } + SimplifyCFGOptions &hoistCommonInsts(bool B) { + HoistCommonInsts = B; + return *this; + } + SimplifyCFGOptions &sinkCommonInsts(bool B) { + SinkCommonInsts = B; + return *this; + } + SimplifyCFGOptions &setAssumptionCache(AssumptionCache *Cache) { + AC = Cache; + return *this; + } + SimplifyCFGOptions &setSimplifyCondBranch(bool B) { + SimplifyCondBranch = B; + return *this; + } + + SimplifyCFGOptions &setFoldTwoEntryPHINode(bool B) { + FoldTwoEntryPHINode = B; + return *this; + } +}; + +} // namespace llvm + +#endif // LLVM_TRANSFORMS_UTILS_SIMPLIFYCFGOPTIONS_H diff --git a/llvm/include/llvm/Transforms/Utils/SimplifyIndVar.h b/llvm/include/llvm/Transforms/Utils/SimplifyIndVar.h index 53b15e4aa66c..4ba56fb45afa 100644 --- a/llvm/include/llvm/Transforms/Utils/SimplifyIndVar.h +++ b/llvm/include/llvm/Transforms/Utils/SimplifyIndVar.h @@ -15,6 +15,8 @@ #ifndef LLVM_TRANSFORMS_UTILS_SIMPLIFYINDVAR_H #define LLVM_TRANSFORMS_UTILS_SIMPLIFYINDVAR_H +#include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/IR/ConstantRange.h" #include "llvm/IR/ValueHandle.h" namespace llvm { @@ -57,6 +59,27 @@ bool simplifyLoopIVs(Loop *L, ScalarEvolution *SE, DominatorTree *DT, LoopInfo *LI, const TargetTransformInfo *TTI, SmallVectorImpl<WeakTrackingVH> &Dead); +/// Collect information about induction variables that are used by sign/zero +/// extend operations. This information is recorded by CollectExtend and provides +/// the input to WidenIV. +struct WideIVInfo { + PHINode *NarrowIV = nullptr; + + // Widest integer type created [sz]ext + Type *WidestNativeType = nullptr; + + // Was a sext user seen before a zext? + bool IsSigned = false; +}; + +/// Widen Induction Variables - Extend the width of an IV to cover its +/// widest uses. 
+PHINode *createWideIV(const WideIVInfo &WI, + LoopInfo *LI, ScalarEvolution *SE, SCEVExpander &Rewriter, + DominatorTree *DT, SmallVectorImpl<WeakTrackingVH> &DeadInsts, + unsigned &NumElimExt, unsigned &NumWidened, + bool HasGuards, bool UsePostIncrementRanges); + } // end namespace llvm #endif // LLVM_TRANSFORMS_UTILS_SIMPLIFYINDVAR_H diff --git a/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h b/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h index d6ee19365c72..8703434e1696 100644 --- a/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h +++ b/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h @@ -24,7 +24,6 @@ class CallInst; class DataLayout; class Instruction; class IRBuilderBase; -class TargetLibraryInfo; class Function; class OptimizationRemarkEmitter; class BlockFrequencyInfo; @@ -60,6 +59,7 @@ private: Value *optimizeStrpCpyChk(CallInst *CI, IRBuilderBase &B, LibFunc Func); Value *optimizeStrpNCpyChk(CallInst *CI, IRBuilderBase &B, LibFunc Func); Value *optimizeStrLenChk(CallInst *CI, IRBuilderBase &B); + Value *optimizeMemPCpyChk(CallInst *CI, IRBuilderBase &B); Value *optimizeMemCCpyChk(CallInst *CI, IRBuilderBase &B); Value *optimizeSNPrintfChk(CallInst *CI, IRBuilderBase &B); Value *optimizeSPrintfChk(CallInst *CI,IRBuilderBase &B); diff --git a/llvm/include/llvm/Transforms/Utils/SizeOpts.h b/llvm/include/llvm/Transforms/Utils/SizeOpts.h index 08d963475f23..3c1173b747d3 100644 --- a/llvm/include/llvm/Transforms/Utils/SizeOpts.h +++ b/llvm/include/llvm/Transforms/Utils/SizeOpts.h @@ -19,7 +19,6 @@ extern llvm::cl::opt<bool> EnablePGSO; extern llvm::cl::opt<bool> PGSOLargeWorkingSetSizeOnly; -extern llvm::cl::opt<bool> PGSOIRPassOrTestOnly; extern llvm::cl::opt<bool> PGSOColdCodeOnly; extern llvm::cl::opt<bool> PGSOColdCodeOnlyForInstrPGO; extern llvm::cl::opt<bool> PGSOColdCodeOnlyForSamplePGO; @@ -60,11 +59,6 @@ bool shouldFuncOptimizeForSizeImpl(const FuncT *F, ProfileSummaryInfo *PSI, return true; if (!EnablePGSO) return false; - // Temporarily enable size optimizations only for the IR pass or test query - // sites for gradual commit/rollout. This is to be removed later. - if (PGSOIRPassOrTestOnly && !(QueryType == PGSOQueryType::IRPass || - QueryType == PGSOQueryType::Test)) - return false; if (isPGSOColdCodeOnly(PSI)) return AdapterT::isFunctionColdInCallGraph(F, PSI, *BFI); if (PSI->hasSampleProfile()) @@ -85,11 +79,6 @@ bool shouldOptimizeForSizeImpl(BlockTOrBlockFreq BBOrBlockFreq, ProfileSummaryIn return true; if (!EnablePGSO) return false; - // Temporarily enable size optimizations only for the IR pass or test query - // sites for gradual commit/rollout. This is to be removed later. - if (PGSOIRPassOrTestOnly && !(QueryType == PGSOQueryType::IRPass || - QueryType == PGSOQueryType::Test)) - return false; if (isPGSOColdCodeOnly(PSI)) return AdapterT::isColdBlock(BBOrBlockFreq, PSI, BFI); if (PSI->hasSampleProfile()) diff --git a/llvm/include/llvm/Transforms/Utils/StripGCRelocates.h b/llvm/include/llvm/Transforms/Utils/StripGCRelocates.h new file mode 100644 index 000000000000..13e6d8ac26a7 --- /dev/null +++ b/llvm/include/llvm/Transforms/Utils/StripGCRelocates.h @@ -0,0 +1,25 @@ +//===- StripGCRelocates.h - -----------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_UTILS_STRIPGCRELOCATES_H +#define LLVM_TRANSFORMS_UTILS_STRIPGCRELOCATES_H + +#include "llvm/IR/PassManager.h" + +namespace llvm { + +class Function; + +class StripGCRelocates : public PassInfoMixin<StripGCRelocates> { +public: + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); +}; + +} // end namespace llvm + +#endif // LLVM_TRANSFORMS_UTILS_STRIPGCRELOCATES_H diff --git a/llvm/include/llvm/Transforms/Utils/StripNonLineTableDebugInfo.h b/llvm/include/llvm/Transforms/Utils/StripNonLineTableDebugInfo.h new file mode 100644 index 000000000000..20d0aabd2938 --- /dev/null +++ b/llvm/include/llvm/Transforms/Utils/StripNonLineTableDebugInfo.h @@ -0,0 +1,26 @@ +//===- StripNonLineTableDebugInfo.h - -------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_UTILS_STRIPNONLINETABLEDEBUGINFO_H +#define LLVM_TRANSFORMS_UTILS_STRIPNONLINETABLEDEBUGINFO_H + +#include "llvm/IR/PassManager.h" + +namespace llvm { + +class Module; + +class StripNonLineTableDebugInfoPass + : public PassInfoMixin<StripNonLineTableDebugInfoPass> { +public: + PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); +}; + +} // end namespace llvm + +#endif // LLVM_TRANSFORMS_UTILS_STRIPNONLINETABLEDEBUGINFO_H diff --git a/llvm/include/llvm/Transforms/Utils/UnifyFunctionExitNodes.h b/llvm/include/llvm/Transforms/Utils/UnifyFunctionExitNodes.h index ff70446e163d..20b360212506 100644 --- a/llvm/include/llvm/Transforms/Utils/UnifyFunctionExitNodes.h +++ b/llvm/include/llvm/Transforms/Utils/UnifyFunctionExitNodes.h @@ -7,46 +7,39 @@ //===----------------------------------------------------------------------===// // // This pass is used to ensure that functions have at most one return and one -// unwind instruction in them. Additionally, it keeps track of which node is -// the new exit node of the CFG. If there are no return or unwind instructions -// in the function, the getReturnBlock/getUnwindBlock methods will return a null -// pointer. +// unreachable instruction in them. // //===----------------------------------------------------------------------===// #ifndef LLVM_TRANSFORMS_UTILS_UNIFYFUNCTIONEXITNODES_H #define LLVM_TRANSFORMS_UTILS_UNIFYFUNCTIONEXITNODES_H +#include "llvm/IR/PassManager.h" #include "llvm/Pass.h" namespace llvm { class BasicBlock; -struct UnifyFunctionExitNodes : public FunctionPass { - BasicBlock *ReturnBlock = nullptr; - BasicBlock *UnwindBlock = nullptr; - BasicBlock *UnreachableBlock; - +class UnifyFunctionExitNodesLegacyPass : public FunctionPass { public: static char ID; // Pass identification, replacement for typeid - UnifyFunctionExitNodes(); + UnifyFunctionExitNodesLegacyPass(); // We can preserve non-critical-edgeness when we unify function exit nodes void getAnalysisUsage(AnalysisUsage &AU) const override; - // getReturn|Unwind|UnreachableBlock - Return the new single (or nonexistent) - // return, unwind, or unreachable basic blocks in the CFG. 
- // - BasicBlock *getReturnBlock() const { return ReturnBlock; } - BasicBlock *getUnwindBlock() const { return UnwindBlock; } - BasicBlock *getUnreachableBlock() const { return UnreachableBlock; } - bool runOnFunction(Function &F) override; }; Pass *createUnifyFunctionExitNodesPass(); +class UnifyFunctionExitNodesPass + : public PassInfoMixin<UnifyFunctionExitNodesPass> { +public: + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); +}; + } // end namespace llvm #endif // LLVM_TRANSFORMS_UTILS_UNIFYFUNCTIONEXITNODES_H diff --git a/llvm/include/llvm/Transforms/Utils/UnifyLoopExits.h b/llvm/include/llvm/Transforms/Utils/UnifyLoopExits.h new file mode 100644 index 000000000000..0b219cd12222 --- /dev/null +++ b/llvm/include/llvm/Transforms/Utils/UnifyLoopExits.h @@ -0,0 +1,22 @@ +//===- UnifyLoopExits.h - Redirect exiting edges to one block -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_UTILS_UNIFYLOOPEXITS_H +#define LLVM_TRANSFORMS_UTILS_UNIFYLOOPEXITS_H + +#include "llvm/IR/PassManager.h" + +namespace llvm { + +class UnifyLoopExitsPass : public PassInfoMixin<UnifyLoopExitsPass> { +public: + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); +}; +} // namespace llvm + +#endif // LLVM_TRANSFORMS_UTILS_UNIFYLOOPEXITS_H diff --git a/llvm/include/llvm/Transforms/Utils/UnrollLoop.h b/llvm/include/llvm/Transforms/Utils/UnrollLoop.h index bb3d02b95956..4254bd71a41c 100644 --- a/llvm/include/llvm/Transforms/Utils/UnrollLoop.h +++ b/llvm/include/llvm/Transforms/Utils/UnrollLoop.h @@ -92,16 +92,6 @@ bool UnrollRuntimeLoopRemainder( const TargetTransformInfo *TTI, bool PreserveLCSSA, Loop **ResultLoop = nullptr); -void computePeelCount(Loop *L, unsigned LoopSize, - TargetTransformInfo::UnrollingPreferences &UP, - TargetTransformInfo::PeelingPreferences &PP, - unsigned &TripCount, ScalarEvolution &SE); - -bool canPeel(Loop *L); - -bool peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, ScalarEvolution *SE, - DominatorTree *DT, AssumptionCache *AC, bool PreserveLCSSA); - LoopUnrollResult UnrollAndJamLoop(Loop *L, unsigned Count, unsigned TripCount, unsigned TripMultiple, bool UnrollRemainder, LoopInfo *LI, ScalarEvolution *SE, @@ -121,7 +111,6 @@ bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI, unsigned &TripMultiple, unsigned LoopSize, TargetTransformInfo::UnrollingPreferences &UP, TargetTransformInfo::PeelingPreferences &PP, - bool &UseUpperBound); void simplifyLoopAfterUnroll(Loop *L, bool SimplifyIVs, LoopInfo *LI, @@ -138,12 +127,6 @@ TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences( Optional<bool> UserAllowPartial, Optional<bool> UserRuntime, Optional<bool> UserUpperBound, Optional<unsigned> UserFullUnrollMaxCount); -TargetTransformInfo::PeelingPreferences -gatherPeelingPreferences(Loop *L, ScalarEvolution &SE, - const TargetTransformInfo &TTI, - Optional<bool> UserAllowPeeling, - Optional<bool> UserAllowProfileBasedPeeling); - unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls, bool &NotDuplicatable, bool &Convergent, const TargetTransformInfo &TTI, diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h index c6c3450f7760..2f80b4373b46 100644 
diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
index c6c3450f7760..2f80b4373b46 100644
--- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
+++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
@@ -29,6 +29,7 @@
 #include "llvm/ADT/MapVector.h"
 #include "llvm/Analysis/LoopAccessAnalysis.h"
 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Support/TypeSize.h"
 #include "llvm/Transforms/Utils/LoopUtils.h"
 
 namespace llvm {
@@ -43,8 +44,14 @@ namespace llvm {
 /// for example 'force', means a decision has been made. So, we need to be
 /// careful NOT to add them if the user hasn't specifically asked so.
 class LoopVectorizeHints {
-  enum HintKind { HK_WIDTH, HK_UNROLL, HK_FORCE, HK_ISVECTORIZED,
-                  HK_PREDICATE };
+  enum HintKind {
+    HK_WIDTH,
+    HK_UNROLL,
+    HK_FORCE,
+    HK_ISVECTORIZED,
+    HK_PREDICATE,
+    HK_SCALABLE
+  };
 
   /// Hint - associates name and validation with the hint value.
   struct Hint {
@@ -73,6 +80,9 @@ class LoopVectorizeHints {
   /// Vector Predicate
   Hint Predicate;
 
+  /// Says whether we should use fixed width or scalable vectorization.
+  Hint Scalable;
+
   /// Return the loop metadata prefix.
   static StringRef Prefix() { return "llvm.loop."; }
 
@@ -98,7 +108,9 @@ public:
   /// Dumps all the hint information.
   void emitRemarkWithHints() const;
 
-  unsigned getWidth() const { return Width.Value; }
+  ElementCount getWidth() const {
+    return ElementCount::get(Width.Value, isScalable());
+  }
   unsigned getInterleave() const { return Interleave.Value; }
   unsigned getIsVectorized() const { return IsVectorized.Value; }
   unsigned getPredicate() const { return Predicate.Value; }
@@ -109,6 +121,8 @@ public:
     return (ForceKind)Force.Value;
   }
 
+  bool isScalable() const { return Scalable.Value; }
+
   /// If hints are provided that force vectorization, use the AlwaysPrint
   /// pass name to force the frontend to print the diagnostic.
   const char *vectorizeAnalysisPassName() const;
@@ -119,7 +133,9 @@ public:
     // enabled by default because can be unsafe or inefficient. For example,
    // reordering floating-point operations will change the way round-off
     // error accumulates in the loop.
-    return getForce() == LoopVectorizeHints::FK_Enabled || getWidth() > 1;
+    ElementCount EC = getWidth();
+    return getForce() == LoopVectorizeHints::FK_Enabled ||
+           EC.getKnownMinValue() > 1;
   }
 
   bool isPotentiallyUnsafe() const {
@@ -202,9 +218,10 @@ public:
       Function *F, std::function<const LoopAccessInfo &(Loop &)> *GetLAA,
       LoopInfo *LI, OptimizationRemarkEmitter *ORE,
       LoopVectorizationRequirements *R, LoopVectorizeHints *H, DemandedBits *DB,
-      AssumptionCache *AC)
+      AssumptionCache *AC, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI)
       : TheLoop(L), LI(LI), PSE(PSE), TTI(TTI), TLI(TLI), DT(DT),
-        GetLAA(GetLAA), ORE(ORE), Requirements(R), Hints(H), DB(DB), AC(AC) {}
+        GetLAA(GetLAA), ORE(ORE), Requirements(R), Hints(H), DB(DB), AC(AC),
+        BFI(BFI), PSI(PSI) {}
 
   /// ReductionList contains the reduction descriptors for all
   /// of the reductions that were found in the loop.
@@ -229,6 +246,7 @@ public:
 
   /// Return true if we can vectorize this loop while folding its tail by
   /// masking, and mark all respective loads/stores for masking.
+  /// This object's state is only modified iff this function returns true.
   bool prepareToFoldTailByMasking();
 
   /// Returns the primary induction variable.
@@ -287,6 +305,19 @@ public:
   /// Returns true if the value V is uniform within the loop.
   bool isUniform(Value *V);
 
+  /// A uniform memory op is a load or store which accesses the same memory
+  /// location on all lanes.
+  bool isUniformMemOp(Instruction &I) {
+    Value *Ptr = getLoadStorePointerOperand(&I);
+    if (!Ptr)
+      return false;
+    // Note: There's nothing inherent which prevents predicated loads and
+    // stores from being uniform. The current lowering simply doesn't handle
+    // it; in particular, the cost model distinguishes scatter/gather from
+    // scalar w/predication, and we currently rely on the scalar path.
+    return isUniform(Ptr) && !blockNeedsPredication(I.getParent());
+  }
+
   /// Returns the information that we collected about runtime memory check.
   const RuntimePointerChecking *getRuntimePointerChecking() const {
     return LAI->getRuntimePointerChecking();
@@ -294,17 +325,21 @@ public:
 
   const LoopAccessInfo *getLAI() const { return LAI; }
 
+  bool isSafeForAnyVectorWidth() const {
+    return LAI->getDepChecker().isSafeForAnyVectorWidth();
+  }
+
   unsigned getMaxSafeDepDistBytes() { return LAI->getMaxSafeDepDistBytes(); }
 
-  uint64_t getMaxSafeRegisterWidth() const {
-    return LAI->getDepChecker().getMaxSafeRegisterWidth();
+  uint64_t getMaxSafeVectorWidthInBits() const {
+    return LAI->getDepChecker().getMaxSafeVectorWidthInBits();
   }
 
   bool hasStride(Value *V) { return LAI->hasStride(V); }
 
   /// Returns true if vector representation of the instruction \p I
   /// requires mask.
-  bool isMaskRequired(const Instruction *I) { return (MaskedOp.count(I) != 0); }
+  bool isMaskRequired(const Instruction *I) { return MaskedOp.contains(I); }
 
   unsigned getNumStores() const { return LAI->getNumStores(); }
   unsigned getNumLoads() const { return LAI->getNumLoads(); }
@@ -369,8 +404,14 @@ private:
   /// its original trip-count, under a proper guard, which should be preserved.
   /// \p SafePtrs is a list of addresses that are known to be legal and we know
   /// that we can read from them without segfault.
+  /// \p MaskedOp is a list of instructions that have to be transformed into
+  /// calls to the appropriate masked intrinsic when the loop is vectorized.
+  /// \p ConditionalAssumes is a list of assume instructions in predicated
+  /// blocks that must be dropped if the CFG gets flattened.
   bool blockCanBePredicated(BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs,
-                            bool PreserveGuards = false);
+                            SmallPtrSetImpl<const Instruction *> &MaskedOp,
+                            SmallPtrSetImpl<Instruction *> &ConditionalAssumes,
+                            bool PreserveGuards = false) const;
 
   /// Updates the vectorization state by adding \p Phi to the inductions list.
   /// This can set \p Phi as the main induction of the loop if \p Phi is a
@@ -478,6 +519,10 @@ private:
   /// Assume instructions in predicated blocks must be dropped if the CFG gets
   /// flattened.
   SmallPtrSet<Instruction *, 8> ConditionalAssumes;
+
+  /// BFI and PSI are used to check for profile guided size optimizations.
+  BlockFrequencyInfo *BFI;
+  ProfileSummaryInfo *PSI;
 };
 
 } // namespace llvm
diff --git a/llvm/include/llvm/Transforms/Vectorize/SLPVectorizer.h b/llvm/include/llvm/Transforms/Vectorize/SLPVectorizer.h
index 77236dec75dc..52a57939209c 100644
--- a/llvm/include/llvm/Transforms/Vectorize/SLPVectorizer.h
+++ b/llvm/include/llvm/Transforms/Vectorize/SLPVectorizer.h
@@ -22,11 +22,11 @@
 #include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/None.h"
 #include "llvm/ADT/SmallVector.h"
-#include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/IR/PassManager.h"
 
 namespace llvm {
 
+class AAResults;
 class AssumptionCache;
 class BasicBlock;
 class CmpInst;
@@ -34,6 +34,7 @@ class DataLayout;
 class DemandedBits;
 class DominatorTree;
 class Function;
+class GetElementPtrInst;
 class InsertElementInst;
 class InsertValueInst;
 class Instruction;
@@ -63,7 +64,7 @@ struct SLPVectorizerPass : public PassInfoMixin<SLPVectorizerPass> {
   ScalarEvolution *SE = nullptr;
   TargetTransformInfo *TTI = nullptr;
   TargetLibraryInfo *TLI = nullptr;
-  AliasAnalysis *AA = nullptr;
+  AAResults *AA = nullptr;
   LoopInfo *LI = nullptr;
   DominatorTree *DT = nullptr;
   AssumptionCache *AC = nullptr;
@@ -75,7 +76,7 @@ public:
 
   // Glue for old PM.
   bool runImpl(Function &F, ScalarEvolution *SE_, TargetTransformInfo *TTI_,
-               TargetLibraryInfo *TLI_, AliasAnalysis *AA_, LoopInfo *LI_,
+               TargetLibraryInfo *TLI_, AAResults *AA_, LoopInfo *LI_,
                DominatorTree *DT_, AssumptionCache *AC_, DemandedBits *DB_,
                OptimizationRemarkEmitter *ORE_);
 
diff --git a/llvm/include/llvm/module.modulemap b/llvm/include/llvm/module.modulemap
index b262311a96a0..a199f7f2d79a 100644
--- a/llvm/include/llvm/module.modulemap
+++ b/llvm/include/llvm/module.modulemap
@@ -30,6 +30,7 @@ module LLVM_Backend {
 
     // These are intended for (repeated) textual inclusion.
     textual header "CodeGen/DIEValue.def"
+    textual header "CodeGen/MachinePassRegistry.def"
   }
 }
 
@@ -65,6 +66,7 @@ module LLVM_BinaryFormat {
   textual header "BinaryFormat/ELFRelocs/ARC.def"
   textual header "BinaryFormat/ELFRelocs/AVR.def"
   textual header "BinaryFormat/ELFRelocs/BPF.def"
+  textual header "BinaryFormat/ELFRelocs/CSKY.def"
   textual header "BinaryFormat/ELFRelocs/Hexagon.def"
   textual header "BinaryFormat/ELFRelocs/i386.def"
   textual header "BinaryFormat/ELFRelocs/Lanai.def"
@@ -188,23 +190,30 @@ module LLVM_ExecutionEngine {
   exclude header "ExecutionEngine/Orc/RemoteObjectLayer.h"
 
   // Exclude headers from LLVM_OrcSupport.
-  exclude header "ExecutionEngine/Orc/OrcError.h"
+  exclude header "ExecutionEngine/Orc/Shared/OrcError.h"
   exclude header "ExecutionEngine/Orc/RPC/RPCUtils.h"
   exclude header "ExecutionEngine/Orc/RPC/RPCSerialization.h"
   exclude header "ExecutionEngine/Orc/RPC/RawByteChannel.h"
 }
 
+module LLVM_FileCheck {
+  requires cplusplus
+
+  umbrella "FileCheck"
+  module * { export * }
+}
+
 // Orc utilities that don't depend only on Support (not ExecutionEngine or
 // IR). This is a workaround for ExecutionEngine's broken layering, and will
 // be removed in the future.
 module LLVM_OrcSupport {
   requires cplusplus
 
-  header "ExecutionEngine/Orc/OrcError.h"
-  header "ExecutionEngine/Orc/RPC/RPCUtils.h"
-  header "ExecutionEngine/Orc/RPC/RPCSerialization.h"
-  header "ExecutionEngine/Orc/RPC/RawByteChannel.h"
+  header "ExecutionEngine/Orc/Shared/OrcError.h"
+  header "ExecutionEngine/Orc/Shared/RPCUtils.h"
+  header "ExecutionEngine/Orc/Shared/Serialization.h"
+  header "ExecutionEngine/Orc/Shared/RawByteChannel.h"
 
   export *
 }
@@ -253,6 +262,7 @@ module LLVM_intrinsic_gen {
   module IR_CFG { header "IR/CFG.h" export * }
   module IR_ConstantRange { header "IR/ConstantRange.h" export * }
   module IR_Dominators { header "IR/Dominators.h" export * }
+  module IR_FixedPointBuilder { header "IR/FixedPointBuilder.h" export * }
   module Analysis_PostDominators { header "Analysis/PostDominators.h" export * }
   module Analysis_DomTreeUpdater { header "Analysis/DomTreeUpdater.h" export * }
   module IR_IRBuilder { header "IR/IRBuilder.h" export * }
@@ -388,7 +398,7 @@ module LLVM_Utils {
     umbrella "Support"
     module * { export * }
-    
+
     // Exclude this; it should only be used on Windows.
     exclude header "Support/Windows/WindowsSupport.h"
 
@@ -397,8 +407,9 @@ module LLVM_Utils {
     exclude header "Support/Solaris/sys/regset.h"
 
     // These are intended for textual inclusion.
-    textual header "Support/ARMTargetParser.def"
     textual header "Support/AArch64TargetParser.def"
+    textual header "Support/ARMTargetParser.def"
+    textual header "Support/RISCVTargetParser.def"
     textual header "Support/TargetOpcodes.def"
     textual header "Support/X86TargetParser.def"
   }
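(Illustrative note on the module.modulemap hunks above: the new .def tables, CodeGen/MachinePassRegistry.def, BinaryFormat/ELFRelocs/CSKY.def and Support/RISCVTargetParser.def, are declared as `textual header` because such files are meant to be re-included with caller-supplied macro definitions rather than compiled once into a module. A minimal sketch of that X-macro pattern follows, using llvm/IR/Instruction.def, a long-standing textual header; the opcodeName helper is invented for the example and roughly mirrors what Instruction::getOpcodeName does.)

// Sketch: consuming a "textual header" .def file. Each inclusion expands the
// table with whatever HANDLE_INST the includer defines at that point.
static const char *opcodeName(unsigned Opcode) {
  switch (Opcode) {
#define HANDLE_INST(N, OPC, CLASS)                                             \
  case N:                                                                      \
    return #OPC;
#include "llvm/IR/Instruction.def"
#undef HANDLE_INST
  default:
    return "<unknown opcode>";
  }
}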