diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2021-11-19 20:06:13 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2021-11-19 20:06:13 +0000 |
commit | c0981da47d5696fe36474fcf86b4ce03ae3ff818 (patch) | |
tree | f42add1021b9f2ac6a69ac7cf6c4499962739a45 /llvm/include | |
parent | 344a3780b2e33f6ca763666c380202b18aab72a3 (diff) | |
download | src-c0981da47d5696fe36474fcf86b4ce03ae3ff818.tar.gz src-c0981da47d5696fe36474fcf86b4ce03ae3ff818.zip |
Vendor import of llvm-project main llvmorg-14-init-10186-gff7f2cfa959b.vendor/llvm-project/llvmorg-14-init-10186-gff7f2cfa959b
Diffstat (limited to 'llvm/include')
485 files changed, 17447 insertions, 11766 deletions
diff --git a/llvm/include/llvm-c/Comdat.h b/llvm/include/llvm-c/Comdat.h index 81cde1107fa4..8002bc0581af 100644 --- a/llvm/include/llvm-c/Comdat.h +++ b/llvm/include/llvm-c/Comdat.h @@ -19,6 +19,13 @@ LLVM_C_EXTERN_C_BEGIN +/** + * @defgroup LLVMCCoreComdat Comdats + * @ingroup LLVMCCore + * + * @{ + */ + typedef enum { LLVMAnyComdatSelectionKind, ///< The linker may choose any COMDAT. LLVMExactMatchComdatSelectionKind, ///< The data referenced by the COMDAT must @@ -66,6 +73,10 @@ LLVMComdatSelectionKind LLVMGetComdatSelectionKind(LLVMComdatRef C); */ void LLVMSetComdatSelectionKind(LLVMComdatRef C, LLVMComdatSelectionKind Kind); +/** + * @} + */ + LLVM_C_EXTERN_C_END #endif diff --git a/llvm/include/llvm-c/Core.h b/llvm/include/llvm-c/Core.h index 1a5e763cfc60..d170eff17951 100644 --- a/llvm/include/llvm-c/Core.h +++ b/llvm/include/llvm-c/Core.h @@ -1580,10 +1580,10 @@ LLVMTypeRef LLVMX86AMXType(void); macro(ConstantVector) \ macro(GlobalValue) \ macro(GlobalAlias) \ - macro(GlobalIFunc) \ macro(GlobalObject) \ macro(Function) \ macro(GlobalVariable) \ + macro(GlobalIFunc) \ macro(UndefValue) \ macro(PoisonValue) \ macro(Instruction) \ @@ -3287,7 +3287,7 @@ void LLVMSetInstructionCallConv(LLVMValueRef Instr, unsigned CC); */ unsigned LLVMGetInstructionCallConv(LLVMValueRef Instr); -void LLVMSetInstrParamAlignment(LLVMValueRef Instr, unsigned index, +void LLVMSetInstrParamAlignment(LLVMValueRef Instr, LLVMAttributeIndex Idx, unsigned Align); void LLVMAddCallSiteAttribute(LLVMValueRef C, LLVMAttributeIndex Idx, @@ -3611,11 +3611,21 @@ void LLVMSetCurrentDebugLocation2(LLVMBuilderRef Builder, LLVMMetadataRef Loc); * current debug location for the given builder. If the builder has no current * debug location, this function is a no-op. * + * @deprecated LLVMSetInstDebugLocation is deprecated in favor of the more general + * LLVMAddMetadataToInst. 
+ * * @see llvm::IRBuilder::SetInstDebugLocation() */ void LLVMSetInstDebugLocation(LLVMBuilderRef Builder, LLVMValueRef Inst); /** + * Adds the metadata registered with the given builder to the given instruction. + * + * @see llvm::IRBuilder::AddMetadataToInst() + */ +void LLVMAddMetadataToInst(LLVMBuilderRef Builder, LLVMValueRef Inst); + +/** * Get the dafult floating-point math metadata for a given builder. * * @see llvm::IRBuilder::getDefaultFPMathTag() @@ -4081,6 +4091,7 @@ void LLVMDisposeMemoryBuffer(LLVMMemoryBufferRef MemBuf); /** * @defgroup LLVMCCorePassRegistry Pass Registry + * @ingroup LLVMCCore * * @{ */ @@ -4095,6 +4106,7 @@ LLVMPassRegistryRef LLVMGetGlobalPassRegistry(void); /** * @defgroup LLVMCCorePassManagers Pass Managers + * @ingroup LLVMCCore * * @{ */ diff --git a/llvm/include/llvm-c/DebugInfo.h b/llvm/include/llvm-c/DebugInfo.h index 8c085807914b..d7fb898b60d2 100644 --- a/llvm/include/llvm-c/DebugInfo.h +++ b/llvm/include/llvm-c/DebugInfo.h @@ -22,6 +22,13 @@ LLVM_C_EXTERN_C_BEGIN /** + * @defgroup LLVMCCoreDebugInfo Debug Information + * @ingroup LLVMCCore + * + * @{ + */ + +/** * Debug info flags. */ typedef enum { @@ -227,6 +234,13 @@ void LLVMDisposeDIBuilder(LLVMDIBuilderRef Builder); void LLVMDIBuilderFinalize(LLVMDIBuilderRef Builder); /** + * Finalize a specific subprogram. + * No new variables may be added to this subprogram afterwards. + */ +void LLVMDIBuilderFinalizeSubprogram(LLVMDIBuilderRef Builder, + LLVMMetadataRef Subprogram); + +/** * A CompileUnit provides an anchor for all debugging * information generated during this instance of compilation. * \param Lang Source programming language, eg. @@ -389,48 +403,48 @@ LLVMDIBuilderCreateImportedModuleFromNamespace(LLVMDIBuilderRef Builder, * \param ImportedEntity Previous imported entity to alias. * \param File File where the declaration is located. * \param Line Line number of the declaration. + * \param Elements Renamed elements. 
+ * \param NumElements Number of renamed elements. */ -LLVMMetadataRef -LLVMDIBuilderCreateImportedModuleFromAlias(LLVMDIBuilderRef Builder, - LLVMMetadataRef Scope, - LLVMMetadataRef ImportedEntity, - LLVMMetadataRef File, - unsigned Line); +LLVMMetadataRef LLVMDIBuilderCreateImportedModuleFromAlias( + LLVMDIBuilderRef Builder, LLVMMetadataRef Scope, + LLVMMetadataRef ImportedEntity, LLVMMetadataRef File, unsigned Line, + LLVMMetadataRef *Elements, unsigned NumElements); /** * Create a descriptor for an imported module. - * \param Builder The \c DIBuilder. - * \param Scope The scope this module is imported into - * \param M The module being imported here - * \param File File where the declaration is located. - * \param Line Line number of the declaration. + * \param Builder The \c DIBuilder. + * \param Scope The scope this module is imported into + * \param M The module being imported here + * \param File File where the declaration is located. + * \param Line Line number of the declaration. + * \param Elements Renamed elements. + * \param NumElements Number of renamed elements. */ -LLVMMetadataRef -LLVMDIBuilderCreateImportedModuleFromModule(LLVMDIBuilderRef Builder, - LLVMMetadataRef Scope, - LLVMMetadataRef M, - LLVMMetadataRef File, - unsigned Line); +LLVMMetadataRef LLVMDIBuilderCreateImportedModuleFromModule( + LLVMDIBuilderRef Builder, LLVMMetadataRef Scope, LLVMMetadataRef M, + LLVMMetadataRef File, unsigned Line, LLVMMetadataRef *Elements, + unsigned NumElements); /** * Create a descriptor for an imported function, type, or variable. Suitable * for e.g. FORTRAN-style USE declarations. - * \param Builder The DIBuilder. - * \param Scope The scope this module is imported into. - * \param Decl The declaration (or definition) of a function, type, - or variable. - * \param File File where the declaration is located. - * \param Line Line number of the declaration. - * \param Name A name that uniquely identifies this imported declaration. 
- * \param NameLen The length of the C string passed to \c Name. + * \param Builder The DIBuilder. + * \param Scope The scope this module is imported into. + * \param Decl The declaration (or definition) of a function, type, + or variable. + * \param File File where the declaration is located. + * \param Line Line number of the declaration. + * \param Name A name that uniquely identifies this imported + declaration. + * \param NameLen The length of the C string passed to \c Name. + * \param Elements Renamed elements. + * \param NumElements Number of renamed elements. */ -LLVMMetadataRef -LLVMDIBuilderCreateImportedDeclaration(LLVMDIBuilderRef Builder, - LLVMMetadataRef Scope, - LLVMMetadataRef Decl, - LLVMMetadataRef File, - unsigned Line, - const char *Name, size_t NameLen); +LLVMMetadataRef LLVMDIBuilderCreateImportedDeclaration( + LLVMDIBuilderRef Builder, LLVMMetadataRef Scope, LLVMMetadataRef Decl, + LLVMMetadataRef File, unsigned Line, const char *Name, size_t NameLen, + LLVMMetadataRef *Elements, unsigned NumElements); /** * Creates a new DebugLocation that describes a source location. @@ -1360,6 +1374,10 @@ void LLVMInstructionSetDebugLoc(LLVMValueRef Inst, LLVMMetadataRef Loc); */ LLVMMetadataKind LLVMGetMetadataKind(LLVMMetadataRef Metadata); +/** + * @} + */ + LLVM_C_EXTERN_C_END #endif diff --git a/llvm/include/llvm-c/DisassemblerTypes.h b/llvm/include/llvm-c/DisassemblerTypes.h index ae5c68227594..53baaef11033 100644 --- a/llvm/include/llvm-c/DisassemblerTypes.h +++ b/llvm/include/llvm-c/DisassemblerTypes.h @@ -18,6 +18,12 @@ #endif /** + * @addtogroup LLVMCDisassembler + * + * @{ + */ + +/** * An opaque reference to a disassembler context. */ typedef void *LLVMDisasmContextRef; @@ -157,4 +163,8 @@ typedef const char *(*LLVMSymbolLookupCallback)(void *DisInfo, /* The output reference is to a C++ symbol name. 
*/ #define LLVMDisassembler_ReferenceType_DeMangled_Name 9 +/** + * @} + */ + #endif diff --git a/llvm/include/llvm-c/Error.h b/llvm/include/llvm-c/Error.h index bc702ac7a1bf..c3baaf65186a 100644 --- a/llvm/include/llvm-c/Error.h +++ b/llvm/include/llvm-c/Error.h @@ -18,6 +18,13 @@ LLVM_C_EXTERN_C_BEGIN +/** + * @defgroup LLVMCError Error Handling + * @ingroup LLVMC + * + * @{ + */ + #define LLVMErrorSuccess 0 /** @@ -67,6 +74,10 @@ LLVMErrorTypeId LLVMGetStringErrorTypeId(void); */ LLVMErrorRef LLVMCreateStringError(const char *ErrMsg); +/** + * @} + */ + LLVM_C_EXTERN_C_END #endif diff --git a/llvm/include/llvm-c/ErrorHandling.h b/llvm/include/llvm-c/ErrorHandling.h index 5ba099c209c0..d9b9f22752b8 100644 --- a/llvm/include/llvm-c/ErrorHandling.h +++ b/llvm/include/llvm-c/ErrorHandling.h @@ -18,6 +18,12 @@ LLVM_C_EXTERN_C_BEGIN +/** + * @addtogroup LLVMCError + * + * @{ + */ + typedef void (*LLVMFatalErrorHandler)(const char *Reason); /** @@ -42,6 +48,10 @@ void LLVMResetFatalErrorHandler(void); */ void LLVMEnablePrettyStackTrace(void); +/** + * @} + */ + LLVM_C_EXTERN_C_END #endif diff --git a/llvm/include/llvm-c/IRReader.h b/llvm/include/llvm-c/IRReader.h index 5a3f633c3d91..905b84fa5a86 100644 --- a/llvm/include/llvm-c/IRReader.h +++ b/llvm/include/llvm-c/IRReader.h @@ -20,6 +20,13 @@ LLVM_C_EXTERN_C_BEGIN /** + * @defgroup LLVMCCoreIRReader IR Reader + * @ingroup LLVMCCore + * + * @{ + */ + +/** * Read LLVM IR from a memory buffer and convert it into an in-memory Module * object. Returns 0 on success. 
* Optionally returns a human-readable description of any errors that @@ -32,6 +39,10 @@ LLVMBool LLVMParseIRInContext(LLVMContextRef ContextRef, LLVMMemoryBufferRef MemBuf, LLVMModuleRef *OutM, char **OutMessage); +/** + * @} + */ + LLVM_C_EXTERN_C_END #endif diff --git a/llvm/include/llvm-c/LLJIT.h b/llvm/include/llvm-c/LLJIT.h index f689ca0f1cf0..a06133aac4fb 100644 --- a/llvm/include/llvm-c/LLJIT.h +++ b/llvm/include/llvm-c/LLJIT.h @@ -32,6 +32,13 @@ LLVM_C_EXTERN_C_BEGIN /** + * @defgroup LLVMCExecutionEngineLLJIT LLJIT + * @ingroup LLVMCExecutionEngine + * + * @{ + */ + +/** * A function for constructing an ObjectLinkingLayer instance to be used * by an LLJIT instance. * @@ -235,6 +242,10 @@ LLVMOrcIRTransformLayerRef LLVMOrcLLJITGetIRTransformLayer(LLVMOrcLLJITRef J); */ const char *LLVMOrcLLJITGetDataLayoutStr(LLVMOrcLLJITRef J); +/** + * @} + */ + LLVM_C_EXTERN_C_END #endif /* LLVM_C_LLJIT_H */ diff --git a/llvm/include/llvm-c/Linker.h b/llvm/include/llvm-c/Linker.h index 1ad9cc958753..acff5d5e2225 100644 --- a/llvm/include/llvm-c/Linker.h +++ b/llvm/include/llvm-c/Linker.h @@ -19,6 +19,13 @@ LLVM_C_EXTERN_C_BEGIN +/** + * @defgroup LLVMCCoreLinker Linker + * @ingroup LLVMCCore + * + * @{ + */ + /* This enum is provided for backwards-compatibility only. It has no effect. */ typedef enum { LLVMLinkerDestroySource = 0, /* This is the default behavior. */ @@ -35,4 +42,8 @@ LLVMBool LLVMLinkModules2(LLVMModuleRef Dest, LLVMModuleRef Src); LLVM_C_EXTERN_C_END +/** + * @} + */ + #endif diff --git a/llvm/include/llvm-c/Orc.h b/llvm/include/llvm-c/Orc.h index 1790afbcecc7..e2f30b7cdf45 100644 --- a/llvm/include/llvm-c/Orc.h +++ b/llvm/include/llvm-c/Orc.h @@ -34,6 +34,13 @@ LLVM_C_EXTERN_C_BEGIN /** + * @defgroup LLVMCExecutionEngineORC On-Request-Compilation + * @ingroup LLVMCExecutionEngine + * + * @{ + */ + +/** * Represents an address in the executor process. 
*/ typedef uint64_t LLVMOrcJITTargetAddress; @@ -921,6 +928,49 @@ LLVMErrorRef LLVMOrcCreateDynamicLibrarySearchGeneratorForProcess( LLVMOrcSymbolPredicate Filter, void *FilterCtx); /** + * Get a LLVMOrcCreateDynamicLibararySearchGeneratorForPath that will reflect + * library symbols into the JITDylib. On success the resulting generator is + * owned by the client. Ownership is typically transferred by adding the + * instance to a JITDylib using LLVMOrcJITDylibAddGenerator, + * + * The GlobalPrefix argument specifies the character that appears on the front + * of linker-mangled symbols for the target platform (e.g. '_' on MachO). + * If non-null, this character will be stripped from the start of all symbol + * strings before passing the remaining substring to dlsym. + * + * The optional Filter and Ctx arguments can be used to supply a symbol name + * filter: Only symbols for which the filter returns true will be visible to + * JIT'd code. If the Filter argument is null then all library symbols will + * be visible to JIT'd code. Note that the symbol name passed to the Filter + * function is the full mangled symbol: The client is responsible for stripping + * the global prefix if present. + * + * THIS API IS EXPERIMENTAL AND LIKELY TO CHANGE IN THE NEAR FUTURE! + * + */ +LLVMErrorRef LLVMOrcCreateDynamicLibrarySearchGeneratorForPath( + LLVMOrcDefinitionGeneratorRef *Result, const char *FileName, + char GlobalPrefix, LLVMOrcSymbolPredicate Filter, void *FilterCtx); + +/** + * Get a LLVMOrcCreateStaticLibrarySearchGeneratorForPath that will reflect + * static library symbols into the JITDylib. On success the resulting + * generator is owned by the client. 
Ownership is typically transferred by + * adding the instance to a JITDylib using LLVMOrcJITDylibAddGenerator, + * + * Call with the optional TargetTriple argument will succeed if the file at + * the given path is a static library or a MachO universal binary containing a + * static library that is compatible with the given triple. Otherwise it will + * return an error. + * + * THIS API IS EXPERIMENTAL AND LIKELY TO CHANGE IN THE NEAR FUTURE! + * + */ +LLVMErrorRef LLVMOrcCreateStaticLibrarySearchGeneratorForPath( + LLVMOrcDefinitionGeneratorRef *Result, LLVMOrcObjectLayerRef ObjLayer, + const char *FileName, const char *TargetTriple); + +/** * Create a ThreadSafeContext containing a new LLVMContext. * * Ownership of the underlying ThreadSafeContext data is shared: Clients @@ -1133,6 +1183,10 @@ void LLVMOrcDisposeDumpObjects(LLVMOrcDumpObjectsRef DumpObjects); LLVMErrorRef LLVMOrcDumpObjects_CallOperator(LLVMOrcDumpObjectsRef DumpObjects, LLVMMemoryBufferRef *ObjBuffer); +/** + * @} + */ + LLVM_C_EXTERN_C_END #endif /* LLVM_C_ORC_H */ diff --git a/llvm/include/llvm-c/OrcEE.h b/llvm/include/llvm-c/OrcEE.h index 2435e7421a42..e7ae0f5e6be2 100644 --- a/llvm/include/llvm-c/OrcEE.h +++ b/llvm/include/llvm-c/OrcEE.h @@ -33,6 +33,13 @@ LLVM_C_EXTERN_C_BEGIN /** + * @defgroup LLVMCExecutionEngineORCEE ExecutionEngine-based ORC Utils + * @ingroup LLVMCExecutionEngine + * + * @{ + */ + +/** * Create a RTDyldObjectLinkingLayer instance using the standard * SectionMemoryManager for memory management. 
*/ @@ -50,6 +57,10 @@ void LLVMOrcRTDyldObjectLinkingLayerRegisterJITEventListener( LLVMOrcObjectLayerRef RTDyldObjLinkingLayer, LLVMJITEventListenerRef Listener); +/** + * @} + */ + LLVM_C_EXTERN_C_END #endif /* LLVM_C_ORCEE_H */ diff --git a/llvm/include/llvm-c/Support.h b/llvm/include/llvm-c/Support.h index 866df32efa98..17657861b32b 100644 --- a/llvm/include/llvm-c/Support.h +++ b/llvm/include/llvm-c/Support.h @@ -21,6 +21,12 @@ LLVM_C_EXTERN_C_BEGIN /** + * @addtogroup LLVMCCore + * + * @{ + */ + +/** * This function permanently loads the dynamic library at the given path. * It is safe to call this function multiple times for the same library. * @@ -57,6 +63,10 @@ void *LLVMSearchForAddressOfSymbol(const char *symbolName); */ void LLVMAddSymbol(const char *symbolName, void *symbolValue); +/** + * @} + */ + LLVM_C_EXTERN_C_END #endif diff --git a/llvm/include/llvm-c/TargetMachine.h b/llvm/include/llvm-c/TargetMachine.h index f82edd948b59..23c8c63ff0b4 100644 --- a/llvm/include/llvm-c/TargetMachine.h +++ b/llvm/include/llvm-c/TargetMachine.h @@ -25,6 +25,12 @@ LLVM_C_EXTERN_C_BEGIN +/** + * @addtogroup LLVMCTarget + * + * @{ + */ + typedef struct LLVMOpaqueTargetMachine *LLVMTargetMachineRef; typedef struct LLVMTarget *LLVMTargetRef; @@ -156,6 +162,10 @@ char* LLVMGetHostCPUFeatures(void); /** Adds the target-specific analysis passes to the pass manager. 
*/ void LLVMAddAnalysisPasses(LLVMTargetMachineRef T, LLVMPassManagerRef PM); +/** + * @} + */ + LLVM_C_EXTERN_C_END #endif diff --git a/llvm/include/llvm-c/Transforms/PassBuilder.h b/llvm/include/llvm-c/Transforms/PassBuilder.h index 5635f10d6877..6d9f1b45c707 100644 --- a/llvm/include/llvm-c/Transforms/PassBuilder.h +++ b/llvm/include/llvm-c/Transforms/PassBuilder.h @@ -18,6 +18,13 @@ #include "llvm-c/TargetMachine.h" #include "llvm-c/Types.h" +/** + * @defgroup LLVMCCoreNewPM New Pass Manager + * @ingroup LLVMCCore + * + * @{ + */ + LLVM_C_EXTERN_C_BEGIN /** @@ -50,7 +57,7 @@ LLVMErrorRef LLVMRunPasses(LLVMModuleRef M, const char *Passes, * responsible for it. The client should call LLVMDisposePassBuilderOptions * to free the pass builder options. */ -LLVMPassBuilderOptionsRef LLVMCreatePassBuilderOptions(); +LLVMPassBuilderOptionsRef LLVMCreatePassBuilderOptions(void); /** * Toggle adding the VerifierPass for the PassBuilder, ensuring all functions @@ -97,6 +104,10 @@ void LLVMPassBuilderOptionsSetMergeFunctions(LLVMPassBuilderOptionsRef Options, */ void LLVMDisposePassBuilderOptions(LLVMPassBuilderOptionsRef Options); +/** + * @} + */ + LLVM_C_EXTERN_C_END #endif // LLVM_C_TRANSFORMS_PASSBUILDER_H diff --git a/llvm/include/llvm-c/lto.h b/llvm/include/llvm-c/lto.h index f6fc8588f5f7..5ceb02224d2b 100644 --- a/llvm/include/llvm-c/lto.h +++ b/llvm/include/llvm-c/lto.h @@ -46,7 +46,7 @@ typedef bool lto_bool_t; * @{ */ -#define LTO_API_VERSION 28 +#define LTO_API_VERSION 29 /** * \since prior to LTO_API_VERSION=3 @@ -313,6 +313,16 @@ extern lto_bool_t lto_module_get_macho_cputype(lto_module_t mod, unsigned int *out_cpusubtype); /** + * This function can be used by the linker to check if a given module has + * any constructor or destructor functions. + * + * Returns true if the module has either the @llvm.global_ctors or the + * @llvm.global_dtors symbol. Otherwise returns false. 
+ * + * \since LTO_API_VERSION=29 + */ +extern lto_bool_t lto_module_has_ctor_dtor(lto_module_t mod); +/** * Diagnostic severity. * * \since LTO_API_VERSION=7 diff --git a/llvm/include/llvm/ADT/APFloat.h b/llvm/include/llvm/ADT/APFloat.h index f493a03b4b87..40e0e32c77a8 100644 --- a/llvm/include/llvm/ADT/APFloat.h +++ b/llvm/include/llvm/ADT/APFloat.h @@ -961,9 +961,7 @@ public: /// Returns a float which is bitcasted from an all one value int. /// /// \param Semantics - type float semantics - /// \param BitWidth - Select float type - static APFloat getAllOnesValue(const fltSemantics &Semantics, - unsigned BitWidth); + static APFloat getAllOnesValue(const fltSemantics &Semantics); /// Used to insert APFloat objects, or objects that contain APFloat objects, /// into FoldingSets. diff --git a/llvm/include/llvm/ADT/APInt.h b/llvm/include/llvm/ADT/APInt.h index ff586f763e82..595cd94b6b8f 100644 --- a/llvm/include/llvm/ADT/APInt.h +++ b/llvm/include/llvm/ADT/APInt.h @@ -31,7 +31,7 @@ class raw_ostream; template <typename T> class SmallVectorImpl; template <typename T> class ArrayRef; template <typename T> class Optional; -template <typename T> struct DenseMapInfo; +template <typename T, typename Enable> struct DenseMapInfo; class APInt; @@ -66,6 +66,11 @@ inline APInt operator-(APInt); /// not. /// * In general, the class tries to follow the style of computation that LLVM /// uses in its IR. This simplifies its use for LLVM. +/// * APInt supports zero-bit-width values, but operations that require bits +/// are not defined on it (e.g. you cannot ask for the sign of a zero-bit +/// integer). This means that operations like zero extension and logical +/// shifts are defined, but sign extension and ashr is not. Zero bit values +/// compare and hash equal to themselves, and countLeadingZeros returns 0. 
/// class LLVM_NODISCARD APInt { public: @@ -87,176 +92,6 @@ public: static constexpr WordType WORDTYPE_MAX = ~WordType(0); -private: - /// This union is used to store the integer value. When the - /// integer bit-width <= 64, it uses VAL, otherwise it uses pVal. - union { - uint64_t VAL; ///< Used to store the <= 64 bits integer value. - uint64_t *pVal; ///< Used to store the >64 bits integer value. - } U; - - unsigned BitWidth; ///< The number of bits in this APInt. - - friend struct DenseMapInfo<APInt>; - - friend class APSInt; - - /// Fast internal constructor - /// - /// This constructor is used only internally for speed of construction of - /// temporaries. It is unsafe for general use so it is not public. - APInt(uint64_t *val, unsigned bits) : BitWidth(bits) { - U.pVal = val; - } - - /// Determine which word a bit is in. - /// - /// \returns the word position for the specified bit position. - static unsigned whichWord(unsigned bitPosition) { - return bitPosition / APINT_BITS_PER_WORD; - } - - /// Determine which bit in a word a bit is in. - /// - /// \returns the bit position in a word for the specified bit position - /// in the APInt. - static unsigned whichBit(unsigned bitPosition) { - return bitPosition % APINT_BITS_PER_WORD; - } - - /// Get a single bit mask. - /// - /// \returns a uint64_t with only bit at "whichBit(bitPosition)" set - /// This method generates and returns a uint64_t (word) mask for a single - /// bit at a specific bit position. This is used to mask the bit in the - /// corresponding word. - static uint64_t maskBit(unsigned bitPosition) { - return 1ULL << whichBit(bitPosition); - } - - /// Clear unused high order bits - /// - /// This method is used internally to clear the top "N" bits in the high order - /// word that are not used by the APInt. This is needed after the most - /// significant word is assigned a value to ensure that those bits are - /// zero'd out. 
- APInt &clearUnusedBits() { - // Compute how many bits are used in the final word - unsigned WordBits = ((BitWidth-1) % APINT_BITS_PER_WORD) + 1; - - // Mask out the high bits. - uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - WordBits); - if (isSingleWord()) - U.VAL &= mask; - else - U.pVal[getNumWords() - 1] &= mask; - return *this; - } - - /// Get the word corresponding to a bit position - /// \returns the corresponding word for the specified bit position. - uint64_t getWord(unsigned bitPosition) const { - return isSingleWord() ? U.VAL : U.pVal[whichWord(bitPosition)]; - } - - /// Utility method to change the bit width of this APInt to new bit width, - /// allocating and/or deallocating as necessary. There is no guarantee on the - /// value of any bits upon return. Caller should populate the bits after. - void reallocate(unsigned NewBitWidth); - - /// Convert a char array into an APInt - /// - /// \param radix 2, 8, 10, 16, or 36 - /// Converts a string into a number. The string must be non-empty - /// and well-formed as a number of the given base. The bit-width - /// must be sufficient to hold the result. - /// - /// This is used by the constructors that take string arguments. - /// - /// StringRef::getAsInteger is superficially similar but (1) does - /// not assume that the string is well-formed and (2) grows the - /// result to hold the input. - void fromString(unsigned numBits, StringRef str, uint8_t radix); - - /// An internal division function for dividing APInts. - /// - /// This is used by the toString method to divide by the radix. It simply - /// provides a more convenient form of divide for internal use since KnuthDiv - /// has specific constraints on its inputs. If those constraints are not met - /// then it provides a simpler form of divide. 
- static void divide(const WordType *LHS, unsigned lhsWords, - const WordType *RHS, unsigned rhsWords, WordType *Quotient, - WordType *Remainder); - - /// out-of-line slow case for inline constructor - void initSlowCase(uint64_t val, bool isSigned); - - /// shared code between two array constructors - void initFromArray(ArrayRef<uint64_t> array); - - /// out-of-line slow case for inline copy constructor - void initSlowCase(const APInt &that); - - /// out-of-line slow case for shl - void shlSlowCase(unsigned ShiftAmt); - - /// out-of-line slow case for lshr. - void lshrSlowCase(unsigned ShiftAmt); - - /// out-of-line slow case for ashr. - void ashrSlowCase(unsigned ShiftAmt); - - /// out-of-line slow case for operator= - void AssignSlowCase(const APInt &RHS); - - /// out-of-line slow case for operator== - bool EqualSlowCase(const APInt &RHS) const LLVM_READONLY; - - /// out-of-line slow case for countLeadingZeros - unsigned countLeadingZerosSlowCase() const LLVM_READONLY; - - /// out-of-line slow case for countLeadingOnes. - unsigned countLeadingOnesSlowCase() const LLVM_READONLY; - - /// out-of-line slow case for countTrailingZeros. - unsigned countTrailingZerosSlowCase() const LLVM_READONLY; - - /// out-of-line slow case for countTrailingOnes - unsigned countTrailingOnesSlowCase() const LLVM_READONLY; - - /// out-of-line slow case for countPopulation - unsigned countPopulationSlowCase() const LLVM_READONLY; - - /// out-of-line slow case for intersects. - bool intersectsSlowCase(const APInt &RHS) const LLVM_READONLY; - - /// out-of-line slow case for isSubsetOf. - bool isSubsetOfSlowCase(const APInt &RHS) const LLVM_READONLY; - - /// out-of-line slow case for setBits. - void setBitsSlowCase(unsigned loBit, unsigned hiBit); - - /// out-of-line slow case for flipAllBits. - void flipAllBitsSlowCase(); - - /// out-of-line slow case for operator&=. - void AndAssignSlowCase(const APInt& RHS); - - /// out-of-line slow case for operator|=. 
- void OrAssignSlowCase(const APInt& RHS); - - /// out-of-line slow case for operator^=. - void XorAssignSlowCase(const APInt& RHS); - - /// Unsigned comparison. Returns -1, 0, or 1 if this APInt is less than, equal - /// to, or greater than RHS. - int compare(const APInt &RHS) const LLVM_READONLY; - - /// Signed comparison. Returns -1, 0, or 1 if this APInt is less than, equal - /// to, or greater than RHS. - int compareSigned(const APInt &RHS) const LLVM_READONLY; - -public: /// \name Constructors /// @{ @@ -272,7 +107,6 @@ public: /// \param isSigned how to treat signedness of val APInt(unsigned numBits, uint64_t val, bool isSigned = false) : BitWidth(numBits) { - assert(BitWidth && "bitwidth too small"); if (isSingleWord()) { U.VAL = val; clearUnusedBits(); @@ -312,7 +146,9 @@ public: /// \param radix the radix to use for the conversion APInt(unsigned numBits, StringRef str, uint8_t radix); - /// Simply makes *this a copy of that. + /// Default constructor that creates an APInt with a 1-bit zero value. + explicit APInt() : BitWidth(1) { U.VAL = 0; } + /// Copy Constructor. APInt(const APInt &that) : BitWidth(that.BitWidth) { if (isSingleWord()) @@ -333,19 +169,131 @@ public: delete[] U.pVal; } - /// Default constructor that creates an uninteresting APInt - /// representing a 1-bit zero value. + /// @} + /// \name Value Generators + /// @{ + + /// Get the '0' value for the specified bit-width. + static APInt getZero(unsigned numBits) { return APInt(numBits, 0); } + + /// NOTE: This is soft-deprecated. Please use `getZero()` instead. + static APInt getNullValue(unsigned numBits) { return getZero(numBits); } + + /// Return an APInt zero bits wide. + static APInt getZeroWidth() { return getZero(0); } + + /// Gets maximum unsigned value of APInt for specific bit width. + static APInt getMaxValue(unsigned numBits) { return getAllOnes(numBits); } + + /// Gets maximum signed value of APInt for a specific bit width. 
+ static APInt getSignedMaxValue(unsigned numBits) { + APInt API = getAllOnes(numBits); + API.clearBit(numBits - 1); + return API; + } + + /// Gets minimum unsigned value of APInt for a specific bit width. + static APInt getMinValue(unsigned numBits) { return APInt(numBits, 0); } + + /// Gets minimum signed value of APInt for a specific bit width. + static APInt getSignedMinValue(unsigned numBits) { + APInt API(numBits, 0); + API.setBit(numBits - 1); + return API; + } + + /// Get the SignMask for a specific bit width. /// - /// This is useful for object deserialization (pair this with the static - /// method Read). - explicit APInt() : BitWidth(1) { U.VAL = 0; } + /// This is just a wrapper function of getSignedMinValue(), and it helps code + /// readability when we want to get a SignMask. + static APInt getSignMask(unsigned BitWidth) { + return getSignedMinValue(BitWidth); + } - /// Returns whether this instance allocated memory. - bool needsCleanup() const { return !isSingleWord(); } + /// Return an APInt of a specified width with all bits set. + static APInt getAllOnes(unsigned numBits) { + return APInt(numBits, WORDTYPE_MAX, true); + } - /// Used to insert APInt objects, or objects that contain APInt objects, into - /// FoldingSets. - void Profile(FoldingSetNodeID &id) const; + /// NOTE: This is soft-deprecated. Please use `getAllOnes()` instead. + static APInt getAllOnesValue(unsigned numBits) { return getAllOnes(numBits); } + + /// Return an APInt with exactly one bit set in the result. + static APInt getOneBitSet(unsigned numBits, unsigned BitNo) { + APInt Res(numBits, 0); + Res.setBit(BitNo); + return Res; + } + + /// Get a value with a block of bits set. + /// + /// Constructs an APInt value that has a contiguous range of bits set. The + /// bits from loBit (inclusive) to hiBit (exclusive) will be set. All other + /// bits will be zero. For example, with parameters(32, 0, 16) you would get + /// 0x0000FFFF. 
Please call getBitsSetWithWrap if \p loBit may be greater than + /// \p hiBit. + /// + /// \param numBits the intended bit width of the result + /// \param loBit the index of the lowest bit set. + /// \param hiBit the index of the highest bit set. + /// + /// \returns An APInt value with the requested bits set. + static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit) { + APInt Res(numBits, 0); + Res.setBits(loBit, hiBit); + return Res; + } + + /// Wrap version of getBitsSet. + /// If \p hiBit is bigger than \p loBit, this is same with getBitsSet. + /// If \p hiBit is not bigger than \p loBit, the set bits "wrap". For example, + /// with parameters (32, 28, 4), you would get 0xF000000F. + /// If \p hiBit is equal to \p loBit, you would get a result with all bits + /// set. + static APInt getBitsSetWithWrap(unsigned numBits, unsigned loBit, + unsigned hiBit) { + APInt Res(numBits, 0); + Res.setBitsWithWrap(loBit, hiBit); + return Res; + } + + /// Constructs an APInt value that has a contiguous range of bits set. The + /// bits from loBit (inclusive) to numBits (exclusive) will be set. All other + /// bits will be zero. For example, with parameters(32, 12) you would get + /// 0xFFFFF000. + /// + /// \param numBits the intended bit width of the result + /// \param loBit the index of the lowest bit to set. + /// + /// \returns An APInt value with the requested bits set. + static APInt getBitsSetFrom(unsigned numBits, unsigned loBit) { + APInt Res(numBits, 0); + Res.setBitsFrom(loBit); + return Res; + } + + /// Constructs an APInt value that has the top hiBitsSet bits set. + /// + /// \param numBits the bitwidth of the result + /// \param hiBitsSet the number of high-order bits set in the result. + static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet) { + APInt Res(numBits, 0); + Res.setHighBits(hiBitsSet); + return Res; + } + + /// Constructs an APInt value that has the bottom loBitsSet bits set. 
+ /// + /// \param numBits the bitwidth of the result + /// \param loBitsSet the number of low-order bits set in the result. + static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet) { + APInt Res(numBits, 0); + Res.setLowBits(loBitsSet); + return Res; + } + + /// Return a value containing V broadcasted over NewLen bits. + static APInt getSplat(unsigned NewLen, const APInt &V); /// @} /// \name Value Tests @@ -373,7 +321,7 @@ public: /// This tests the high bit of this APInt to determine if it is set. /// /// \returns true if this APInt has its sign bit set, false otherwise. - bool isSignBitSet() const { return (*this)[BitWidth-1]; } + bool isSignBitSet() const { return (*this)[BitWidth - 1]; } /// Determine if sign bit of this APInt is clear. /// @@ -388,50 +336,62 @@ public: /// that 0 is not a positive value. /// /// \returns true if this APInt is positive. - bool isStrictlyPositive() const { return isNonNegative() && !isNullValue(); } + bool isStrictlyPositive() const { return isNonNegative() && !isZero(); } /// Determine if this APInt Value is non-positive (<= 0). /// /// \returns true if this APInt is non-positive. bool isNonPositive() const { return !isStrictlyPositive(); } - /// Determine if all bits are set - /// - /// This checks to see if the value has all bits of the APInt are set or not. - bool isAllOnesValue() const { + /// Determine if all bits are set. This is true for zero-width values. + bool isAllOnes() const { + if (BitWidth == 0) + return true; if (isSingleWord()) return U.VAL == WORDTYPE_MAX >> (APINT_BITS_PER_WORD - BitWidth); return countTrailingOnesSlowCase() == BitWidth; } - /// Determine if all bits are clear - /// - /// This checks to see if the value has all bits of the APInt are clear or - /// not. - bool isNullValue() const { return !*this; } + /// NOTE: This is soft-deprecated. Please use `isAllOnes()` instead. + bool isAllOnesValue() const { return isAllOnes(); } + + /// Determine if this value is zero, i.e. 
all bits are clear. + bool isZero() const { + if (isSingleWord()) + return U.VAL == 0; + return countLeadingZerosSlowCase() == BitWidth; + } + + /// NOTE: This is soft-deprecated. Please use `isZero()` instead. + bool isNullValue() const { return isZero(); } /// Determine if this is a value of 1. /// /// This checks to see if the value of this APInt is one. - bool isOneValue() const { + bool isOne() const { if (isSingleWord()) return U.VAL == 1; return countLeadingZerosSlowCase() == BitWidth - 1; } + /// NOTE: This is soft-deprecated. Please use `isOne()` instead. + bool isOneValue() const { return isOne(); } + /// Determine if this is the largest unsigned value. /// /// This checks to see if the value of this APInt is the maximum unsigned /// value for the APInt's bit width. - bool isMaxValue() const { return isAllOnesValue(); } + bool isMaxValue() const { return isAllOnes(); } /// Determine if this is the largest signed value. /// /// This checks to see if the value of this APInt is the maximum signed /// value for the APInt's bit width. bool isMaxSignedValue() const { - if (isSingleWord()) + if (isSingleWord()) { + assert(BitWidth && "zero width values not allowed"); return U.VAL == ((WordType(1) << (BitWidth - 1)) - 1); + } return !isNegative() && countTrailingOnesSlowCase() == BitWidth - 1; } @@ -439,39 +399,48 @@ public: /// /// This checks to see if the value of this APInt is the minimum unsigned /// value for the APInt's bit width. - bool isMinValue() const { return isNullValue(); } + bool isMinValue() const { return isZero(); } /// Determine if this is the smallest signed value. /// /// This checks to see if the value of this APInt is the minimum signed /// value for the APInt's bit width. 
bool isMinSignedValue() const { - if (isSingleWord()) + if (isSingleWord()) { + assert(BitWidth && "zero width values not allowed"); return U.VAL == (WordType(1) << (BitWidth - 1)); + } return isNegative() && countTrailingZerosSlowCase() == BitWidth - 1; } /// Check if this APInt has an N-bits unsigned integer value. - bool isIntN(unsigned N) const { - assert(N && "N == 0 ???"); - return getActiveBits() <= N; - } + bool isIntN(unsigned N) const { return getActiveBits() <= N; } /// Check if this APInt has an N-bits signed integer value. - bool isSignedIntN(unsigned N) const { - assert(N && "N == 0 ???"); - return getMinSignedBits() <= N; - } + bool isSignedIntN(unsigned N) const { return getMinSignedBits() <= N; } /// Check if this APInt's value is a power of two greater than zero. /// /// \returns true if the argument APInt value is a power of two > 0. bool isPowerOf2() const { - if (isSingleWord()) + if (isSingleWord()) { + assert(BitWidth && "zero width values not allowed"); return isPowerOf2_64(U.VAL); + } return countPopulationSlowCase() == 1; } + /// Check if this APInt's negated value is a power of two greater than zero. + bool isNegatedPowerOf2() const { + assert(BitWidth && "zero width values not allowed"); + if (isNonNegative()) + return false; + // NegatedPowerOf2 - shifted mask in the top bits. + unsigned LO = countLeadingOnes(); + unsigned TZ = countTrailingZeros(); + return (LO + TZ) == BitWidth; + } + /// Check if the APInt's value is returned by getSignMask. /// /// \returns true if this is the value returned by getSignMask. @@ -480,7 +449,7 @@ public: /// Convert APInt to a boolean value. /// /// This converts the APInt to a boolean value as a test against zero. - bool getBoolValue() const { return !!*this; } + bool getBoolValue() const { return !isZero(); } /// If this value is smaller than the specified limit, return it, otherwise /// return the limit value. This causes the value to saturate to the limit. 
@@ -527,152 +496,22 @@ public: return (Ones + LeadZ + countTrailingZeros()) == BitWidth; } - /// @} - /// \name Value Generators - /// @{ - - /// Gets maximum unsigned value of APInt for specific bit width. - static APInt getMaxValue(unsigned numBits) { - return getAllOnesValue(numBits); - } - - /// Gets maximum signed value of APInt for a specific bit width. - static APInt getSignedMaxValue(unsigned numBits) { - APInt API = getAllOnesValue(numBits); - API.clearBit(numBits - 1); - return API; - } - - /// Gets minimum unsigned value of APInt for a specific bit width. - static APInt getMinValue(unsigned numBits) { return APInt(numBits, 0); } - - /// Gets minimum signed value of APInt for a specific bit width. - static APInt getSignedMinValue(unsigned numBits) { - APInt API(numBits, 0); - API.setBit(numBits - 1); - return API; - } - - /// Get the SignMask for a specific bit width. - /// - /// This is just a wrapper function of getSignedMinValue(), and it helps code - /// readability when we want to get a SignMask. - static APInt getSignMask(unsigned BitWidth) { - return getSignedMinValue(BitWidth); - } - - /// Get the all-ones value. - /// - /// \returns the all-ones value for an APInt of the specified bit-width. - static APInt getAllOnesValue(unsigned numBits) { - return APInt(numBits, WORDTYPE_MAX, true); - } - - /// Get the '0' value. - /// - /// \returns the '0' value for an APInt of the specified bit-width. - static APInt getNullValue(unsigned numBits) { return APInt(numBits, 0); } - /// Compute an APInt containing numBits highbits from this APInt. /// - /// Get an APInt with the same BitWidth as this APInt, just zero mask - /// the low bits and right shift to the least significant bit. + /// Get an APInt with the same BitWidth as this APInt, just zero mask the low + /// bits and right shift to the least significant bit. /// /// \returns the high "numBits" bits of this APInt. 
APInt getHiBits(unsigned numBits) const; /// Compute an APInt containing numBits lowbits from this APInt. /// - /// Get an APInt with the same BitWidth as this APInt, just zero mask - /// the high bits. + /// Get an APInt with the same BitWidth as this APInt, just zero mask the high + /// bits. /// /// \returns the low "numBits" bits of this APInt. APInt getLoBits(unsigned numBits) const; - /// Return an APInt with exactly one bit set in the result. - static APInt getOneBitSet(unsigned numBits, unsigned BitNo) { - APInt Res(numBits, 0); - Res.setBit(BitNo); - return Res; - } - - /// Get a value with a block of bits set. - /// - /// Constructs an APInt value that has a contiguous range of bits set. The - /// bits from loBit (inclusive) to hiBit (exclusive) will be set. All other - /// bits will be zero. For example, with parameters(32, 0, 16) you would get - /// 0x0000FFFF. Please call getBitsSetWithWrap if \p loBit may be greater than - /// \p hiBit. - /// - /// \param numBits the intended bit width of the result - /// \param loBit the index of the lowest bit set. - /// \param hiBit the index of the highest bit set. - /// - /// \returns An APInt value with the requested bits set. - static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit) { - assert(loBit <= hiBit && "loBit greater than hiBit"); - APInt Res(numBits, 0); - Res.setBits(loBit, hiBit); - return Res; - } - - /// Wrap version of getBitsSet. - /// If \p hiBit is bigger than \p loBit, this is same with getBitsSet. - /// If \p hiBit is not bigger than \p loBit, the set bits "wrap". For example, - /// with parameters (32, 28, 4), you would get 0xF000000F. - /// If \p hiBit is equal to \p loBit, you would get a result with all bits - /// set. - static APInt getBitsSetWithWrap(unsigned numBits, unsigned loBit, - unsigned hiBit) { - APInt Res(numBits, 0); - Res.setBitsWithWrap(loBit, hiBit); - return Res; - } - - /// Get a value with upper bits starting at loBit set. 
- /// - /// Constructs an APInt value that has a contiguous range of bits set. The - /// bits from loBit (inclusive) to numBits (exclusive) will be set. All other - /// bits will be zero. For example, with parameters(32, 12) you would get - /// 0xFFFFF000. - /// - /// \param numBits the intended bit width of the result - /// \param loBit the index of the lowest bit to set. - /// - /// \returns An APInt value with the requested bits set. - static APInt getBitsSetFrom(unsigned numBits, unsigned loBit) { - APInt Res(numBits, 0); - Res.setBitsFrom(loBit); - return Res; - } - - /// Get a value with high bits set - /// - /// Constructs an APInt value that has the top hiBitsSet bits set. - /// - /// \param numBits the bitwidth of the result - /// \param hiBitsSet the number of high-order bits set in the result. - static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet) { - APInt Res(numBits, 0); - Res.setHighBits(hiBitsSet); - return Res; - } - - /// Get a value with low bits set - /// - /// Constructs an APInt value that has the bottom loBitsSet bits set. - /// - /// \param numBits the bitwidth of the result - /// \param loBitsSet the number of low-order bits set in the result. - static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet) { - APInt Res(numBits, 0); - Res.setLowBits(loBitsSet); - return Res; - } - - /// Return a value containing V broadcasted over NewLen bits. - static APInt getSplat(unsigned NewLen, const APInt &V); - /// Determine if two APInts have the same value, after zero-extending /// one of them (if needed!) to ensure that the bit-widths match. static bool isSameValue(const APInt &I1, const APInt &I2) { @@ -701,12 +540,10 @@ public: /// \name Unary Operators /// @{ - /// Postfix increment operator. - /// - /// Increments *this by 1. + /// Postfix increment operator. Increment *this by 1. /// /// \returns a new APInt value representing the original value of *this. 
- const APInt operator++(int) { + APInt operator++(int) { APInt API(*this); ++(*this); return API; @@ -717,12 +554,10 @@ public: /// \returns *this incremented by one APInt &operator++(); - /// Postfix decrement operator. - /// - /// Decrements *this by 1. + /// Postfix decrement operator. Decrement *this by 1. /// /// \returns a new APInt value representing the original value of *this. - const APInt operator--(int) { + APInt operator--(int) { APInt API(*this); --(*this); return API; @@ -733,16 +568,9 @@ public: /// \returns *this decremented by one. APInt &operator--(); - /// Logical negation operator. - /// - /// Performs logical negation operation on this APInt. - /// - /// \returns true if *this is zero, false otherwise. - bool operator!() const { - if (isSingleWord()) - return U.VAL == 0; - return countLeadingZerosSlowCase() == BitWidth; - } + /// Logical negation operation on this APInt returns true if zero, like normal + /// integers. + bool operator!() const { return isZero(); } /// @} /// \name Assignment Operators @@ -752,14 +580,15 @@ public: /// /// \returns *this after assignment of RHS. APInt &operator=(const APInt &RHS) { - // If the bitwidths are the same, we can avoid mucking with memory + // The common case (both source or dest being inline) doesn't require + // allocation or deallocation. 
if (isSingleWord() && RHS.isSingleWord()) { U.VAL = RHS.U.VAL; BitWidth = RHS.BitWidth; - return clearUnusedBits(); + return *this; } - AssignSlowCase(RHS); + assignSlowCase(RHS); return *this; } @@ -780,7 +609,6 @@ public: BitWidth = that.BitWidth; that.BitWidth = 0; - return *this; } @@ -812,7 +640,7 @@ public: if (isSingleWord()) U.VAL &= RHS.U.VAL; else - AndAssignSlowCase(RHS); + andAssignSlowCase(RHS); return *this; } @@ -827,7 +655,7 @@ public: return *this; } U.pVal[0] &= RHS; - memset(U.pVal+1, 0, (getNumWords() - 1) * APINT_WORD_SIZE); + memset(U.pVal + 1, 0, (getNumWords() - 1) * APINT_WORD_SIZE); return *this; } @@ -842,7 +670,7 @@ public: if (isSingleWord()) U.VAL |= RHS.U.VAL; else - OrAssignSlowCase(RHS); + orAssignSlowCase(RHS); return *this; } @@ -871,7 +699,7 @@ public: if (isSingleWord()) U.VAL ^= RHS.U.VAL; else - XorAssignSlowCase(RHS); + xorAssignSlowCase(RHS); return *this; } @@ -1057,6 +885,17 @@ public: /// Rotate right by rotateAmt. APInt rotr(const APInt &rotateAmt) const; + /// Concatenate the bits from "NewLSB" onto the bottom of *this. This is + /// equivalent to: + /// (this->zext(NewWidth) << NewLSB.getBitWidth()) | NewLSB.zext(NewWidth) + APInt concat(const APInt &NewLSB) const { + /// If the result will be small, then both the merged values are small. + unsigned NewWidth = getBitWidth() + NewLSB.getBitWidth(); + if (NewWidth <= APINT_BITS_PER_WORD) + return APInt(NewWidth, (U.VAL << NewLSB.getBitWidth()) | NewLSB.U.VAL); + return concatSlowCase(NewLSB); + } + /// Unsigned division operation. /// /// Perform an unsigned divide operation on this APInt by RHS. Both this and @@ -1151,7 +990,7 @@ public: assert(BitWidth == RHS.BitWidth && "Comparison requires equal bit widths"); if (isSingleWord()) return U.VAL == RHS.U.VAL; - return EqualSlowCase(RHS); + return equalSlowCase(RHS); } /// Equality operator. @@ -1436,8 +1275,6 @@ public: clearUnusedBits(); } - /// Set a given bit to 1. 
- /// /// Set the given bit to 1 whose position is given as "bitPosition". void setBit(unsigned BitPosition) { assert(BitPosition < BitWidth && "BitPosition out of range"); @@ -1449,9 +1286,7 @@ public: } /// Set the sign bit to 1. - void setSignBit() { - setBit(BitWidth - 1); - } + void setSignBit() { setBit(BitWidth - 1); } /// Set a given bit to a given value. void setBitVal(unsigned BitPosition, bool BitValue) { @@ -1497,14 +1332,10 @@ public: } /// Set the top bits starting from loBit. - void setBitsFrom(unsigned loBit) { - return setBits(loBit, BitWidth); - } + void setBitsFrom(unsigned loBit) { return setBits(loBit, BitWidth); } /// Set the bottom loBits bits. - void setLowBits(unsigned loBits) { - return setBits(0, loBits); - } + void setLowBits(unsigned loBits) { return setBits(0, loBits); } /// Set the top hiBits bits. void setHighBits(unsigned hiBits) { @@ -1539,9 +1370,7 @@ public: } /// Set the sign bit to 0. - void clearSignBit() { - clearBit(BitWidth - 1); - } + void clearSignBit() { clearBit(BitWidth - 1); } /// Toggle every bit to its opposite value. void flipAllBits() { @@ -1629,8 +1458,10 @@ public: /// uint64_t. The bitwidth must be <= 64 or the value must fit within a /// uint64_t. Otherwise an assertion will result. 
uint64_t getZExtValue() const { - if (isSingleWord()) + if (isSingleWord()) { + assert(BitWidth && "zero width values not allowed"); return U.VAL; + } assert(getActiveBits() <= 64 && "Too many bits for uint64_t"); return U.pVal[0]; } @@ -1678,8 +1509,11 @@ public: /// \returns 0 if the high order bit is not set, otherwise returns the number /// of 1 bits from the most significant to the least unsigned countLeadingOnes() const { - if (isSingleWord()) + if (isSingleWord()) { + if (LLVM_UNLIKELY(BitWidth == 0)) + return 0; return llvm::countLeadingOnes(U.VAL << (APINT_BITS_PER_WORD - BitWidth)); + } return countLeadingOnesSlowCase(); } @@ -1774,9 +1608,7 @@ public: /// The conversion does not do a translation from integer to double, it just /// re-interprets the bits as a double. Note that it is valid to do this on /// any bit width. Exactly 64 bits will be translated. - double bitsToDouble() const { - return BitsToDouble(getWord(0)); - } + double bitsToDouble() const { return BitsToDouble(getWord(0)); } /// Converts APInt bits to a float /// @@ -1808,7 +1640,7 @@ public: /// @{ /// \returns the floor log base 2 of this APInt. - unsigned logBase2() const { return getActiveBits() - 1; } + unsigned logBase2() const { return getActiveBits() - 1; } /// \returns the ceil log base 2 of this APInt. unsigned ceilLogBase2() const { @@ -1826,25 +1658,7 @@ public: /// /// to get around any mathematical concerns resulting from /// referencing 2 in a space where 2 does no exist. - unsigned nearestLogBase2() const { - // Special case when we have a bitwidth of 1. If VAL is 1, then we - // get 0. If VAL is 0, we get WORDTYPE_MAX which gets truncated to - // UINT32_MAX. - if (BitWidth == 1) - return U.VAL - 1; - - // Handle the zero case. - if (isNullValue()) - return UINT32_MAX; - - // The non-zero case is handled by computing: - // - // nearestLogBase2(x) = logBase2(x) + x[logBase2(x)-1]. - // - // where x[i] is referring to the value of the ith bit of x. 
- unsigned lg = logBase2(); - return lg + unsigned((*this)[lg - 1]); - } + unsigned nearestLogBase2() const; /// \returns the log base 2 of this APInt if its an exact power of two, -1 /// otherwise @@ -1854,12 +1668,12 @@ public: return logBase2(); } - /// Compute the square root + /// Compute the square root. APInt sqrt() const; - /// Get the absolute value; - /// - /// If *this is < 0 then return -(*this), otherwise *this; + /// Get the absolute value. If *this is < 0 then return -(*this), otherwise + /// *this. Note that the "most negative" signed number (e.g. -128 for 8 bit + /// wide APInt) is unchanged due to how negation works. APInt abs() const { if (isNegative()) return -(*this); @@ -1870,18 +1684,6 @@ public: APInt multiplicativeInverse(const APInt &modulo) const; /// @} - /// \name Support for division by constant - /// @{ - - /// Calculate the magic number for signed division by a constant. - struct ms; - ms magic() const; - - /// Calculate the magic number for unsigned division by a constant. - struct mu; - mu magicu(unsigned LeadingZeros = 0) const; - - /// @} /// \name Building-block Operations for APInt and APFloat /// @{ @@ -1908,9 +1710,8 @@ public: /// DST, of dstCOUNT parts, such that the bit srcLSB becomes the least /// significant bit of DST. All high bits above srcBITS in DST are /// zero-filled. - static void tcExtract(WordType *, unsigned dstCount, - const WordType *, unsigned srcBits, - unsigned srcLSB); + static void tcExtract(WordType *, unsigned dstCount, const WordType *, + unsigned srcBits, unsigned srcLSB); /// Set the given bit of a bignum. Zero-based. static void tcSetBit(WordType *, unsigned bit); @@ -1927,14 +1728,13 @@ public: static void tcNegate(WordType *, unsigned); /// DST += RHS + CARRY where CARRY is zero or one. Returns the carry flag. - static WordType tcAdd(WordType *, const WordType *, - WordType carry, unsigned); + static WordType tcAdd(WordType *, const WordType *, WordType carry, unsigned); /// DST += RHS. 
Returns the carry flag. static WordType tcAddPart(WordType *, WordType, unsigned); /// DST -= RHS + CARRY where CARRY is zero or one. Returns the carry flag. - static WordType tcSubtract(WordType *, const WordType *, - WordType carry, unsigned); + static WordType tcSubtract(WordType *, const WordType *, WordType carry, + unsigned); /// DST -= RHS. Returns the carry flag. static WordType tcSubtractPart(WordType *, WordType, unsigned); @@ -1950,8 +1750,7 @@ public: /// otherwise overflow occurred and return one. static int tcMultiplyPart(WordType *dst, const WordType *src, WordType multiplier, WordType carry, - unsigned srcParts, unsigned dstParts, - bool add); + unsigned srcParts, unsigned dstParts, bool add); /// DST = LHS * RHS, where DST has the same width as the operands and is /// filled with the least significant parts of the result. Returns one if @@ -1962,8 +1761,8 @@ public: /// DST = LHS * RHS, where DST has width the sum of the widths of the /// operands. No overflow occurs. DST must be disjoint from both operands. - static void tcFullMultiply(WordType *, const WordType *, - const WordType *, unsigned, unsigned); + static void tcFullMultiply(WordType *, const WordType *, const WordType *, + unsigned, unsigned); /// If RHS is zero LHS and REMAINDER are left unchanged, return one. /// Otherwise set LHS to LHS / RHS with the fractional part discarded, set @@ -1974,9 +1773,8 @@ public: /// SCRATCH is a bignum of the same size as the operands and result for use by /// the routine; its contents need not be initialized and are destroyed. LHS, /// REMAINDER and SCRATCH must be distinct. - static int tcDivide(WordType *lhs, const WordType *rhs, - WordType *remainder, WordType *scratch, - unsigned parts); + static int tcDivide(WordType *lhs, const WordType *rhs, WordType *remainder, + WordType *scratch, unsigned parts); /// Shift a bignum left Count bits. Shifted in bits are zero. There are no /// restrictions on Count. 
@@ -1986,12 +1784,6 @@ public: /// restrictions on Count. static void tcShiftRight(WordType *, unsigned Words, unsigned Count); - /// The obvious AND, OR and XOR and complement operations. - static void tcAnd(WordType *, const WordType *, unsigned); - static void tcOr(WordType *, const WordType *, unsigned); - static void tcXor(WordType *, const WordType *, unsigned); - static void tcComplement(WordType *, unsigned); - /// Comparison (unsigned) of two bignums. static int tcCompare(const WordType *, const WordType *, unsigned); @@ -2005,26 +1797,185 @@ public: return tcSubtractPart(dst, 1, parts); } - /// Set the least significant BITS and clear the rest. - static void tcSetLeastSignificantBits(WordType *, unsigned, unsigned bits); + /// Used to insert APInt objects, or objects that contain APInt objects, into + /// FoldingSets. + void Profile(FoldingSetNodeID &id) const; /// debug method void dump() const; - /// @} -}; + /// Returns whether this instance allocated memory. + bool needsCleanup() const { return !isSingleWord(); } -/// Magic data for optimising signed division by a constant. -struct APInt::ms { - APInt m; ///< magic number - unsigned s; ///< shift amount -}; +private: + /// This union is used to store the integer value. When the + /// integer bit-width <= 64, it uses VAL, otherwise it uses pVal. + union { + uint64_t VAL; ///< Used to store the <= 64 bits integer value. + uint64_t *pVal; ///< Used to store the >64 bits integer value. + } U; + + unsigned BitWidth; ///< The number of bits in this APInt. + + friend struct DenseMapInfo<APInt, void>; + friend class APSInt; + + /// This constructor is used only internally for speed of construction of + /// temporaries. It is unsafe since it takes ownership of the pointer, so it + /// is not public. + APInt(uint64_t *val, unsigned bits) : BitWidth(bits) { U.pVal = val; } + + /// Determine which word a bit is in. + /// + /// \returns the word position for the specified bit position. 
+ static unsigned whichWord(unsigned bitPosition) { + return bitPosition / APINT_BITS_PER_WORD; + } + + /// Determine which bit in a word the specified bit position is in. + static unsigned whichBit(unsigned bitPosition) { + return bitPosition % APINT_BITS_PER_WORD; + } + + /// Get a single bit mask. + /// + /// \returns a uint64_t with only bit at "whichBit(bitPosition)" set + /// This method generates and returns a uint64_t (word) mask for a single + /// bit at a specific bit position. This is used to mask the bit in the + /// corresponding word. + static uint64_t maskBit(unsigned bitPosition) { + return 1ULL << whichBit(bitPosition); + } + + /// Clear unused high order bits + /// + /// This method is used internally to clear the top "N" bits in the high order + /// word that are not used by the APInt. This is needed after the most + /// significant word is assigned a value to ensure that those bits are + /// zero'd out. + APInt &clearUnusedBits() { + // Compute how many bits are used in the final word. + unsigned WordBits = ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1; + + // Mask out the high bits. + uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - WordBits); + if (LLVM_UNLIKELY(BitWidth == 0)) + mask = 0; + + if (isSingleWord()) + U.VAL &= mask; + else + U.pVal[getNumWords() - 1] &= mask; + return *this; + } + + /// Get the word corresponding to a bit position + /// \returns the corresponding word for the specified bit position. + uint64_t getWord(unsigned bitPosition) const { + return isSingleWord() ? U.VAL : U.pVal[whichWord(bitPosition)]; + } + + /// Utility method to change the bit width of this APInt to new bit width, + /// allocating and/or deallocating as necessary. There is no guarantee on the + /// value of any bits upon return. Caller should populate the bits after. + void reallocate(unsigned NewBitWidth); + + /// Convert a char array into an APInt + /// + /// \param radix 2, 8, 10, 16, or 36 + /// Converts a string into a number. 
The string must be non-empty + /// and well-formed as a number of the given base. The bit-width + /// must be sufficient to hold the result. + /// + /// This is used by the constructors that take string arguments. + /// + /// StringRef::getAsInteger is superficially similar but (1) does + /// not assume that the string is well-formed and (2) grows the + /// result to hold the input. + void fromString(unsigned numBits, StringRef str, uint8_t radix); + + /// An internal division function for dividing APInts. + /// + /// This is used by the toString method to divide by the radix. It simply + /// provides a more convenient form of divide for internal use since KnuthDiv + /// has specific constraints on its inputs. If those constraints are not met + /// then it provides a simpler form of divide. + static void divide(const WordType *LHS, unsigned lhsWords, + const WordType *RHS, unsigned rhsWords, WordType *Quotient, + WordType *Remainder); + + /// out-of-line slow case for inline constructor + void initSlowCase(uint64_t val, bool isSigned); + + /// shared code between two array constructors + void initFromArray(ArrayRef<uint64_t> array); + + /// out-of-line slow case for inline copy constructor + void initSlowCase(const APInt &that); + + /// out-of-line slow case for shl + void shlSlowCase(unsigned ShiftAmt); + + /// out-of-line slow case for lshr. + void lshrSlowCase(unsigned ShiftAmt); + + /// out-of-line slow case for ashr. + void ashrSlowCase(unsigned ShiftAmt); + + /// out-of-line slow case for operator= + void assignSlowCase(const APInt &RHS); + + /// out-of-line slow case for operator== + bool equalSlowCase(const APInt &RHS) const LLVM_READONLY; + + /// out-of-line slow case for countLeadingZeros + unsigned countLeadingZerosSlowCase() const LLVM_READONLY; + + /// out-of-line slow case for countLeadingOnes. + unsigned countLeadingOnesSlowCase() const LLVM_READONLY; + + /// out-of-line slow case for countTrailingZeros. 
+ unsigned countTrailingZerosSlowCase() const LLVM_READONLY; + + /// out-of-line slow case for countTrailingOnes + unsigned countTrailingOnesSlowCase() const LLVM_READONLY; + + /// out-of-line slow case for countPopulation + unsigned countPopulationSlowCase() const LLVM_READONLY; + + /// out-of-line slow case for intersects. + bool intersectsSlowCase(const APInt &RHS) const LLVM_READONLY; + + /// out-of-line slow case for isSubsetOf. + bool isSubsetOfSlowCase(const APInt &RHS) const LLVM_READONLY; + + /// out-of-line slow case for setBits. + void setBitsSlowCase(unsigned loBit, unsigned hiBit); + + /// out-of-line slow case for flipAllBits. + void flipAllBitsSlowCase(); -/// Magic data for optimising unsigned division by a constant. -struct APInt::mu { - APInt m; ///< magic number - bool a; ///< add indicator - unsigned s; ///< shift amount + /// out-of-line slow case for concat. + APInt concatSlowCase(const APInt &NewLSB) const; + + /// out-of-line slow case for operator&=. + void andAssignSlowCase(const APInt &RHS); + + /// out-of-line slow case for operator|=. + void orAssignSlowCase(const APInt &RHS); + + /// out-of-line slow case for operator^=. + void xorAssignSlowCase(const APInt &RHS); + + /// Unsigned comparison. Returns -1, 0, or 1 if this APInt is less than, equal + /// to, or greater than RHS. + int compare(const APInt &RHS) const LLVM_READONLY; + + /// Signed comparison. Returns -1, 0, or 1 if this APInt is less than, equal + /// to, or greater than RHS. + int compareSigned(const APInt &RHS) const LLVM_READONLY; + + /// @} }; inline bool operator==(uint64_t V1, const APInt &V2) { return V2 == V1; } @@ -2161,7 +2112,6 @@ inline APInt operator*(uint64_t LHS, APInt b) { return b; } - namespace APIntOps { /// Determine the smaller of two APInts considered to be signed. 
@@ -2277,7 +2227,16 @@ Optional<APInt> SolveQuadraticEquationWrap(APInt A, APInt B, APInt C, Optional<unsigned> GetMostSignificantDifferentBit(const APInt &A, const APInt &B); -} // End of APIntOps namespace +/// Splat/Merge neighboring bits to widen/narrow the bitmask represented +/// by \param A to \param NewBitWidth bits. +/// +/// e.g. ScaleBitMask(0b0101, 8) -> 0b00110011 +/// e.g. ScaleBitMask(0b00011011, 4) -> 0b0111 +/// A.getBitwidth() or NewBitWidth must be a whole multiples of the other. +/// +/// TODO: Do we need a mode where all bits must be set when merging down? +APInt ScaleBitMask(const APInt &A, unsigned NewBitWidth); +} // namespace APIntOps // See friend declaration above. This additional declaration is required in // order to compile LLVM with IBM xlC compiler. @@ -2292,7 +2251,7 @@ void StoreIntToMemory(const APInt &IntVal, uint8_t *Dst, unsigned StoreBytes); void LoadIntFromMemory(APInt &IntVal, const uint8_t *Src, unsigned LoadBytes); /// Provide DenseMapInfo for APInt. -template <> struct DenseMapInfo<APInt> { +template <> struct DenseMapInfo<APInt, void> { static inline APInt getEmptyKey() { APInt V(nullptr, 0); V.U.VAL = 0; diff --git a/llvm/include/llvm/ADT/APSInt.h b/llvm/include/llvm/ADT/APSInt.h index 1509d472f131..c1cf3c546070 100644 --- a/llvm/include/llvm/ADT/APSInt.h +++ b/llvm/include/llvm/ADT/APSInt.h @@ -58,7 +58,7 @@ public: /// that 0 is not a positive value. /// /// \returns true if this APSInt is positive. - bool isStrictlyPositive() const { return isNonNegative() && !isNullValue(); } + bool isStrictlyPositive() const { return isNonNegative() && !isZero(); } APSInt &operator=(APInt RHS) { // Retain our current sign. @@ -344,17 +344,17 @@ inline raw_ostream &operator<<(raw_ostream &OS, const APSInt &I) { } /// Provide DenseMapInfo for APSInt, using the DenseMapInfo for APInt. 
-template <> struct DenseMapInfo<APSInt> { +template <> struct DenseMapInfo<APSInt, void> { static inline APSInt getEmptyKey() { - return APSInt(DenseMapInfo<APInt>::getEmptyKey()); + return APSInt(DenseMapInfo<APInt, void>::getEmptyKey()); } static inline APSInt getTombstoneKey() { - return APSInt(DenseMapInfo<APInt>::getTombstoneKey()); + return APSInt(DenseMapInfo<APInt, void>::getTombstoneKey()); } static unsigned getHashValue(const APSInt &Key) { - return DenseMapInfo<APInt>::getHashValue(Key); + return DenseMapInfo<APInt, void>::getHashValue(Key); } static bool isEqual(const APSInt &LHS, const APSInt &RHS) { diff --git a/llvm/include/llvm/ADT/ArrayRef.h b/llvm/include/llvm/ADT/ArrayRef.h index 2df49223c987..61f85cfc812b 100644 --- a/llvm/include/llvm/ADT/ArrayRef.h +++ b/llvm/include/llvm/ADT/ArrayRef.h @@ -26,8 +26,6 @@ namespace llvm { - template<typename T> struct DenseMapInfo; - /// ArrayRef - Represent a constant reference to an array (0 or more elements /// consecutively in memory), i.e. a start pointer and a length. It allows /// various APIs to take consecutive elements easily and conveniently. @@ -572,7 +570,7 @@ namespace llvm { } // Provide DenseMapInfo for ArrayRefs. - template <typename T> struct DenseMapInfo<ArrayRef<T>> { + template <typename T> struct DenseMapInfo<ArrayRef<T>, void> { static inline ArrayRef<T> getEmptyKey() { return ArrayRef<T>( reinterpret_cast<const T *>(~static_cast<uintptr_t>(0)), size_t(0)); diff --git a/llvm/include/llvm/ADT/BitVector.h b/llvm/include/llvm/ADT/BitVector.h index 31d388073633..cd1964cbdd98 100644 --- a/llvm/include/llvm/ADT/BitVector.h +++ b/llvm/include/llvm/ADT/BitVector.h @@ -85,7 +85,7 @@ class BitVector { unsigned Size; // Size of bitvector in bits. public: - typedef unsigned size_type; + using size_type = unsigned; // Encapsulation of a single bit. 
class reference { @@ -536,8 +536,8 @@ public: [&Arg](auto const &BV) { return Arg.size() == BV; }) && "consistent sizes"); Out.resize(Arg.size()); - for (size_t i = 0, e = Arg.Bits.size(); i != e; ++i) - Out.Bits[i] = f(Arg.Bits[i], Args.Bits[i]...); + for (size_type I = 0, E = Arg.Bits.size(); I != E; ++I) + Out.Bits[I] = f(Arg.Bits[I], Args.Bits[I]...); Out.clear_unused_bits(); return Out; } @@ -545,16 +545,16 @@ public: BitVector &operator|=(const BitVector &RHS) { if (size() < RHS.size()) resize(RHS.size()); - for (size_t i = 0, e = RHS.Bits.size(); i != e; ++i) - Bits[i] |= RHS.Bits[i]; + for (size_type I = 0, E = RHS.Bits.size(); I != E; ++I) + Bits[I] |= RHS.Bits[I]; return *this; } BitVector &operator^=(const BitVector &RHS) { if (size() < RHS.size()) resize(RHS.size()); - for (size_t i = 0, e = RHS.Bits.size(); i != e; ++i) - Bits[i] ^= RHS.Bits[i]; + for (size_type I = 0, E = RHS.Bits.size(); I != E; ++I) + Bits[I] ^= RHS.Bits[I]; return *this; } @@ -808,11 +808,11 @@ private: public: /// Return the size (in bytes) of the bit vector. 
- size_t getMemorySize() const { return Bits.size() * sizeof(BitWord); } - size_t getBitCapacity() const { return Bits.size() * BITWORD_SIZE; } + size_type getMemorySize() const { return Bits.size() * sizeof(BitWord); } + size_type getBitCapacity() const { return Bits.size() * BITWORD_SIZE; } }; -inline size_t capacity_in_bytes(const BitVector &X) { +inline BitVector::size_type capacity_in_bytes(const BitVector &X) { return X.getMemorySize(); } @@ -824,8 +824,8 @@ template <> struct DenseMapInfo<BitVector> { return V; } static unsigned getHashValue(const BitVector &V) { - return DenseMapInfo<std::pair<unsigned, ArrayRef<uintptr_t>>>::getHashValue( - std::make_pair(V.size(), V.getData())); + return DenseMapInfo<std::pair<BitVector::size_type, ArrayRef<uintptr_t>>>:: + getHashValue(std::make_pair(V.size(), V.getData())); } static bool isEqual(const BitVector &LHS, const BitVector &RHS) { if (LHS.isInvalid() || RHS.isInvalid()) diff --git a/llvm/include/llvm/ADT/CombinationGenerator.h b/llvm/include/llvm/ADT/CombinationGenerator.h new file mode 100644 index 000000000000..ab6afd555726 --- /dev/null +++ b/llvm/include/llvm/ADT/CombinationGenerator.h @@ -0,0 +1,148 @@ +//===-- llvm/ADT/CombinationGenerator.h ------------------------*- C++ -*--===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Combination generator. +/// +/// Example: given input {{0, 1}, {2}, {3, 4}} it will produce the following +/// combinations: {0, 2, 3}, {0, 2, 4}, {1, 2, 3}, {1, 2, 4}. +/// +/// It is useful to think of input as vector-of-vectors, where the +/// outer vector is the variable space, and inner vector is choice space. +/// The number of choices for each variable can be different. 
+/// +/// As for implementation, it is useful to think of this as a weird number, +/// where each digit (==variable) may have different base (==number of choices). +/// Thus modelling of 'produce next combination' is exactly analogous to the +/// incrementing of an number - increment lowest digit (pick next choice for the +/// variable), and if it wrapped to the beginning then increment next digit. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ADT_COMBINATIONGENERATOR_H +#define LLVM_ADT_COMBINATIONGENERATOR_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" +#include <cassert> +#include <cstring> + +namespace llvm { + +template <typename choice_type, typename choices_storage_type, + int variable_smallsize> +class CombinationGenerator { + template <typename T> struct WrappingIterator { + using value_type = T; + + const ArrayRef<value_type> Range; + typename decltype(Range)::const_iterator Position; + + // Rewind the tape, placing the position to again point at the beginning. + void rewind() { Position = Range.begin(); } + + // Advance position forward, possibly wrapping to the beginning. + // Returns whether the wrap happened. + bool advance() { + ++Position; + bool Wrapped = Position == Range.end(); + if (Wrapped) + rewind(); + return Wrapped; + } + + // Get the value at which we are currently pointing. 
+ const value_type &operator*() const { return *Position; } + + WrappingIterator(ArrayRef<value_type> Range_) : Range(Range_) { + assert(!Range.empty() && "The range must not be empty."); + rewind(); + } + }; + + const ArrayRef<choices_storage_type> VariablesChoices; + + void performGeneration( + const function_ref<bool(ArrayRef<choice_type>)> Callback) const { + SmallVector<WrappingIterator<choice_type>, variable_smallsize> + VariablesState; + + // 'increment' of the the whole VariablesState is defined identically to the + // increment of a number: starting from the least significant element, + // increment it, and if it wrapped, then propagate that carry by also + // incrementing next (more significant) element. + auto IncrementState = + [](MutableArrayRef<WrappingIterator<choice_type>> VariablesState) + -> bool { + for (WrappingIterator<choice_type> &Variable : + llvm::reverse(VariablesState)) { + bool Wrapped = Variable.advance(); + if (!Wrapped) + return false; // There you go, next combination is ready. + // We have carry - increment more significant variable next.. + } + return true; // MSB variable wrapped, no more unique combinations. + }; + + // Initialize the per-variable state to refer to the possible choices for + // that variable. + VariablesState.reserve(VariablesChoices.size()); + for (ArrayRef<choice_type> VC : VariablesChoices) + VariablesState.emplace_back(VC); + + // Temporary buffer to store each combination before performing Callback. + SmallVector<choice_type, variable_smallsize> CurrentCombination; + CurrentCombination.resize(VariablesState.size()); + + while (true) { + // Gather the currently-selected variable choices into a vector. + for (auto I : llvm::zip(VariablesState, CurrentCombination)) + std::get<1>(I) = *std::get<0>(I); + // And pass the new combination into callback, as intended. + if (/*Abort=*/Callback(CurrentCombination)) + return; + // And tick the state to next combination, which will be unique. 
+ if (IncrementState(VariablesState)) + return; // All combinations produced. + } + }; + +public: + CombinationGenerator(ArrayRef<choices_storage_type> VariablesChoices_) + : VariablesChoices(VariablesChoices_) { +#ifndef NDEBUG + assert(!VariablesChoices.empty() && "There should be some variables."); + llvm::for_each(VariablesChoices, [](ArrayRef<choice_type> VariableChoices) { + assert(!VariableChoices.empty() && + "There must always be some choice, at least a placeholder one."); + }); +#endif + } + + // How many combinations can we produce, max? + // This is at most how many times the callback will be called. + size_t numCombinations() const { + size_t NumVariants = 1; + for (ArrayRef<choice_type> VariableChoices : VariablesChoices) + NumVariants *= VariableChoices.size(); + assert(NumVariants >= 1 && + "We should always end up producing at least one combination"); + return NumVariants; + } + + // Actually perform exhaustive combination generation. + // Each result will be passed into the callback. + void generate(const function_ref<bool(ArrayRef<choice_type>)> Callback) { + performGeneration(Callback); + } +}; + +} // namespace llvm + +#endif diff --git a/llvm/include/llvm/ADT/DenseMapInfo.h b/llvm/include/llvm/ADT/DenseMapInfo.h index d276acbfa6a6..75b7371a3683 100644 --- a/llvm/include/llvm/ADT/DenseMapInfo.h +++ b/llvm/include/llvm/ADT/DenseMapInfo.h @@ -13,10 +13,10 @@ #ifndef LLVM_ADT_DENSEMAPINFO_H #define LLVM_ADT_DENSEMAPINFO_H -#include "llvm/ADT/Hashing.h" #include <cassert> #include <cstddef> #include <cstdint> +#include <tuple> #include <utility> namespace llvm { @@ -39,7 +39,12 @@ static inline unsigned combineHashValue(unsigned a, unsigned b) { } // end namespace detail -template<typename T> +/// An information struct used to provide DenseMap with the various necessary +/// components for a given value type `T`. 
`Enable` is an optional additional +/// parameter that is used to support SFINAE (generally using std::enable_if_t) +/// in derived DenseMapInfo specializations; in non-SFINAE use cases this should +/// just be `void`. +template<typename T, typename Enable = void> struct DenseMapInfo { //static inline T getEmptyKey(); //static inline T getTombstoneKey(); @@ -282,13 +287,6 @@ template <typename... Ts> struct DenseMapInfo<std::tuple<Ts...>> { } }; -template <> struct DenseMapInfo<hash_code> { - static inline hash_code getEmptyKey() { return hash_code(-1); } - static inline hash_code getTombstoneKey() { return hash_code(-2); } - static unsigned getHashValue(hash_code val) { return val; } - static bool isEqual(hash_code LHS, hash_code RHS) { return LHS == RHS; } -}; - } // end namespace llvm #endif // LLVM_ADT_DENSEMAPINFO_H diff --git a/llvm/include/llvm/ADT/EquivalenceClasses.h b/llvm/include/llvm/ADT/EquivalenceClasses.h index 273b00f99d5d..de6bb3bca7e3 100644 --- a/llvm/include/llvm/ADT/EquivalenceClasses.h +++ b/llvm/include/llvm/ADT/EquivalenceClasses.h @@ -30,7 +30,8 @@ namespace llvm { /// /// This implementation is an efficient implementation that only stores one copy /// of the element being indexed per entry in the set, and allows any arbitrary -/// type to be indexed (as long as it can be ordered with operator<). +/// type to be indexed (as long as it can be ordered with operator< or a +/// comparator is provided). /// /// Here is a simple example using integers: /// @@ -54,7 +55,7 @@ namespace llvm { /// 4 /// 5 1 2 /// -template <class ElemTy> +template <class ElemTy, class Compare = std::less<ElemTy>> class EquivalenceClasses { /// ECValue - The EquivalenceClasses data structure is just a set of these. /// Each of these represents a relation for a value. 
First it stores the @@ -101,22 +102,40 @@ class EquivalenceClasses { assert(RHS.isLeader() && RHS.getNext() == nullptr && "Not a singleton!"); } - bool operator<(const ECValue &UFN) const { return Data < UFN.Data; } - bool isLeader() const { return (intptr_t)Next & 1; } const ElemTy &getData() const { return Data; } const ECValue *getNext() const { return (ECValue*)((intptr_t)Next & ~(intptr_t)1); } + }; + + /// A wrapper of the comparator, to be passed to the set. + struct ECValueComparator { + using is_transparent = void; + + ECValueComparator() : compare(Compare()) {} + + bool operator()(const ECValue &lhs, const ECValue &rhs) const { + return compare(lhs.Data, rhs.Data); + } + + template <typename T> + bool operator()(const T &lhs, const ECValue &rhs) const { + return compare(lhs, rhs.Data); + } + + template <typename T> + bool operator()(const ECValue &lhs, const T &rhs) const { + return compare(lhs.Data, rhs); + } - template<typename T> - bool operator<(const T &Val) const { return Data < Val; } + const Compare compare; }; /// TheMapping - This implicitly provides a mapping from ElemTy values to the /// ECValues, it just keeps the key as part of the value. - std::set<ECValue> TheMapping; + std::set<ECValue, ECValueComparator> TheMapping; public: EquivalenceClasses() = default; diff --git a/llvm/include/llvm/ADT/FunctionExtras.h b/llvm/include/llvm/ADT/FunctionExtras.h index e67ef7377c88..5a37417ddde5 100644 --- a/llvm/include/llvm/ADT/FunctionExtras.h +++ b/llvm/include/llvm/ADT/FunctionExtras.h @@ -37,6 +37,7 @@ #include "llvm/ADT/STLForwardCompat.h" #include "llvm/Support/MemAlloc.h" #include "llvm/Support/type_traits.h" +#include <cstring> #include <memory> #include <type_traits> @@ -64,11 +65,16 @@ template <typename CallableT, typename ThisT> using EnableUnlessSameType = std::enable_if_t<!std::is_same<remove_cvref_t<CallableT>, ThisT>::value>; template <typename CallableT, typename Ret, typename... 
Params> -using EnableIfCallable = - std::enable_if_t<std::is_void<Ret>::value || - std::is_convertible<decltype(std::declval<CallableT>()( - std::declval<Params>()...)), - Ret>::value>; +using EnableIfCallable = std::enable_if_t<llvm::disjunction< + std::is_void<Ret>, + std::is_same<decltype(std::declval<CallableT>()(std::declval<Params>()...)), + Ret>, + std::is_same<const decltype(std::declval<CallableT>()( + std::declval<Params>()...)), + Ret>, + std::is_convertible<decltype(std::declval<CallableT>()( + std::declval<Params>()...)), + Ret>>::value>; template <typename ReturnT, typename... ParamTs> class UniqueFunctionBase { protected: diff --git a/llvm/include/llvm/ADT/Hashing.h b/llvm/include/llvm/ADT/Hashing.h index e296c1c53ebd..74a87a3d8dbb 100644 --- a/llvm/include/llvm/ADT/Hashing.h +++ b/llvm/include/llvm/ADT/Hashing.h @@ -56,6 +56,7 @@ #include <utility> namespace llvm { +template <typename T, typename Enable> struct DenseMapInfo; /// An opaque object representing a hash code. /// @@ -677,6 +678,13 @@ hash_code hash_value(const std::basic_string<T> &arg) { return hash_combine_range(arg.begin(), arg.end()); } +template <> struct DenseMapInfo<hash_code, void> { + static inline hash_code getEmptyKey() { return hash_code(-1); } + static inline hash_code getTombstoneKey() { return hash_code(-2); } + static unsigned getHashValue(hash_code val) { return val; } + static bool isEqual(hash_code LHS, hash_code RHS) { return LHS == RHS; } +}; + } // namespace llvm #endif diff --git a/llvm/include/llvm/ADT/ImmutableList.h b/llvm/include/llvm/ADT/ImmutableList.h index c9ee494734e7..cf27c5a16d28 100644 --- a/llvm/include/llvm/ADT/ImmutableList.h +++ b/llvm/include/llvm/ADT/ImmutableList.h @@ -220,8 +220,7 @@ public: // Partially-specialized Traits. 
//===----------------------------------------------------------------------===// -template<typename T> struct DenseMapInfo; -template<typename T> struct DenseMapInfo<ImmutableList<T>> { +template <typename T> struct DenseMapInfo<ImmutableList<T>, void> { static inline ImmutableList<T> getEmptyKey() { return reinterpret_cast<ImmutableListImpl<T>*>(-1); } diff --git a/llvm/include/llvm/ADT/IntervalMap.h b/llvm/include/llvm/ADT/IntervalMap.h index 26a7ed0cd333..3c107a3622a9 100644 --- a/llvm/include/llvm/ADT/IntervalMap.h +++ b/llvm/include/llvm/ADT/IntervalMap.h @@ -1137,7 +1137,7 @@ public: /// overlaps(a, b) - Return true if the intervals in this map overlap with the /// interval [a;b]. - bool overlaps(KeyT a, KeyT b) { + bool overlaps(KeyT a, KeyT b) const { assert(Traits::nonEmpty(a, b)); const_iterator I = find(a); if (!I.valid()) diff --git a/llvm/include/llvm/ADT/MapVector.h b/llvm/include/llvm/ADT/MapVector.h index 1de1124f4ea2..f9540999381a 100644 --- a/llvm/include/llvm/ADT/MapVector.h +++ b/llvm/include/llvm/ADT/MapVector.h @@ -43,6 +43,7 @@ class MapVector { "The mapped_type of the specified Map must be an integral type"); public: + using key_type = KeyT; using value_type = typename VectorType::value_type; using size_type = typename VectorType::size_type; diff --git a/llvm/include/llvm/ADT/PointerIntPair.h b/llvm/include/llvm/ADT/PointerIntPair.h index cb8b202c48b7..393ace6b70fc 100644 --- a/llvm/include/llvm/ADT/PointerIntPair.h +++ b/llvm/include/llvm/ADT/PointerIntPair.h @@ -22,7 +22,7 @@ namespace llvm { -template <typename T> struct DenseMapInfo; +template <typename T, typename Enable> struct DenseMapInfo; template <typename PointerT, unsigned IntBits, typename PtrTraits> struct PointerIntPairInfo; @@ -192,7 +192,7 @@ struct PointerIntPairInfo { // Provide specialization of DenseMapInfo for PointerIntPair. 
template <typename PointerTy, unsigned IntBits, typename IntType> -struct DenseMapInfo<PointerIntPair<PointerTy, IntBits, IntType>> { +struct DenseMapInfo<PointerIntPair<PointerTy, IntBits, IntType>, void> { using Ty = PointerIntPair<PointerTy, IntBits, IntType>; static Ty getEmptyKey() { diff --git a/llvm/include/llvm/ADT/PointerUnion.h b/llvm/include/llvm/ADT/PointerUnion.h index c39691061b72..0874f67db3fe 100644 --- a/llvm/include/llvm/ADT/PointerUnion.h +++ b/llvm/include/llvm/ADT/PointerUnion.h @@ -17,42 +17,13 @@ #include "llvm/ADT/DenseMapInfo.h" #include "llvm/ADT/PointerIntPair.h" #include "llvm/Support/PointerLikeTypeTraits.h" +#include <algorithm> #include <cassert> #include <cstddef> #include <cstdint> namespace llvm { -template <typename T> struct PointerUnionTypeSelectorReturn { - using Return = T; -}; - -/// Get a type based on whether two types are the same or not. -/// -/// For: -/// -/// \code -/// using Ret = typename PointerUnionTypeSelector<T1, T2, EQ, NE>::Return; -/// \endcode -/// -/// Ret will be EQ type if T1 is same as T2 or NE type otherwise. -template <typename T1, typename T2, typename RET_EQ, typename RET_NE> -struct PointerUnionTypeSelector { - using Return = typename PointerUnionTypeSelectorReturn<RET_NE>::Return; -}; - -template <typename T, typename RET_EQ, typename RET_NE> -struct PointerUnionTypeSelector<T, T, RET_EQ, RET_NE> { - using Return = typename PointerUnionTypeSelectorReturn<RET_EQ>::Return; -}; - -template <typename T1, typename T2, typename RET_EQ, typename RET_NE> -struct PointerUnionTypeSelectorReturn< - PointerUnionTypeSelector<T1, T2, RET_EQ, RET_NE>> { - using Return = - typename PointerUnionTypeSelector<T1, T2, RET_EQ, RET_NE>::Return; -}; - namespace pointer_union_detail { /// Determine the number of bits required to store integers with values < n. /// This is ceil(log2(n)). 
diff --git a/llvm/include/llvm/ADT/STLExtras.h b/llvm/include/llvm/ADT/STLExtras.h index eb001346b609..48f15b02283a 100644 --- a/llvm/include/llvm/ADT/STLExtras.h +++ b/llvm/include/llvm/ADT/STLExtras.h @@ -272,20 +272,24 @@ template <typename T> auto drop_begin(T &&RangeOrContainer, size_t N = 1) { // be applied whenever operator* is invoked on the iterator. template <typename ItTy, typename FuncTy, - typename FuncReturnTy = - decltype(std::declval<FuncTy>()(*std::declval<ItTy>()))> + typename ReferenceTy = + decltype(std::declval<FuncTy>()(*std::declval<ItTy>()))> class mapped_iterator : public iterator_adaptor_base< - mapped_iterator<ItTy, FuncTy>, ItTy, - typename std::iterator_traits<ItTy>::iterator_category, - typename std::remove_reference<FuncReturnTy>::type> { + mapped_iterator<ItTy, FuncTy>, ItTy, + typename std::iterator_traits<ItTy>::iterator_category, + std::remove_reference_t<ReferenceTy>, + typename std::iterator_traits<ItTy>::difference_type, + std::remove_reference_t<ReferenceTy> *, ReferenceTy> { public: mapped_iterator(ItTy U, FuncTy F) : mapped_iterator::iterator_adaptor_base(std::move(U)), F(std::move(F)) {} ItTy getCurrent() { return this->I; } - FuncReturnTy operator*() const { return F(*this->I); } + const FuncTy &getFunction() const { return F; } + + ReferenceTy operator*() const { return F(*this->I); } private: FuncTy F; @@ -303,6 +307,32 @@ auto map_range(ContainerTy &&C, FuncTy F) { return make_range(map_iterator(C.begin(), F), map_iterator(C.end(), F)); } +/// A base type of mapped iterator, that is useful for building derived +/// iterators that do not need/want to store the map function (as in +/// mapped_iterator). These iterators must simply provide a `mapElement` method +/// that defines how to map a value of the iterator to the provided reference +/// type. 
+template <typename DerivedT, typename ItTy, typename ReferenceTy> +class mapped_iterator_base + : public iterator_adaptor_base< + DerivedT, ItTy, + typename std::iterator_traits<ItTy>::iterator_category, + std::remove_reference_t<ReferenceTy>, + typename std::iterator_traits<ItTy>::difference_type, + std::remove_reference_t<ReferenceTy> *, ReferenceTy> { +public: + using BaseT = mapped_iterator_base; + + mapped_iterator_base(ItTy U) + : mapped_iterator_base::iterator_adaptor_base(std::move(U)) {} + + ItTy getCurrent() { return this->I; } + + ReferenceTy operator*() const { + return static_cast<const DerivedT &>(*this).mapElement(*this->I); + } +}; + /// Helper to determine if type T has a member called rbegin(). template <typename Ty> class has_rbegin_impl { using yes = char[1]; @@ -371,12 +401,7 @@ class filter_iterator_base typename std::common_type< IterTag, typename std::iterator_traits< WrappedIteratorT>::iterator_category>::type> { - using BaseT = iterator_adaptor_base< - filter_iterator_base<WrappedIteratorT, PredicateT, IterTag>, - WrappedIteratorT, - typename std::common_type< - IterTag, typename std::iterator_traits< - WrappedIteratorT>::iterator_category>::type>; + using BaseT = typename filter_iterator_base::iterator_adaptor_base; protected: WrappedIteratorT End; @@ -411,12 +436,10 @@ template <typename WrappedIteratorT, typename PredicateT, typename IterTag = std::forward_iterator_tag> class filter_iterator_impl : public filter_iterator_base<WrappedIteratorT, PredicateT, IterTag> { - using BaseT = filter_iterator_base<WrappedIteratorT, PredicateT, IterTag>; - public: filter_iterator_impl(WrappedIteratorT Begin, WrappedIteratorT End, PredicateT Pred) - : BaseT(Begin, End, Pred) {} + : filter_iterator_impl::filter_iterator_base(Begin, End, Pred) {} }; /// Specialization of filter_iterator_base for bidirectional iteration. 
@@ -425,8 +448,8 @@ class filter_iterator_impl<WrappedIteratorT, PredicateT, std::bidirectional_iterator_tag> : public filter_iterator_base<WrappedIteratorT, PredicateT, std::bidirectional_iterator_tag> { - using BaseT = filter_iterator_base<WrappedIteratorT, PredicateT, - std::bidirectional_iterator_tag>; + using BaseT = typename filter_iterator_impl::filter_iterator_base; + void findPrevValid() { while (!this->Pred(*this->I)) BaseT::operator--(); @@ -514,9 +537,7 @@ template <typename WrappedIteratorT> class early_inc_iterator_impl : public iterator_adaptor_base<early_inc_iterator_impl<WrappedIteratorT>, WrappedIteratorT, std::input_iterator_tag> { - using BaseT = - iterator_adaptor_base<early_inc_iterator_impl<WrappedIteratorT>, - WrappedIteratorT, std::input_iterator_tag>; + using BaseT = typename early_inc_iterator_impl::iterator_adaptor_base; using PointerT = typename std::iterator_traits<WrappedIteratorT>::pointer; @@ -630,12 +651,18 @@ protected: return std::tuple<Iters...>(std::prev(std::get<Ns>(iterators))...); } + template <size_t... Ns> + bool test_all_equals(const zip_common &other, + std::index_sequence<Ns...>) const { + return all_of(std::initializer_list<bool>{std::get<Ns>(this->iterators) == + std::get<Ns>(other.iterators)...}, + identity<bool>{}); + } + public: zip_common(Iters &&... ts) : iterators(std::forward<Iters>(ts)...) {} - value_type operator*() { return deref(std::index_sequence_for<Iters...>{}); } - - const value_type operator*() const { + value_type operator*() const { return deref(std::index_sequence_for<Iters...>{}); } @@ -650,6 +677,11 @@ public: iterators = tup_dec(std::index_sequence_for<Iters...>{}); return *reinterpret_cast<ZipType *>(this); } + + /// Return true if all the iterator are matching `other`'s iterators. + bool all_equals(zip_common &other) { + return test_all_equals(other, std::index_sequence_for<Iters...>{}); + } }; template <typename... 
Iters> @@ -801,8 +833,6 @@ public: : iterators(std::forward<Iters>(ts.first)...), end_iterators(std::forward<Iters>(ts.second)...) {} - value_type operator*() { return deref(std::index_sequence_for<Iters...>{}); } - value_type operator*() const { return deref(std::index_sequence_for<Iters...>{}); } @@ -1073,8 +1103,7 @@ template <typename DerivedT, typename BaseT, typename T, typename PointerT = T *, typename ReferenceT = T &> class indexed_accessor_range_base { public: - using RangeBaseT = - indexed_accessor_range_base<DerivedT, BaseT, T, PointerT, ReferenceT>; + using RangeBaseT = indexed_accessor_range_base; /// An iterator element of this range. class iterator : public indexed_accessor_iterator<iterator, BaseT, T, @@ -1087,8 +1116,7 @@ public: private: iterator(BaseT owner, ptrdiff_t curIndex) - : indexed_accessor_iterator<iterator, BaseT, T, PointerT, ReferenceT>( - owner, curIndex) {} + : iterator::indexed_accessor_iterator(owner, curIndex) {} /// Allow access to the constructor. friend indexed_accessor_range_base<DerivedT, BaseT, T, PointerT, @@ -1234,20 +1262,39 @@ public: } }; +namespace detail { +/// Return a reference to the first or second member of a reference. Otherwise, +/// return a copy of the member of a temporary. +/// +/// When passing a range whose iterators return values instead of references, +/// the reference must be dropped from `decltype((elt.first))`, which will +/// always be a reference, to avoid returning a reference to a temporary. +template <typename EltTy, typename FirstTy> class first_or_second_type { +public: + using type = + typename std::conditional_t<std::is_reference<EltTy>::value, FirstTy, + std::remove_reference_t<FirstTy>>; +}; +} // end namespace detail + /// Given a container of pairs, return a range over the first elements. 
template <typename ContainerTy> auto make_first_range(ContainerTy &&c) { - return llvm::map_range( - std::forward<ContainerTy>(c), - [](decltype((*std::begin(c))) elt) -> decltype((elt.first)) { - return elt.first; - }); + using EltTy = decltype((*std::begin(c))); + return llvm::map_range(std::forward<ContainerTy>(c), + [](EltTy elt) -> typename detail::first_or_second_type< + EltTy, decltype((elt.first))>::type { + return elt.first; + }); } /// Given a container of pairs, return a range over the second elements. template <typename ContainerTy> auto make_second_range(ContainerTy &&c) { + using EltTy = decltype((*std::begin(c))); return llvm::map_range( std::forward<ContainerTy>(c), - [](decltype((*std::begin(c))) elt) -> decltype((elt.second)) { + [](EltTy elt) -> + typename detail::first_or_second_type<EltTy, + decltype((elt.second))>::type { return elt.second; }); } @@ -1260,7 +1307,7 @@ template <typename ContainerTy> auto make_second_range(ContainerTy &&c) { /// compares less than the first component of another std::pair. struct less_first { template <typename T> bool operator()(const T &lhs, const T &rhs) const { - return lhs.first < rhs.first; + return std::less<>()(lhs.first, rhs.first); } }; @@ -1268,7 +1315,7 @@ struct less_first { /// compares less than the second component of another std::pair. 
struct less_second { template <typename T> bool operator()(const T &lhs, const T &rhs) const { - return lhs.second < rhs.second; + return std::less<>()(lhs.second, rhs.second); } }; @@ -1877,8 +1924,7 @@ template <typename R> struct result_pair { } std::size_t index() const { return Index; } - const value_reference value() const { return *Iter; } - value_reference value() { return *Iter; } + value_reference value() const { return *Iter; } private: std::size_t Index = std::numeric_limits<std::size_t>::max(); @@ -1887,11 +1933,8 @@ private: template <typename R> class enumerator_iter - : public iterator_facade_base< - enumerator_iter<R>, std::forward_iterator_tag, result_pair<R>, - typename std::iterator_traits<IterOfRange<R>>::difference_type, - typename std::iterator_traits<IterOfRange<R>>::pointer, - typename std::iterator_traits<IterOfRange<R>>::reference> { + : public iterator_facade_base<enumerator_iter<R>, std::forward_iterator_tag, + const result_pair<R>> { using result_type = result_pair<R>; public: @@ -1901,7 +1944,6 @@ public: enumerator_iter(std::size_t Index, IterOfRange<R> Iter) : Result(Index, Iter) {} - result_type &operator*() { return Result; } const result_type &operator*() const { return Result; } enumerator_iter &operator++() { @@ -1986,6 +2028,45 @@ decltype(auto) apply_tuple(F &&f, Tuple &&t) { Indices{}); } +namespace detail { + +template <typename Predicate, typename... Args> +bool all_of_zip_predicate_first(Predicate &&P, Args &&...args) { + auto z = zip(args...); + auto it = z.begin(); + auto end = z.end(); + while (it != end) { + if (!apply_tuple([&](auto &&...args) { return P(args...); }, *it)) + return false; + ++it; + } + return it.all_equals(end); +} + +// Just an adaptor to switch the order of argument and have the predicate before +// the zipped inputs. +template <typename... ArgsThenPredicate, size_t... 
InputIndexes> +bool all_of_zip_predicate_last( + std::tuple<ArgsThenPredicate...> argsThenPredicate, + std::index_sequence<InputIndexes...>) { + auto constexpr OutputIndex = + std::tuple_size<decltype(argsThenPredicate)>::value - 1; + return all_of_zip_predicate_first(std::get<OutputIndex>(argsThenPredicate), + std::get<InputIndexes>(argsThenPredicate)...); +} + +} // end namespace detail + +/// Compare two zipped ranges using the provided predicate (as last argument). +/// Return true if all elements satisfy the predicate and false otherwise. +// Return false if the zipped iterator aren't all at end (size mismatch). +template <typename... ArgsAndPredicate> +bool all_of_zip(ArgsAndPredicate &&...argsAndPredicate) { + return detail::all_of_zip_predicate_last( + std::forward_as_tuple(argsAndPredicate...), + std::make_index_sequence<sizeof...(argsAndPredicate) - 1>{}); +} + /// Return true if the sequence [Begin, End) has exactly N items. Runs in O(N) /// time. Not meant for use with random-access iterators. /// Can optionally take a predicate to filter lazily some items. diff --git a/llvm/include/llvm/ADT/Sequence.h b/llvm/include/llvm/ADT/Sequence.h index 3e4bf0932222..fdbf397984d0 100644 --- a/llvm/include/llvm/ADT/Sequence.h +++ b/llvm/include/llvm/ADT/Sequence.h @@ -6,9 +6,74 @@ // //===----------------------------------------------------------------------===// /// \file -/// This routine provides some synthesis utilities to produce sequences of -/// values. The names are intentionally kept very short as they tend to occur -/// in common and widely used contexts. +/// Provides some synthesis utilities to produce sequences of values. The names +/// are intentionally kept very short as they tend to occur in common and +/// widely used contexts. +/// +/// The `seq(A, B)` function produces a sequence of values from `A` to up to +/// (but not including) `B`, i.e., [`A`, `B`), that can be safely iterated over. 
+/// `seq` supports both integral (e.g., `int`, `char`, `uint32_t`) and enum +/// types. `seq_inclusive(A, B)` produces a sequence of values from `A` to `B`, +/// including `B`. +/// +/// Examples with integral types: +/// ``` +/// for (int x : seq(0, 3)) +/// outs() << x << " "; +/// ``` +/// +/// Prints: `0 1 2 `. +/// +/// ``` +/// for (int x : seq_inclusive(0, 3)) +/// outs() << x << " "; +/// ``` +/// +/// Prints: `0 1 2 3 `. +/// +/// Similar to `seq` and `seq_inclusive`, the `enum_seq` and +/// `enum_seq_inclusive` functions produce sequences of enum values that can be +/// iterated over. +/// To enable iteration with enum types, you need to either mark enums as safe +/// to iterate on by specializing `enum_iteration_traits`, or opt into +/// potentially unsafe iteration at every callsite by passing +/// `force_iteration_on_noniterable_enum`. +/// +/// Examples with enum types: +/// ``` +/// namespace X { +/// enum class MyEnum : unsigned {A = 0, B, C}; +/// } // namespace X +/// +/// template <> struct enum_iteration_traits<X::MyEnum> { +/// static contexpr bool is_iterable = true; +/// }; +/// +/// class MyClass { +/// public: +/// enum Safe { D = 3, E, F }; +/// enum MaybeUnsafe { G = 1, H = 2, I = 4 }; +/// }; +/// +/// template <> struct enum_iteration_traits<MyClass::Safe> { +/// static contexpr bool is_iterable = true; +/// }; +/// ``` +/// +/// ``` +/// for (auto v : enum_seq(MyClass::Safe::D, MyClass::Safe::F)) +/// outs() << int(v) << " "; +/// ``` +/// +/// Prints: `3 4 `. +/// +/// ``` +/// for (auto v : enum_seq(MyClass::MaybeUnsafe::H, MyClass::MaybeUnsafe::I, +/// force_iteration_on_noniterable_enum)) +/// outs() << int(v) << " "; +/// ``` +/// +/// Prints: `2 3 `. 
/// //===----------------------------------------------------------------------===// @@ -18,12 +83,31 @@ #include <cassert> // assert #include <iterator> // std::random_access_iterator_tag #include <limits> // std::numeric_limits -#include <type_traits> // std::underlying_type, std::is_enum +#include <type_traits> // std::is_integral, std::is_enum, std::underlying_type, + // std::enable_if #include "llvm/Support/MathExtras.h" // AddOverflow / SubOverflow namespace llvm { +// Enum traits that mark enums as safe or unsafe to iterate over. +// By default, enum types are *not* considered safe for iteration. +// To allow iteration for your enum type, provide a specialization with +// `is_iterable` set to `true` in the `llvm` namespace. +// Alternatively, you can pass the `force_iteration_on_noniterable_enum` tag +// to `enum_seq` or `enum_seq_inclusive`. +template <typename EnumT> struct enum_iteration_traits { + static constexpr bool is_iterable = false; +}; + +struct force_iteration_on_noniterable_enum_t { + explicit force_iteration_on_noniterable_enum_t() = default; +}; + +// TODO: Make this `inline` once we update to C++17 to avoid ODR violations. +constexpr force_iteration_on_noniterable_enum_t + force_iteration_on_noniterable_enum; + namespace detail { // Returns whether a value of type U can be represented with type T. @@ -213,27 +297,81 @@ private: iterator PastEndValue; }; -/// Iterate over an integral/enum type from Begin up to - but not including - -/// End. -/// Note on enum iteration: `seq` will generate each consecutive value, even if -/// no enumerator with that value exists. +/// Iterate over an integral type from Begin up to - but not including - End. /// Note: Begin and End values have to be within [INTMAX_MIN, INTMAX_MAX] for /// forward iteration (resp. [INTMAX_MIN + 1, INTMAX_MAX] for reverse /// iteration). 
-template <typename T> auto seq(T Begin, T End) { +template <typename T, typename = std::enable_if_t<std::is_integral<T>::value && + !std::is_enum<T>::value>> +auto seq(T Begin, T End) { return iota_range<T>(Begin, End, false); } -/// Iterate over an integral/enum type from Begin to End inclusive. -/// Note on enum iteration: `seq_inclusive` will generate each consecutive -/// value, even if no enumerator with that value exists. +/// Iterate over an integral type from Begin to End inclusive. /// Note: Begin and End values have to be within [INTMAX_MIN, INTMAX_MAX - 1] /// for forward iteration (resp. [INTMAX_MIN + 1, INTMAX_MAX - 1] for reverse /// iteration). -template <typename T> auto seq_inclusive(T Begin, T End) { +template <typename T, typename = std::enable_if_t<std::is_integral<T>::value && + !std::is_enum<T>::value>> +auto seq_inclusive(T Begin, T End) { return iota_range<T>(Begin, End, true); } +/// Iterate over an enum type from Begin up to - but not including - End. +/// Note: `enum_seq` will generate each consecutive value, even if no +/// enumerator with that value exists. +/// Note: Begin and End values have to be within [INTMAX_MIN, INTMAX_MAX] for +/// forward iteration (resp. [INTMAX_MIN + 1, INTMAX_MAX] for reverse +/// iteration). +template <typename EnumT, + typename = std::enable_if_t<std::is_enum<EnumT>::value>> +auto enum_seq(EnumT Begin, EnumT End) { + static_assert(enum_iteration_traits<EnumT>::is_iterable, + "Enum type is not marked as iterable."); + return iota_range<EnumT>(Begin, End, false); +} + +/// Iterate over an enum type from Begin up to - but not including - End, even +/// when `EnumT` is not marked as safely iterable by `enum_iteration_traits`. +/// Note: `enum_seq` will generate each consecutive value, even if no +/// enumerator with that value exists. +/// Note: Begin and End values have to be within [INTMAX_MIN, INTMAX_MAX] for +/// forward iteration (resp. [INTMAX_MIN + 1, INTMAX_MAX] for reverse +/// iteration). 
+template <typename EnumT, + typename = std::enable_if_t<std::is_enum<EnumT>::value>> +auto enum_seq(EnumT Begin, EnumT End, force_iteration_on_noniterable_enum_t) { + return iota_range<EnumT>(Begin, End, false); +} + +/// Iterate over an enum type from Begin to End inclusive. +/// Note: `enum_seq_inclusive` will generate each consecutive value, even if no +/// enumerator with that value exists. +/// Note: Begin and End values have to be within [INTMAX_MIN, INTMAX_MAX - 1] +/// for forward iteration (resp. [INTMAX_MIN + 1, INTMAX_MAX - 1] for reverse +/// iteration). +template <typename EnumT, + typename = std::enable_if_t<std::is_enum<EnumT>::value>> +auto enum_seq_inclusive(EnumT Begin, EnumT End) { + static_assert(enum_iteration_traits<EnumT>::is_iterable, + "Enum type is not marked as iterable."); + return iota_range<EnumT>(Begin, End, true); +} + +/// Iterate over an enum type from Begin to End inclusive, even when `EnumT` +/// is not marked as safely iterable by `enum_iteration_traits`. +/// Note: `enum_seq_inclusive` will generate each consecutive value, even if no +/// enumerator with that value exists. +/// Note: Begin and End values have to be within [INTMAX_MIN, INTMAX_MAX - 1] +/// for forward iteration (resp. [INTMAX_MIN + 1, INTMAX_MAX - 1] for reverse +/// iteration). 
+template <typename EnumT, + typename = std::enable_if_t<std::is_enum<EnumT>::value>> +auto enum_seq_inclusive(EnumT Begin, EnumT End, + force_iteration_on_noniterable_enum_t) { + return iota_range<EnumT>(Begin, End, true); +} + } // end namespace llvm #endif // LLVM_ADT_SEQUENCE_H diff --git a/llvm/include/llvm/ADT/SetOperations.h b/llvm/include/llvm/ADT/SetOperations.h index 62f1d26dc1c2..3e30b6bb83d3 100644 --- a/llvm/include/llvm/ADT/SetOperations.h +++ b/llvm/include/llvm/ADT/SetOperations.h @@ -77,15 +77,6 @@ bool set_is_subset(const S1Ty &S1, const S2Ty &S2) { return true; } -/// set_is_strict_subset(A, B) - Return true iff A in B and and A != B -/// -template <class S1Ty, class S2Ty> -bool set_is_strict_subset(const S1Ty &S1, const S2Ty &S2) { - if (S1.size() >= S2.size()) - return false; - return set_is_subset(S1, S2); -} - } // End llvm namespace #endif diff --git a/llvm/include/llvm/ADT/SmallBitVector.h b/llvm/include/llvm/ADT/SmallBitVector.h index f570bac23ad5..51ee5dbbce05 100644 --- a/llvm/include/llvm/ADT/SmallBitVector.h +++ b/llvm/include/llvm/ADT/SmallBitVector.h @@ -60,7 +60,7 @@ class SmallBitVector { "Unsupported word size"); public: - using size_type = unsigned; + using size_type = uintptr_t; // Encapsulation of a single bit. class reference { @@ -96,7 +96,7 @@ private: return reinterpret_cast<BitVector *>(X); } - void switchToSmall(uintptr_t NewSmallBits, size_t NewSize) { + void switchToSmall(uintptr_t NewSmallBits, size_type NewSize) { X = 1; setSmallSize(NewSize); setSmallBits(NewSmallBits); @@ -120,9 +120,11 @@ private: } // Return the size. - size_t getSmallSize() const { return getSmallRawBits() >> SmallNumDataBits; } + size_type getSmallSize() const { + return getSmallRawBits() >> SmallNumDataBits; + } - void setSmallSize(size_t Size) { + void setSmallSize(size_type Size) { setSmallRawBits(getSmallBits() | (Size << SmallNumDataBits)); } @@ -189,7 +191,7 @@ public: } /// Returns the number of bits in this bitvector. 
- size_t size() const { + size_type size() const { return isSmall() ? getSmallSize() : getPointer()->size(); } @@ -336,8 +338,8 @@ public: } else { BitVector *BV = new BitVector(N, t); uintptr_t OldBits = getSmallBits(); - for (size_t i = 0, e = getSmallSize(); i != e; ++i) - (*BV)[i] = (OldBits >> i) & 1; + for (size_type I = 0, E = getSmallSize(); I != E; ++I) + (*BV)[I] = (OldBits >> I) & 1; switchToLarge(BV); } } @@ -346,11 +348,11 @@ public: if (isSmall()) { if (N > SmallNumDataBits) { uintptr_t OldBits = getSmallRawBits(); - size_t SmallSize = getSmallSize(); + size_type SmallSize = getSmallSize(); BitVector *BV = new BitVector(SmallSize); - for (size_t i = 0; i < SmallSize; ++i) - if ((OldBits >> i) & 1) - BV->set(i); + for (size_type I = 0; I < SmallSize; ++I) + if ((OldBits >> I) & 1) + BV->set(I); BV->reserve(N); switchToLarge(BV); } @@ -491,8 +493,8 @@ public: else if (!isSmall() && !RHS.isSmall()) return *getPointer() == *RHS.getPointer(); else { - for (size_t i = 0, e = size(); i != e; ++i) { - if ((*this)[i] != RHS[i]) + for (size_type I = 0, E = size(); I != E; ++I) { + if ((*this)[I] != RHS[I]) return false; } return true; @@ -512,11 +514,11 @@ public: else if (!isSmall() && !RHS.isSmall()) getPointer()->operator&=(*RHS.getPointer()); else { - size_t i, e; - for (i = 0, e = std::min(size(), RHS.size()); i != e; ++i) - (*this)[i] = test(i) && RHS.test(i); - for (e = size(); i != e; ++i) - reset(i); + size_type I, E; + for (I = 0, E = std::min(size(), RHS.size()); I != E; ++I) + (*this)[I] = test(I) && RHS.test(I); + for (E = size(); I != E; ++I) + reset(I); } return *this; } @@ -561,8 +563,8 @@ public: else if (!isSmall() && !RHS.isSmall()) getPointer()->operator|=(*RHS.getPointer()); else { - for (size_t i = 0, e = RHS.size(); i != e; ++i) - (*this)[i] = test(i) || RHS.test(i); + for (size_type I = 0, E = RHS.size(); I != E; ++I) + (*this)[I] = test(I) || RHS.test(I); } return *this; } @@ -574,8 +576,8 @@ public: else if (!isSmall() && 
!RHS.isSmall()) getPointer()->operator^=(*RHS.getPointer()); else { - for (size_t i = 0, e = RHS.size(); i != e; ++i) - (*this)[i] = test(i) != RHS.test(i); + for (size_type I = 0, E = RHS.size(); I != E; ++I) + (*this)[I] = test(I) != RHS.test(I); } return *this; } @@ -721,8 +723,9 @@ template <> struct DenseMapInfo<SmallBitVector> { } static unsigned getHashValue(const SmallBitVector &V) { uintptr_t Store; - return DenseMapInfo<std::pair<unsigned, ArrayRef<uintptr_t>>>::getHashValue( - std::make_pair(V.size(), V.getData(Store))); + return DenseMapInfo< + std::pair<SmallBitVector::size_type, ArrayRef<uintptr_t>>>:: + getHashValue(std::make_pair(V.size(), V.getData(Store))); } static bool isEqual(const SmallBitVector &LHS, const SmallBitVector &RHS) { if (LHS.isInvalid() || RHS.isInvalid()) diff --git a/llvm/include/llvm/ADT/SmallVector.h b/llvm/include/llvm/ADT/SmallVector.h index b8a11030fc33..0d13524f25ce 100644 --- a/llvm/include/llvm/ADT/SmallVector.h +++ b/llvm/include/llvm/ADT/SmallVector.h @@ -1239,13 +1239,22 @@ inline size_t capacity_in_bytes(const SmallVector<T, N> &X) { return X.capacity_in_bytes(); } +template <typename RangeType> +using ValueTypeFromRangeType = + typename std::remove_const<typename std::remove_reference< + decltype(*std::begin(std::declval<RangeType &>()))>::type>::type; + /// Given a range of type R, iterate the entire range and return a /// SmallVector with elements of the vector. This is useful, for example, /// when you want to iterate a range and then sort the results. 
template <unsigned Size, typename R> -SmallVector<typename std::remove_const<typename std::remove_reference< - decltype(*std::begin(std::declval<R &>()))>::type>::type, - Size> +SmallVector<ValueTypeFromRangeType<R>, Size> to_vector(R &&Range) { + return {std::begin(Range), std::end(Range)}; +} +template <typename R> +SmallVector<ValueTypeFromRangeType<R>, + CalculateSmallVectorDefaultInlinedElements< + ValueTypeFromRangeType<R>>::value> to_vector(R &&Range) { return {std::begin(Range), std::end(Range)}; } diff --git a/llvm/include/llvm/ADT/StringExtras.h b/llvm/include/llvm/ADT/StringExtras.h index 6bda25b85313..2ca672e7855b 100644 --- a/llvm/include/llvm/ADT/StringExtras.h +++ b/llvm/include/llvm/ADT/StringExtras.h @@ -67,22 +67,27 @@ inline ArrayRef<uint8_t> arrayRefFromStringRef(StringRef Input) { /// /// If \p C is not a valid hex digit, -1U is returned. inline unsigned hexDigitValue(char C) { - struct HexTable { - unsigned LUT[255] = {}; - constexpr HexTable() { - // Default initialize everything to invalid. - for (int i = 0; i < 255; ++i) - LUT[i] = ~0U; - // Initialize `0`-`9`. - for (int i = 0; i < 10; ++i) - LUT['0' + i] = i; - // Initialize `A`-`F` and `a`-`f`. 
- for (int i = 0; i < 6; ++i) - LUT['A' + i] = LUT['a' + i] = 10 + i; - } + /* clang-format off */ + static const int16_t LUT[256] = { + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, // '0'..'9' + -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 'A'..'F' + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 'a'..'f' + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, }; - constexpr HexTable Table; - return Table.LUT[static_cast<unsigned char>(C)]; + /* clang-format on */ + return LUT[static_cast<unsigned char>(C)]; } /// Checks if character \p C is one of the 10 decimal digits. @@ -210,24 +215,31 @@ inline bool tryGetFromHex(StringRef Input, std::string &Output) { if (Input.empty()) return true; - Output.reserve((Input.size() + 1) / 2); + // If the input string is not properly aligned on 2 nibbles we pad out the + // front with a 0 prefix; e.g. `ABC` -> `0ABC`. 
+ Output.resize((Input.size() + 1) / 2); + char *OutputPtr = const_cast<char *>(Output.data()); if (Input.size() % 2 == 1) { uint8_t Hex = 0; if (!tryGetHexFromNibbles('0', Input.front(), Hex)) return false; - - Output.push_back(Hex); + *OutputPtr++ = Hex; Input = Input.drop_front(); } - assert(Input.size() % 2 == 0); - while (!Input.empty()) { + // Convert the nibble pairs (e.g. `9C`) into bytes (0x9C). + // With the padding above we know the input is aligned and the output expects + // exactly half as many bytes as nibbles in the input. + size_t InputSize = Input.size(); + assert(InputSize % 2 == 0); + const char *InputPtr = Input.data(); + for (size_t OutputIndex = 0; OutputIndex < InputSize / 2; ++OutputIndex) { uint8_t Hex = 0; - if (!tryGetHexFromNibbles(Input[0], Input[1], Hex)) + if (!tryGetHexFromNibbles(InputPtr[OutputIndex * 2 + 0], // MSB + InputPtr[OutputIndex * 2 + 1], // LSB + Hex)) return false; - - Output.push_back(Hex); - Input = Input.drop_front(2); + OutputPtr[OutputIndex] = Hex; } return true; } @@ -501,6 +513,83 @@ public: } }; +/// A forward iterator over partitions of string over a separator. 
+class SplittingIterator + : public iterator_facade_base<SplittingIterator, std::forward_iterator_tag, + StringRef> { + char SeparatorStorage; + StringRef Current; + StringRef Next; + StringRef Separator; + +public: + SplittingIterator(StringRef Str, StringRef Separator) + : Next(Str), Separator(Separator) { + ++*this; + } + + SplittingIterator(StringRef Str, char Separator) + : SeparatorStorage(Separator), Next(Str), + Separator(&SeparatorStorage, 1) { + ++*this; + } + + SplittingIterator(const SplittingIterator &R) + : SeparatorStorage(R.SeparatorStorage), Current(R.Current), Next(R.Next), + Separator(R.Separator) { + if (R.Separator.data() == &R.SeparatorStorage) + Separator = StringRef(&SeparatorStorage, 1); + } + + SplittingIterator &operator=(const SplittingIterator &R) { + if (this == &R) + return *this; + + SeparatorStorage = R.SeparatorStorage; + Current = R.Current; + Next = R.Next; + Separator = R.Separator; + if (R.Separator.data() == &R.SeparatorStorage) + Separator = StringRef(&SeparatorStorage, 1); + return *this; + } + + bool operator==(const SplittingIterator &R) const { + assert(Separator == R.Separator); + return Current.data() == R.Current.data(); + } + + const StringRef &operator*() const { return Current; } + + StringRef &operator*() { return Current; } + + SplittingIterator &operator++() { + std::tie(Current, Next) = Next.split(Separator); + return *this; + } +}; + +/// Split the specified string over a separator and return a range-compatible +/// iterable over its partitions. Used to permit conveniently iterating +/// over separated strings like so: +/// +/// \code +/// for (StringRef x : llvm::split("foo,bar,baz", ",")) +/// ...; +/// \endcode +/// +/// Note that the passed string must remain valid throughout the lifetime +/// of the iterators. 
+inline iterator_range<SplittingIterator> split(StringRef Str, StringRef Separator) { + return {SplittingIterator(Str, Separator), + SplittingIterator(StringRef(), Separator)}; +} + +inline iterator_range<SplittingIterator> split(StringRef Str, char Separator) { + return {SplittingIterator(Str, Separator), + SplittingIterator(StringRef(), Separator)}; +} + } // end namespace llvm #endif // LLVM_ADT_STRINGEXTRAS_H diff --git a/llvm/include/llvm/ADT/StringMap.h b/llvm/include/llvm/ADT/StringMap.h index a82afc9a817c..669956d41e0c 100644 --- a/llvm/include/llvm/ADT/StringMap.h +++ b/llvm/include/llvm/ADT/StringMap.h @@ -126,9 +126,7 @@ public: StringMap(std::initializer_list<std::pair<StringRef, ValueTy>> List) : StringMapImpl(List.size(), static_cast<unsigned>(sizeof(MapEntryTy))) { - for (const auto &P : List) { - insert(P); - } + insert(List); } StringMap(StringMap &&RHS) @@ -297,6 +295,21 @@ public: return try_emplace(KV.first, std::move(KV.second)); } + /// Inserts elements from range [first, last). If multiple elements in the + /// range have keys that compare equivalent, it is unspecified which element + /// is inserted . + template <typename InputIt> void insert(InputIt First, InputIt Last) { + for (InputIt It = First; It != Last; ++It) + insert(*It); + } + + /// Inserts elements from initializer list ilist. If multiple elements in + /// the range have keys that compare equivalent, it is unspecified which + /// element is inserted + void insert(std::initializer_list<std::pair<StringRef, ValueTy>> List) { + insert(List.begin(), List.end()); + } + /// Inserts an element or assigns to the current element if the key already /// exists. The return type is the same as try_emplace. 
template <typename V> @@ -465,13 +478,7 @@ public: explicit StringMapKeyIterator(StringMapConstIterator<ValueTy> Iter) : base(std::move(Iter)) {} - StringRef &operator*() { - Key = this->wrapped()->getKey(); - return Key; - } - -private: - StringRef Key; + StringRef operator*() const { return this->wrapped()->getKey(); } }; } // end namespace llvm diff --git a/llvm/include/llvm/ADT/StringRef.h b/llvm/include/llvm/ADT/StringRef.h index 17e64f7f81bb..9f4b89218042 100644 --- a/llvm/include/llvm/ADT/StringRef.h +++ b/llvm/include/llvm/ADT/StringRef.h @@ -35,7 +35,6 @@ namespace llvm { class APInt; class hash_code; template <typename T> class SmallVectorImpl; - template <typename T> struct DenseMapInfo; class StringRef; /// Helper functions for StringRef::getAsInteger. @@ -949,7 +948,7 @@ namespace llvm { hash_code hash_value(StringRef S); // Provide DenseMapInfo for StringRefs. - template <> struct DenseMapInfo<StringRef> { + template <> struct DenseMapInfo<StringRef, void> { static inline StringRef getEmptyKey() { return StringRef( reinterpret_cast<const char *>(~static_cast<uintptr_t>(0)), 0); diff --git a/llvm/include/llvm/ADT/Triple.h b/llvm/include/llvm/ADT/Triple.h index 76f3514050f0..2fd3047acbfd 100644 --- a/llvm/include/llvm/ADT/Triple.h +++ b/llvm/include/llvm/ADT/Triple.h @@ -93,6 +93,8 @@ public: hsail64, // AMD HSAIL with 64-bit pointers spir, // SPIR: standard portable IR for OpenCL 32-bit version spir64, // SPIR: standard portable IR for OpenCL 64-bit version + spirv32, // SPIR-V with 32-bit pointers + spirv64, // SPIR-V with 64-bit pointers kalimba, // Kalimba: generic kalimba shave, // SHAVE: Movidius vector VLIW processors lanai, // Lanai: Lanai 32-bit @@ -106,6 +108,9 @@ public: enum SubArchType { NoSubArch, + ARMSubArch_v9_2a, + ARMSubArch_v9_1a, + ARMSubArch_v9, ARMSubArch_v8_7a, ARMSubArch_v8_6a, ARMSubArch_v8_5a, @@ -290,10 +295,10 @@ public: /// @name Normalization /// @{ - /// normalize - Turn an arbitrary machine specification into the 
canonical - /// triple form (or something sensible that the Triple class understands if - /// nothing better can reasonably be done). In particular, it handles the - /// common case in which otherwise valid components are in the wrong order. + /// Turn an arbitrary machine specification into the canonical triple form (or + /// something sensible that the Triple class understands if nothing better can + /// reasonably be done). In particular, it handles the common case in which + /// otherwise valid components are in the wrong order. static std::string normalize(StringRef Str); /// Return the normalized form of this triple's string. @@ -303,25 +308,24 @@ public: /// @name Typed Component Access /// @{ - /// getArch - Get the parsed architecture type of this triple. + /// Get the parsed architecture type of this triple. ArchType getArch() const { return Arch; } - /// getSubArch - get the parsed subarchitecture type for this triple. + /// get the parsed subarchitecture type for this triple. SubArchType getSubArch() const { return SubArch; } - /// getVendor - Get the parsed vendor type of this triple. + /// Get the parsed vendor type of this triple. VendorType getVendor() const { return Vendor; } - /// getOS - Get the parsed operating system type of this triple. + /// Get the parsed operating system type of this triple. OSType getOS() const { return OS; } - /// hasEnvironment - Does this triple have the optional environment - /// (fourth) component? + /// Does this triple have the optional environment (fourth) component? bool hasEnvironment() const { return getEnvironmentName() != ""; } - /// getEnvironment - Get the parsed environment type of this triple. + /// Get the parsed environment type of this triple. 
EnvironmentType getEnvironment() const { return Environment; } /// Parse the version number from the OS name component of the @@ -333,39 +337,39 @@ public: void getEnvironmentVersion(unsigned &Major, unsigned &Minor, unsigned &Micro) const; - /// getFormat - Get the object format for this triple. + /// Get the object format for this triple. ObjectFormatType getObjectFormat() const { return ObjectFormat; } - /// getOSVersion - Parse the version number from the OS name component of the - /// triple, if present. + /// Parse the version number from the OS name component of the triple, if + /// present. /// /// For example, "fooos1.2.3" would return (1, 2, 3). /// /// If an entry is not defined, it will be returned as 0. void getOSVersion(unsigned &Major, unsigned &Minor, unsigned &Micro) const; - /// getOSMajorVersion - Return just the major version number, this is - /// specialized because it is a common query. + /// Return just the major version number, this is specialized because it is a + /// common query. unsigned getOSMajorVersion() const { unsigned Maj, Min, Micro; getOSVersion(Maj, Min, Micro); return Maj; } - /// getMacOSXVersion - Parse the version number as with getOSVersion and then - /// translate generic "darwin" versions to the corresponding OS X versions. - /// This may also be called with IOS triples but the OS X version number is - /// just set to a constant 10.4.0 in that case. Returns true if successful. + /// Parse the version number as with getOSVersion and then translate generic + /// "darwin" versions to the corresponding OS X versions. This may also be + /// called with IOS triples but the OS X version number is just set to a + /// constant 10.4.0 in that case. Returns true if successful. bool getMacOSXVersion(unsigned &Major, unsigned &Minor, unsigned &Micro) const; - /// getiOSVersion - Parse the version number as with getOSVersion. This should - /// only be called with IOS or generic triples. 
+ /// Parse the version number as with getOSVersion. This should only be called + /// with IOS or generic triples. void getiOSVersion(unsigned &Major, unsigned &Minor, unsigned &Micro) const; - /// getWatchOSVersion - Parse the version number as with getOSVersion. This - /// should only be called with WatchOS or generic triples. + /// Parse the version number as with getOSVersion. This should only be called + /// with WatchOS or generic triples. void getWatchOSVersion(unsigned &Major, unsigned &Minor, unsigned &Micro) const; @@ -377,24 +381,24 @@ public: const std::string &getTriple() const { return Data; } - /// getArchName - Get the architecture (first) component of the - /// triple. + /// Get the architecture (first) component of the triple. StringRef getArchName() const; - /// getVendorName - Get the vendor (second) component of the triple. + /// Get the architecture name based on Kind and SubArch. + StringRef getArchName(ArchType Kind, SubArchType SubArch = NoSubArch) const; + + /// Get the vendor (second) component of the triple. StringRef getVendorName() const; - /// getOSName - Get the operating system (third) component of the - /// triple. + /// Get the operating system (third) component of the triple. StringRef getOSName() const; - /// getEnvironmentName - Get the optional environment (fourth) - /// component of the triple, or "" if empty. + /// Get the optional environment (fourth) component of the triple, or "" if + /// empty. StringRef getEnvironmentName() const; - /// getOSAndEnvironmentName - Get the operating system and optional - /// environment components as a single string (separated by a '-' - /// if the environment component is present). + /// Get the operating system and optional environment components as a single + /// string (separated by a '-' if the environment component is present). StringRef getOSAndEnvironmentName() const; /// @} @@ -420,8 +424,8 @@ public: /// Note that this tests for 16-bit pointer width, and nothing else. 
bool isArch16Bit() const; - /// isOSVersionLT - Helper function for doing comparisons against version - /// numbers included in the target triple. + /// Helper function for doing comparisons against version numbers included in + /// the target triple. bool isOSVersionLT(unsigned Major, unsigned Minor = 0, unsigned Micro = 0) const { unsigned LHS[3]; @@ -443,14 +447,13 @@ public: return isOSVersionLT(RHS[0], RHS[1], RHS[2]); } - /// isMacOSXVersionLT - Comparison function for checking OS X version - /// compatibility, which handles supporting skewed version numbering schemes - /// used by the "darwin" triples. + /// Comparison function for checking OS X version compatibility, which handles + /// supporting skewed version numbering schemes used by the "darwin" triples. bool isMacOSXVersionLT(unsigned Major, unsigned Minor = 0, unsigned Micro = 0) const; - /// isMacOSX - Is this a Mac OS X triple. For legacy reasons, we support both - /// "darwin" and "osx" as OS X triples. + /// Is this a Mac OS X triple. For legacy reasons, we support both "darwin" + /// and "osx" as OS X triples. bool isMacOSX() const { return getOS() == Triple::Darwin || getOS() == Triple::MacOSX; } @@ -480,7 +483,7 @@ public: bool isOSzOS() const { return getOS() == Triple::ZOS; } - /// isOSDarwin - Is this a "Darwin" OS (macOS, iOS, tvOS or watchOS). + /// Is this a "Darwin" OS (macOS, iOS, tvOS or watchOS). bool isOSDarwin() const { return isMacOSX() || isiOS() || isWatchOS(); } @@ -698,6 +701,11 @@ public: return getArch() == Triple::spir || getArch() == Triple::spir64; } + /// Tests whether the target is SPIR-V (32/64-bit). + bool isSPIRV() const { + return getArch() == Triple::spirv32 || getArch() == Triple::spirv64; + } + /// Tests whether the target is NVPTX (32- or 64-bit). 
bool isNVPTX() const { return getArch() == Triple::nvptx || getArch() == Triple::nvptx64; @@ -720,6 +728,19 @@ public: return getArch() == Triple::arm || getArch() == Triple::armeb; } + /// Tests whether the target supports the EHABI exception + /// handling standard. + bool isTargetEHABICompatible() const { + return (isARM() || isThumb()) && + (getEnvironment() == Triple::EABI || + getEnvironment() == Triple::GNUEABI || + getEnvironment() == Triple::MuslEABI || + getEnvironment() == Triple::EABIHF || + getEnvironment() == Triple::GNUEABIHF || + getEnvironment() == Triple::MuslEABIHF || isAndroid()) && + isOSBinFormatELF(); + } + /// Tests whether the target is AArch64 (little and big endian). bool isAArch64() const { return getArch() == Triple::aarch64 || getArch() == Triple::aarch64_be || @@ -833,46 +854,38 @@ public: /// @name Mutators /// @{ - /// setArch - Set the architecture (first) component of the triple - /// to a known type. - void setArch(ArchType Kind); + /// Set the architecture (first) component of the triple to a known type. + void setArch(ArchType Kind, SubArchType SubArch = NoSubArch); - /// setVendor - Set the vendor (second) component of the triple to a - /// known type. + /// Set the vendor (second) component of the triple to a known type. void setVendor(VendorType Kind); - /// setOS - Set the operating system (third) component of the triple - /// to a known type. + /// Set the operating system (third) component of the triple to a known type. void setOS(OSType Kind); - /// setEnvironment - Set the environment (fourth) component of the triple - /// to a known type. + /// Set the environment (fourth) component of the triple to a known type. void setEnvironment(EnvironmentType Kind); - /// setObjectFormat - Set the object file format + /// Set the object file format. void setObjectFormat(ObjectFormatType Kind); - /// setTriple - Set all components to the new triple \p Str. + /// Set all components to the new triple \p Str. 
void setTriple(const Twine &Str); - /// setArchName - Set the architecture (first) component of the - /// triple by name. + /// Set the architecture (first) component of the triple by name. void setArchName(StringRef Str); - /// setVendorName - Set the vendor (second) component of the triple - /// by name. + /// Set the vendor (second) component of the triple by name. void setVendorName(StringRef Str); - /// setOSName - Set the operating system (third) component of the - /// triple by name. + /// Set the operating system (third) component of the triple by name. void setOSName(StringRef Str); - /// setEnvironmentName - Set the optional environment (fourth) - /// component of the triple by name. + /// Set the optional environment (fourth) component of the triple by name. void setEnvironmentName(StringRef Str); - /// setOSAndEnvironmentName - Set the operating system and optional - /// environment components with a single string. + /// Set the operating system and optional environment components with a single + /// string. void setOSAndEnvironmentName(StringRef Str); /// @} @@ -938,33 +951,30 @@ public: /// @name Static helpers for IDs. /// @{ - /// getArchTypeName - Get the canonical name for the \p Kind architecture. + /// Get the canonical name for the \p Kind architecture. static StringRef getArchTypeName(ArchType Kind); - /// getArchTypePrefix - Get the "prefix" canonical name for the \p Kind - /// architecture. This is the prefix used by the architecture specific - /// builtins, and is suitable for passing to \see - /// Intrinsic::getIntrinsicForGCCBuiltin(). + /// Get the "prefix" canonical name for the \p Kind architecture. This is the + /// prefix used by the architecture specific builtins, and is suitable for + /// passing to \see Intrinsic::getIntrinsicForGCCBuiltin(). /// /// \return - The architecture prefix, or 0 if none is defined. static StringRef getArchTypePrefix(ArchType Kind); - /// getVendorTypeName - Get the canonical name for the \p Kind vendor. 
+ /// Get the canonical name for the \p Kind vendor. static StringRef getVendorTypeName(VendorType Kind); - /// getOSTypeName - Get the canonical name for the \p Kind operating system. + /// Get the canonical name for the \p Kind operating system. static StringRef getOSTypeName(OSType Kind); - /// getEnvironmentTypeName - Get the canonical name for the \p Kind - /// environment. + /// Get the canonical name for the \p Kind environment. static StringRef getEnvironmentTypeName(EnvironmentType Kind); /// @} /// @name Static helpers for converting alternate architecture names. /// @{ - /// getArchTypeForLLVMName - The canonical type for the given LLVM - /// architecture name (e.g., "x86"). + /// The canonical type for the given LLVM architecture name (e.g., "x86"). static ArchType getArchTypeForLLVMName(StringRef Str); /// @} diff --git a/llvm/include/llvm/ADT/TypeSwitch.h b/llvm/include/llvm/ADT/TypeSwitch.h index 815b9a40afaf..3b7598f3251d 100644 --- a/llvm/include/llvm/ADT/TypeSwitch.h +++ b/llvm/include/llvm/ADT/TypeSwitch.h @@ -35,7 +35,12 @@ public: /// Invoke a case on the derived class with multiple case types. template <typename CaseT, typename CaseT2, typename... CaseTs, typename CallableT> - DerivedT &Case(CallableT &&caseFn) { + // This is marked always_inline and nodebug so it doesn't show up in stack + // traces at -O0 (or other optimization levels). Large TypeSwitch's are + // common, are equivalent to a switch, and don't add any value to stack + // traces. + LLVM_ATTRIBUTE_ALWAYS_INLINE LLVM_ATTRIBUTE_NODEBUG DerivedT & + Case(CallableT &&caseFn) { DerivedT &derived = static_cast<DerivedT &>(*this); return derived.template Case<CaseT>(caseFn) .template Case<CaseT2, CaseTs...>(caseFn); diff --git a/llvm/include/llvm/ADT/iterator.h b/llvm/include/llvm/ADT/iterator.h index b3c6608e9b6e..6f0c42fe08be 100644 --- a/llvm/include/llvm/ADT/iterator.h +++ b/llvm/include/llvm/ADT/iterator.h @@ -35,6 +35,21 @@ namespace llvm { /// terms of addition of one. 
These aren't equivalent for all iterator /// categories, and respecting that adds a lot of complexity for little gain. /// +/// Iterators are expected to have const rules analogous to pointers, with a +/// single, const-qualified operator*() that returns ReferenceT. This matches +/// the second and third pointers in the following example: +/// \code +/// int Value; +/// { int *I = &Value; } // ReferenceT 'int&' +/// { int *const I = &Value; } // ReferenceT 'int&'; const +/// { const int *I = &Value; } // ReferenceT 'const int&' +/// { const int *const I = &Value; } // ReferenceT 'const int&'; const +/// \endcode +/// If an iterator facade returns a handle to its own state, then T (and +/// PointerT and ReferenceT) should usually be const-qualified. Otherwise, if +/// clients are expected to modify the handle itself, the field can be declared +/// mutable or use const_cast. +/// /// Classes wishing to use `iterator_facade_base` should implement the following /// methods: /// @@ -42,8 +57,7 @@ namespace llvm { /// (All of the following methods) /// - DerivedT &operator=(const DerivedT &R); /// - bool operator==(const DerivedT &R) const; -/// - const T &operator*() const; -/// - T &operator*(); +/// - T &operator*() const; /// - DerivedT &operator++(); /// /// Bidirectional Iterators: @@ -95,6 +109,22 @@ protected: operator ReferenceT() const { return *I; } }; + /// A proxy object for computing a pointer via indirecting a copy of a + /// reference. This is used in APIs which need to produce a pointer but for + /// which the reference might be a temporary. The proxy preserves the + /// reference internally and exposes the pointer via a arrow operator. 
+ class PointerProxy { + friend iterator_facade_base; + + ReferenceT R; + + template <typename RefT> + PointerProxy(RefT &&R) : R(std::forward<RefT>(R)) {} + + public: + PointerT operator->() const { return &R; } + }; + public: DerivedT operator+(DifferenceTypeT n) const { static_assert(std::is_base_of<iterator_facade_base, DerivedT>::value, @@ -172,19 +202,13 @@ public: return !(static_cast<const DerivedT &>(*this) < RHS); } - PointerT operator->() { return &static_cast<DerivedT *>(this)->operator*(); } - PointerT operator->() const { - return &static_cast<const DerivedT *>(this)->operator*(); - } - ReferenceProxy operator[](DifferenceTypeT n) { - static_assert(IsRandomAccess, - "Subscripting is only defined for random access iterators."); - return ReferenceProxy(static_cast<DerivedT *>(this)->operator+(n)); + PointerProxy operator->() const { + return static_cast<const DerivedT *>(this)->operator*(); } ReferenceProxy operator[](DifferenceTypeT n) const { static_assert(IsRandomAccess, "Subscripting is only defined for random access iterators."); - return ReferenceProxy(static_cast<const DerivedT *>(this)->operator+(n)); + return static_cast<const DerivedT *>(this)->operator+(n); } }; @@ -330,8 +354,7 @@ public: explicit pointer_iterator(WrappedIteratorT u) : pointer_iterator::iterator_adaptor_base(std::move(u)) {} - T &operator*() { return Ptr = &*this->I; } - const T &operator*() const { return Ptr = &*this->I; } + T &operator*() const { return Ptr = &*this->I; } }; template <typename RangeT, typename WrappedIteratorT = diff --git a/llvm/include/llvm/Analysis/AliasAnalysis.h b/llvm/include/llvm/Analysis/AliasAnalysis.h index 7fec0feb09d5..2770a1a9b277 100644 --- a/llvm/include/llvm/Analysis/AliasAnalysis.h +++ b/llvm/include/llvm/Analysis/AliasAnalysis.h @@ -61,6 +61,7 @@ class DominatorTree; class FenceInst; class Function; class InvokeInst; +class LoopInfo; class PreservedAnalyses; class TargetLibraryInfo; class Value; @@ -378,6 +379,50 @@ 
createModRefInfo(const FunctionModRefBehavior FMRB) { return ModRefInfo(FMRB & static_cast<int>(ModRefInfo::ModRef)); } +/// Virtual base class for providers of capture information. +struct CaptureInfo { + virtual ~CaptureInfo() = 0; + virtual bool isNotCapturedBeforeOrAt(const Value *Object, + const Instruction *I) = 0; +}; + +/// Context-free CaptureInfo provider, which computes and caches whether an +/// object is captured in the function at all, but does not distinguish whether +/// it was captured before or after the context instruction. +class SimpleCaptureInfo final : public CaptureInfo { + SmallDenseMap<const Value *, bool, 8> IsCapturedCache; + +public: + bool isNotCapturedBeforeOrAt(const Value *Object, + const Instruction *I) override; +}; + +/// Context-sensitive CaptureInfo provider, which computes and caches the +/// earliest common dominator closure of all captures. It provides a good +/// approximation to a precise "captures before" analysis. +class EarliestEscapeInfo final : public CaptureInfo { + DominatorTree &DT; + const LoopInfo &LI; + + /// Map from identified local object to an instruction before which it does + /// not escape, or nullptr if it never escapes. The "earliest" instruction + /// may be a conservative approximation, e.g. the first instruction in the + /// function is always a legal choice. + DenseMap<const Value *, Instruction *> EarliestEscapes; + + /// Reverse map from instruction to the objects it is the earliest escape for. + /// This is used for cache invalidation purposes. + DenseMap<Instruction *, TinyPtrVector<const Value *>> Inst2Obj; + +public: + EarliestEscapeInfo(DominatorTree &DT, const LoopInfo &LI) : DT(DT), LI(LI) {} + + bool isNotCapturedBeforeOrAt(const Value *Object, + const Instruction *I) override; + + void removeInstruction(Instruction *I); +}; + /// Reduced version of MemoryLocation that only stores a pointer and size. /// Used for caching AATags independent BasicAA results. 
struct AACacheLoc { @@ -425,8 +470,7 @@ public: using AliasCacheT = SmallDenseMap<LocPair, CacheEntry, 8>; AliasCacheT AliasCache; - using IsCapturedCacheT = SmallDenseMap<const Value *, bool, 8>; - IsCapturedCacheT IsCapturedCache; + CaptureInfo *CI; /// Query depth used to distinguish recursive queries. unsigned Depth = 0; @@ -439,18 +483,26 @@ public: /// assumption is disproven. SmallVector<AAQueryInfo::LocPair, 4> AssumptionBasedResults; - AAQueryInfo() : AliasCache(), IsCapturedCache() {} + AAQueryInfo(CaptureInfo *CI) : CI(CI) {} /// Create a new AAQueryInfo based on this one, but with the cache cleared. /// This is used for recursive queries across phis, where cache results may /// not be valid. AAQueryInfo withEmptyCache() { - AAQueryInfo NewAAQI; + AAQueryInfo NewAAQI(CI); NewAAQI.Depth = Depth; return NewAAQI; } }; +/// AAQueryInfo that uses SimpleCaptureInfo. +class SimpleAAQueryInfo : public AAQueryInfo { + SimpleCaptureInfo CI; + +public: + SimpleAAQueryInfo() : AAQueryInfo(&CI) {} +}; + class BatchAAResults; class AAResults { @@ -770,7 +822,7 @@ public: /// helpers above. 
ModRefInfo getModRefInfo(const Instruction *I, const Optional<MemoryLocation> &OptLoc) { - AAQueryInfo AAQIP; + SimpleAAQueryInfo AAQIP; return getModRefInfo(I, OptLoc, AAQIP); } @@ -797,7 +849,7 @@ public: ModRefInfo callCapturesBefore(const Instruction *I, const MemoryLocation &MemLoc, DominatorTree *DT) { - AAQueryInfo AAQIP; + SimpleAAQueryInfo AAQIP; return callCapturesBefore(I, MemLoc, DT, AAQIP); } @@ -896,9 +948,12 @@ private: class BatchAAResults { AAResults &AA; AAQueryInfo AAQI; + SimpleCaptureInfo SimpleCI; public: - BatchAAResults(AAResults &AAR) : AA(AAR), AAQI() {} + BatchAAResults(AAResults &AAR) : AA(AAR), AAQI(&SimpleCI) {} + BatchAAResults(AAResults &AAR, CaptureInfo *CI) : AA(AAR), AAQI(CI) {} + AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB) { return AA.alias(LocA, LocB, AAQI); } diff --git a/llvm/include/llvm/Analysis/AssumeBundleQueries.h b/llvm/include/llvm/Analysis/AssumeBundleQueries.h index 49c0cd89a4db..77da19110246 100644 --- a/llvm/include/llvm/Analysis/AssumeBundleQueries.h +++ b/llvm/include/llvm/Analysis/AssumeBundleQueries.h @@ -20,7 +20,6 @@ #include "llvm/ADT/DenseMap.h" namespace llvm { -class IntrinsicInst; class AssumptionCache; class DominatorTree; @@ -70,15 +69,15 @@ template<> struct DenseMapInfo<Attribute::AttrKind> { using RetainedKnowledgeKey = std::pair<Value *, Attribute::AttrKind>; struct MinMax { - unsigned Min; - unsigned Max; + uint64_t Min; + uint64_t Max; }; /// A mapping from intrinsics (=`llvm.assume` calls) to a value range /// (=knowledge) that is encoded in them. How the value range is interpreted /// depends on the RetainedKnowledgeKey that was used to get this out of the /// RetainedKnowledgeMap. 
-using Assume2KnowledgeMap = DenseMap<IntrinsicInst *, MinMax>; +using Assume2KnowledgeMap = DenseMap<AssumeInst *, MinMax>; using RetainedKnowledgeMap = DenseMap<RetainedKnowledgeKey, Assume2KnowledgeMap>; @@ -100,7 +99,7 @@ void fillMapFromAssume(AssumeInst &Assume, RetainedKnowledgeMap &Result); /// - ArgValue will be 4. struct RetainedKnowledge { Attribute::AttrKind AttrKind = Attribute::None; - unsigned ArgValue = 0; + uint64_t ArgValue = 0; Value *WasOn = nullptr; bool operator==(RetainedKnowledge Other) const { return AttrKind == Other.AttrKind && WasOn == Other.WasOn && diff --git a/llvm/include/llvm/Analysis/AssumptionCache.h b/llvm/include/llvm/Analysis/AssumptionCache.h index 51d04bd8cf02..12dd9b04c932 100644 --- a/llvm/include/llvm/Analysis/AssumptionCache.h +++ b/llvm/include/llvm/Analysis/AssumptionCache.h @@ -29,6 +29,7 @@ namespace llvm { class AssumeInst; class Function; class raw_ostream; +class TargetTransformInfo; class Value; /// A cache of \@llvm.assume calls within a function. @@ -59,6 +60,8 @@ private: /// We track this to lazily populate our assumptions. Function &F; + TargetTransformInfo *TTI; + /// Vector of weak value handles to calls of the \@llvm.assume /// intrinsic. SmallVector<ResultElem, 4> AssumeHandles; @@ -103,7 +106,8 @@ private: public: /// Construct an AssumptionCache from a function by scanning all of /// its instructions. - AssumptionCache(Function &F) : F(F) {} + AssumptionCache(Function &F, TargetTransformInfo *TTI = nullptr) + : F(F), TTI(TTI) {} /// This cache is designed to be self-updating and so it should never be /// invalidated. @@ -174,9 +178,7 @@ class AssumptionAnalysis : public AnalysisInfoMixin<AssumptionAnalysis> { public: using Result = AssumptionCache; - AssumptionCache run(Function &F, FunctionAnalysisManager &) { - return AssumptionCache(F); - } + AssumptionCache run(Function &F, FunctionAnalysisManager &); }; /// Printer pass for the \c AssumptionAnalysis results. 
diff --git a/llvm/include/llvm/Analysis/BasicAliasAnalysis.h b/llvm/include/llvm/Analysis/BasicAliasAnalysis.h index 991c0cbb642a..ed9d1ba4c5a7 100644 --- a/llvm/include/llvm/Analysis/BasicAliasAnalysis.h +++ b/llvm/include/llvm/Analysis/BasicAliasAnalysis.h @@ -13,10 +13,8 @@ #ifndef LLVM_ANALYSIS_BASICALIASANALYSIS_H #define LLVM_ANALYSIS_BASICALIASANALYSIS_H -#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/IR/PassManager.h" #include "llvm/Pass.h" @@ -28,7 +26,6 @@ namespace llvm { struct AAMDNodes; -class APInt; class AssumptionCache; class BasicBlock; class DataLayout; @@ -98,71 +95,7 @@ public: FunctionModRefBehavior getModRefBehavior(const Function *Fn); private: - // A linear transformation of a Value; this class represents ZExt(SExt(V, - // SExtBits), ZExtBits) * Scale + Offset. - struct VariableGEPIndex { - // An opaque Value - we can't decompose this further. - const Value *V; - - // We need to track what extensions we've done as we consider the same Value - // with different extensions as different variables in a GEP's linear - // expression; - // e.g.: if V == -1, then sext(x) != zext(x). - unsigned ZExtBits; - unsigned SExtBits; - - APInt Scale; - - // Context instruction to use when querying information about this index. - const Instruction *CxtI; - - /// True if all operations in this expression are NSW. - bool IsNSW; - - void dump() const { - print(dbgs()); - dbgs() << "\n"; - } - void print(raw_ostream &OS) const { - OS << "(V=" << V->getName() - << ", zextbits=" << ZExtBits - << ", sextbits=" << SExtBits - << ", scale=" << Scale << ")"; - } - }; - - // Represents the internal structure of a GEP, decomposed into a base pointer, - // constant offsets, and variable scaled indices. - struct DecomposedGEP { - // Base pointer of the GEP - const Value *Base; - // Total constant offset from base. 
- APInt Offset; - // Scaled variable (non-constant) indices. - SmallVector<VariableGEPIndex, 4> VarIndices; - // Is GEP index scale compile-time constant. - bool HasCompileTimeConstantScale; - // Are all operations inbounds GEPs or non-indexing operations? - // (None iff expression doesn't involve any geps) - Optional<bool> InBounds; - - void dump() const { - print(dbgs()); - dbgs() << "\n"; - } - void print(raw_ostream &OS) const { - OS << "(DecomposedGEP Base=" << Base->getName() - << ", Offset=" << Offset - << ", VarIndices=["; - for (size_t i = 0; i < VarIndices.size(); i++) { - if (i != 0) - OS << ", "; - VarIndices[i].print(OS); - } - OS << "], HasCompileTimeConstantScale=" << HasCompileTimeConstantScale - << ")"; - } - }; + struct DecomposedGEP; /// Tracks phi nodes we have visited. /// @@ -187,10 +120,6 @@ private: DecomposeGEPExpression(const Value *V, const DataLayout &DL, AssumptionCache *AC, DominatorTree *DT); - static bool isGEPBaseAtNegativeOffset(const GEPOperator *GEPOp, - const DecomposedGEP &DecompGEP, const DecomposedGEP &DecompObject, - LocationSize ObjectAccessSize); - /// A Heuristic for aliasGEP that searches for a constant offset /// between the variables. /// @@ -200,15 +129,14 @@ private: /// However, we know that, for all %x, zext(%x) != zext(%x + 1), even if /// the addition overflows. 
bool - constantOffsetHeuristic(const SmallVectorImpl<VariableGEPIndex> &VarIndices, - LocationSize V1Size, LocationSize V2Size, - const APInt &BaseOffset, AssumptionCache *AC, + constantOffsetHeuristic(const DecomposedGEP &GEP, LocationSize V1Size, + LocationSize V2Size, AssumptionCache *AC, DominatorTree *DT); bool isValueEqualInPotentialCycles(const Value *V1, const Value *V2); - void GetIndexDifference(SmallVectorImpl<VariableGEPIndex> &Dest, - const SmallVectorImpl<VariableGEPIndex> &Src); + void subtractDecomposedGEPs(DecomposedGEP &DestGEP, + const DecomposedGEP &SrcGEP); AliasResult aliasGEP(const GEPOperator *V1, LocationSize V1Size, const Value *V2, LocationSize V2Size, diff --git a/llvm/include/llvm/Analysis/CGSCCPassManager.h b/llvm/include/llvm/Analysis/CGSCCPassManager.h index e361cccef960..7cf172dc1dd1 100644 --- a/llvm/include/llvm/Analysis/CGSCCPassManager.h +++ b/llvm/include/llvm/Analysis/CGSCCPassManager.h @@ -20,7 +20,7 @@ /// A secondary more general goal is to be able to isolate optimization on /// unrelated parts of the IR module. This is useful to ensure our /// optimizations are principled and don't miss oportunities where refinement -/// of one part of the module influence transformations in another part of the +/// of one part of the module influences transformations in another part of the /// module. But this is also useful if we want to parallelize the optimizations /// across common large module graph shapes which tend to be very wide and have /// large regions of unrelated cliques. 
@@ -161,6 +161,12 @@ struct RequireAnalysisPass<AnalysisT, LazyCallGraph::SCC, CGSCCAnalysisManager, (void)AM.template getResult<AnalysisT>(C, CG); return PreservedAnalyses::all(); } + void printPipeline(raw_ostream &OS, + function_ref<StringRef(StringRef)> MapClassName2PassName) { + auto ClassName = AnalysisT::name(); + auto PassName = MapClassName2PassName(ClassName); + OS << "require<" << PassName << ">"; + } }; /// A proxy from a \c CGSCCAnalysisManager to a \c Module. @@ -215,7 +221,7 @@ using ModuleAnalysisManagerCGSCCProxy = LazyCallGraph &>; /// Support structure for SCC passes to communicate updates the call graph back -/// to the CGSCC pass manager infrsatructure. +/// to the CGSCC pass manager infrastructure. /// /// The CGSCC pass manager runs SCC passes which are allowed to update the call /// graph and SCC structures. This means the structure the pass manager works @@ -274,22 +280,22 @@ struct CGSCCUpdateResult { /// If non-null, the updated current \c RefSCC being processed. /// - /// This is set when a graph refinement takes place an the "current" point in - /// the graph moves "down" or earlier in the post-order walk. This will often - /// cause the "current" RefSCC to be a newly created RefSCC object and the - /// old one to be added to the above worklist. When that happens, this + /// This is set when a graph refinement takes place and the "current" point + /// in the graph moves "down" or earlier in the post-order walk. This will + /// often cause the "current" RefSCC to be a newly created RefSCC object and + /// the old one to be added to the above worklist. When that happens, this /// pointer is non-null and can be used to continue processing the "top" of /// the post-order walk. LazyCallGraph::RefSCC *UpdatedRC; /// If non-null, the updated current \c SCC being processed. /// - /// This is set when a graph refinement takes place an the "current" point in - /// the graph moves "down" or earlier in the post-order walk. 
This will often - /// cause the "current" SCC to be a newly created SCC object and the old one - /// to be added to the above worklist. When that happens, this pointer is - /// non-null and can be used to continue processing the "top" of the - /// post-order walk. + /// This is set when a graph refinement takes place and the "current" point + /// in the graph moves "down" or earlier in the post-order walk. This will + /// often cause the "current" SCC to be a newly created SCC object and the + /// old one to be added to the above worklist. When that happens, this + /// pointer is non-null and can be used to continue processing the "top" of + /// the post-order walk. LazyCallGraph::SCC *UpdatedC; /// Preserved analyses across SCCs. @@ -298,7 +304,7 @@ struct CGSCCUpdateResult { /// (changing both the CG structure and the function IR itself). However, /// this means we need to take special care to correctly mark what analyses /// are preserved *across* SCCs. We have to track this out-of-band here - /// because within the main `PassManeger` infrastructure we need to mark + /// because within the main `PassManager` infrastructure we need to mark /// everything within an SCC as preserved in order to avoid repeatedly /// invalidating the same analyses as we unnest pass managers and adaptors. /// So we track the cross-SCC version of the preserved analyses here from any @@ -363,6 +369,13 @@ public: /// Runs the CGSCC pass across every SCC in the module. 
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); + void printPipeline(raw_ostream &OS, + function_ref<StringRef(StringRef)> MapClassName2PassName) { + OS << "cgscc("; + Pass->printPipeline(OS, MapClassName2PassName); + OS << ")"; + } + static bool isRequired() { return true; } private: @@ -377,8 +390,11 @@ createModuleToPostOrderCGSCCPassAdaptor(CGSCCPassT &&Pass) { using PassModelT = detail::PassModel<LazyCallGraph::SCC, CGSCCPassT, PreservedAnalyses, CGSCCAnalysisManager, LazyCallGraph &, CGSCCUpdateResult &>; + // Do not use make_unique, it causes too many template instantiations, + // causing terrible compile times. return ModuleToPostOrderCGSCCPassAdaptor( - std::make_unique<PassModelT>(std::forward<CGSCCPassT>(Pass))); + std::unique_ptr<ModuleToPostOrderCGSCCPassAdaptor::PassConceptT>( + new PassModelT(std::forward<CGSCCPassT>(Pass)))); } /// A proxy from a \c FunctionAnalysisManager to an \c SCC. @@ -461,11 +477,14 @@ class CGSCCToFunctionPassAdaptor public: using PassConceptT = detail::PassConcept<Function, FunctionAnalysisManager>; - explicit CGSCCToFunctionPassAdaptor(std::unique_ptr<PassConceptT> Pass) - : Pass(std::move(Pass)) {} + explicit CGSCCToFunctionPassAdaptor(std::unique_ptr<PassConceptT> Pass, + bool EagerlyInvalidate, bool NoRerun) + : Pass(std::move(Pass)), EagerlyInvalidate(EagerlyInvalidate), + NoRerun(NoRerun) {} CGSCCToFunctionPassAdaptor(CGSCCToFunctionPassAdaptor &&Arg) - : Pass(std::move(Arg.Pass)) {} + : Pass(std::move(Arg.Pass)), EagerlyInvalidate(Arg.EagerlyInvalidate), + NoRerun(Arg.NoRerun) {} friend void swap(CGSCCToFunctionPassAdaptor &LHS, CGSCCToFunctionPassAdaptor &RHS) { @@ -481,24 +500,56 @@ public: PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM, LazyCallGraph &CG, CGSCCUpdateResult &UR); + void printPipeline(raw_ostream &OS, + function_ref<StringRef(StringRef)> MapClassName2PassName) { + OS << "function"; + if (EagerlyInvalidate) + OS << "<eager-inv>"; + OS << "("; + 
Pass->printPipeline(OS, MapClassName2PassName); + OS << ")"; + } + static bool isRequired() { return true; } private: std::unique_ptr<PassConceptT> Pass; + bool EagerlyInvalidate; + bool NoRerun; }; /// A function to deduce a function pass type and wrap it in the /// templated adaptor. template <typename FunctionPassT> CGSCCToFunctionPassAdaptor -createCGSCCToFunctionPassAdaptor(FunctionPassT &&Pass) { +createCGSCCToFunctionPassAdaptor(FunctionPassT &&Pass, + bool EagerlyInvalidate = false, + bool NoRerun = false) { using PassModelT = detail::PassModel<Function, FunctionPassT, PreservedAnalyses, FunctionAnalysisManager>; + // Do not use make_unique, it causes too many template instantiations, + // causing terrible compile times. return CGSCCToFunctionPassAdaptor( - std::make_unique<PassModelT>(std::forward<FunctionPassT>(Pass))); + std::unique_ptr<CGSCCToFunctionPassAdaptor::PassConceptT>( + new PassModelT(std::forward<FunctionPassT>(Pass))), + EagerlyInvalidate, NoRerun); } +// A marker to determine if function passes should be run on a function within a +// CGSCCToFunctionPassAdaptor. This is used to prevent running an expensive +// function pass (manager) on a function multiple times if SCC mutations cause a +// function to be visited multiple times and the function is not modified by +// other SCC passes. +class ShouldNotRunFunctionPassesAnalysis + : public AnalysisInfoMixin<ShouldNotRunFunctionPassesAnalysis> { +public: + static AnalysisKey Key; + struct Result {}; + + Result run(Function &F, FunctionAnalysisManager &FAM) { return Result(); } +}; + /// A helper that repeats an SCC pass each time an indirect call is refined to /// a direct call by that pass. 
/// @@ -528,6 +579,13 @@ public: PreservedAnalyses run(LazyCallGraph::SCC &InitialC, CGSCCAnalysisManager &AM, LazyCallGraph &CG, CGSCCUpdateResult &UR); + void printPipeline(raw_ostream &OS, + function_ref<StringRef(StringRef)> MapClassName2PassName) { + OS << "devirt<" << MaxIterations << ">("; + Pass->printPipeline(OS, MapClassName2PassName); + OS << ")"; + } + private: std::unique_ptr<PassConceptT> Pass; int MaxIterations; @@ -541,8 +599,11 @@ DevirtSCCRepeatedPass createDevirtSCCRepeatedPass(CGSCCPassT &&Pass, using PassModelT = detail::PassModel<LazyCallGraph::SCC, CGSCCPassT, PreservedAnalyses, CGSCCAnalysisManager, LazyCallGraph &, CGSCCUpdateResult &>; + // Do not use make_unique, it causes too many template instantiations, + // causing terrible compile times. return DevirtSCCRepeatedPass( - std::make_unique<PassModelT>(std::forward<CGSCCPassT>(Pass)), + std::unique_ptr<DevirtSCCRepeatedPass::PassConceptT>( + new PassModelT(std::forward<CGSCCPassT>(Pass))), MaxIterations); } diff --git a/llvm/include/llvm/Analysis/CaptureTracking.h b/llvm/include/llvm/Analysis/CaptureTracking.h index 9da5f18e944b..50d12db7a1c3 100644 --- a/llvm/include/llvm/Analysis/CaptureTracking.h +++ b/llvm/include/llvm/Analysis/CaptureTracking.h @@ -22,6 +22,8 @@ namespace llvm { class DataLayout; class Instruction; class DominatorTree; + class LoopInfo; + class Function; /// getDefaultMaxUsesToExploreForCaptureTracking - Return default value of /// the maximal number of uses to explore before giving up. It is used by @@ -55,10 +57,25 @@ namespace llvm { /// MaxUsesToExplore specifies how many uses the analysis should explore for /// one value before giving up due too "too many uses". If MaxUsesToExplore /// is zero, a default value is assumed. 
- bool PointerMayBeCapturedBefore( - const Value *V, bool ReturnCaptures, bool StoreCaptures, - const Instruction *I, const DominatorTree *DT, bool IncludeI = false, - unsigned MaxUsesToExplore = 0); + bool PointerMayBeCapturedBefore(const Value *V, bool ReturnCaptures, + bool StoreCaptures, const Instruction *I, + const DominatorTree *DT, + bool IncludeI = false, + unsigned MaxUsesToExplore = 0, + const LoopInfo *LI = nullptr); + + // Returns the 'earliest' instruction that captures \p V in \F. An instruction + // A is considered earlier than instruction B, if A dominates B. If 2 escapes + // do not dominate each other, the terminator of the common dominator is + // chosen. If not all uses can be analyzed, the earliest escape is set to + // the first instruction in the function entry block. If \p V does not escape, + // nullptr is returned. Note that the caller of the function has to ensure + // that the instruction the result value is compared against is not in a + // cycle. + Instruction *FindEarliestCapture(const Value *V, Function &F, + bool ReturnCaptures, bool StoreCaptures, + const DominatorTree &DT, + unsigned MaxUsesToExplore = 0); /// This callback is used in conjunction with PointerMayBeCaptured. In /// addition to the interface here, you'll need to provide your own getters diff --git a/llvm/include/llvm/Analysis/ConstantFolding.h b/llvm/include/llvm/Analysis/ConstantFolding.h index 62742fdf9a91..45fb879f0c1f 100644 --- a/llvm/include/llvm/Analysis/ConstantFolding.h +++ b/llvm/include/llvm/Analysis/ConstantFolding.h @@ -128,10 +128,25 @@ Constant *ConstantFoldExtractElementInstruction(Constant *Val, Constant *Idx); Constant *ConstantFoldShuffleVectorInstruction(Constant *V1, Constant *V2, ArrayRef<int> Mask); -/// ConstantFoldLoadFromConstPtr - Return the value that a load from C would -/// produce if it is constant and determinable. If this is not determinable, -/// return null. 
-Constant *ConstantFoldLoadFromConstPtr(Constant *C, Type *Ty, const DataLayout &DL); +/// Extract value of C at the given Offset reinterpreted as Ty. If bits past +/// the end of C are accessed, they are assumed to be poison. +Constant *ConstantFoldLoadFromConst(Constant *C, Type *Ty, const APInt &Offset, + const DataLayout &DL); + +/// Extract value of C reinterpreted as Ty. Same as previous API with zero +/// offset. +Constant *ConstantFoldLoadFromConst(Constant *C, Type *Ty, + const DataLayout &DL); + +/// Return the value that a load from C with offset Offset would produce if it +/// is constant and determinable. If this is not determinable, return null. +Constant *ConstantFoldLoadFromConstPtr(Constant *C, Type *Ty, APInt Offset, + const DataLayout &DL); + +/// Return the value that a load from C would produce if it is constant and +/// determinable. If this is not determinable, return null. +Constant *ConstantFoldLoadFromConstPtr(Constant *C, Type *Ty, + const DataLayout &DL); /// ConstantFoldLoadThroughGEPConstantExpr - Given a constant and a /// getelementptr constantexpr, return the constant value being addressed by the @@ -140,13 +155,6 @@ Constant *ConstantFoldLoadThroughGEPConstantExpr(Constant *C, ConstantExpr *CE, Type *Ty, const DataLayout &DL); -/// ConstantFoldLoadThroughGEPIndices - Given a constant and getelementptr -/// indices (with an *implied* zero pointer index that is not in the list), -/// return the constant value being addressed by a virtual load, or null if -/// something is funny and we can't decide. -Constant *ConstantFoldLoadThroughGEPIndices(Constant *C, - ArrayRef<Constant *> Indices); - /// canConstantFoldCallTo - Return true if its even possible to fold a call to /// the specified function. 
bool canConstantFoldCallTo(const CallBase *Call, const Function *F); diff --git a/llvm/include/llvm/Analysis/CostModel.h b/llvm/include/llvm/Analysis/CostModel.h new file mode 100644 index 000000000000..649168050cec --- /dev/null +++ b/llvm/include/llvm/Analysis/CostModel.h @@ -0,0 +1,26 @@ +//===- CostModel.h - --------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_COSTMODEL_H +#define LLVM_ANALYSIS_COSTMODEL_H + +#include "llvm/IR/PassManager.h" + +namespace llvm { +/// Printer pass for cost modeling results. +class CostModelPrinterPass : public PassInfoMixin<CostModelPrinterPass> { + raw_ostream &OS; + +public: + explicit CostModelPrinterPass(raw_ostream &OS) : OS(OS) {} + + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); +}; +} // end namespace llvm + +#endif // LLVM_ANALYSIS_COSTMODEL_H diff --git a/llvm/include/llvm/Analysis/Delinearization.h b/llvm/include/llvm/Analysis/Delinearization.h index 2658b6bbc80c..6e942530f253 100644 --- a/llvm/include/llvm/Analysis/Delinearization.h +++ b/llvm/include/llvm/Analysis/Delinearization.h @@ -16,10 +16,115 @@ #ifndef LLVM_ANALYSIS_DELINEARIZATION_H #define LLVM_ANALYSIS_DELINEARIZATION_H +#include "llvm/ADT/SmallVector.h" #include "llvm/IR/PassManager.h" #include "llvm/Support/raw_ostream.h" namespace llvm { +class GetElementPtrInst; +class ScalarEvolution; +class SCEV; + +/// Compute the array dimensions Sizes from the set of Terms extracted from +/// the memory access function of this SCEVAddRecExpr (second step of +/// delinearization). 
+void findArrayDimensions(ScalarEvolution &SE, + SmallVectorImpl<const SCEV *> &Terms, + SmallVectorImpl<const SCEV *> &Sizes, + const SCEV *ElementSize); + +/// Collect parametric terms occurring in step expressions (first step of +/// delinearization). +void collectParametricTerms(ScalarEvolution &SE, const SCEV *Expr, + SmallVectorImpl<const SCEV *> &Terms); + +/// Return in Subscripts the access functions for each dimension in Sizes +/// (third step of delinearization). +void computeAccessFunctions(ScalarEvolution &SE, const SCEV *Expr, + SmallVectorImpl<const SCEV *> &Subscripts, + SmallVectorImpl<const SCEV *> &Sizes); +/// Split this SCEVAddRecExpr into two vectors of SCEVs representing the +/// subscripts and sizes of an array access. +/// +/// The delinearization is a 3 step process: the first two steps compute the +/// sizes of each subscript and the third step computes the access functions +/// for the delinearized array: +/// +/// 1. Find the terms in the step functions +/// 2. Compute the array size +/// 3. Compute the access function: divide the SCEV by the array size +/// starting with the innermost dimensions found in step 2. The Quotient +/// is the SCEV to be divided in the next step of the recursion. The +/// Remainder is the subscript of the innermost dimension. Loop over all +/// array dimensions computed in step 2. +/// +/// To compute a uniform array size for several memory accesses to the same +/// object, one can collect in step 1 all the step terms for all the memory +/// accesses, and compute in step 2 a unique array shape. This guarantees +/// that the array shape will be the same across all memory accesses. +/// +/// FIXME: We could derive the result of steps 1 and 2 from a description of +/// the array shape given in metadata. +/// +/// Example: +/// +/// A[][n][m] +/// +/// for i +/// for j +/// for k +/// A[j+k][2i][5i] = +/// +/// The initial SCEV: +/// +/// A[{{{0,+,2*m+5}_i, +, n*m}_j, +, n*m}_k] +/// +/// 1. 
Find the different terms in the step functions: +/// -> [2*m, 5, n*m, n*m] +/// +/// 2. Compute the array size: sort and unique them +/// -> [n*m, 2*m, 5] +/// find the GCD of all the terms = 1 +/// divide by the GCD and erase constant terms +/// -> [n*m, 2*m] +/// GCD = m +/// divide by GCD -> [n, 2] +/// remove constant terms +/// -> [n] +/// size of the array is A[unknown][n][m] +/// +/// 3. Compute the access function +/// a. Divide {{{0,+,2*m+5}_i, +, n*m}_j, +, n*m}_k by the innermost size m +/// Quotient: {{{0,+,2}_i, +, n}_j, +, n}_k +/// Remainder: {{{0,+,5}_i, +, 0}_j, +, 0}_k +/// The remainder is the subscript of the innermost array dimension: [5i]. +/// +/// b. Divide Quotient: {{{0,+,2}_i, +, n}_j, +, n}_k by next outer size n +/// Quotient: {{{0,+,0}_i, +, 1}_j, +, 1}_k +/// Remainder: {{{0,+,2}_i, +, 0}_j, +, 0}_k +/// The Remainder is the subscript of the next array dimension: [2i]. +/// +/// The subscript of the outermost dimension is the Quotient: [j+k]. +/// +/// Overall, we have: A[][n][m], and the access function: A[j+k][2i][5i]. +void delinearize(ScalarEvolution &SE, const SCEV *Expr, + SmallVectorImpl<const SCEV *> &Subscripts, + SmallVectorImpl<const SCEV *> &Sizes, const SCEV *ElementSize); + +/// Gathers the individual index expressions from a GEP instruction. +/// +/// This function optimistically assumes the GEP references into a fixed size +/// array. If this is actually true, this function returns a list of array +/// subscript expressions in \p Subscripts and a list of integers describing +/// the size of the individual array dimensions in \p Sizes. Both lists have +/// either equal length or the size list is one element shorter in case there +/// is no known size available for the outermost array dimension. Returns true +/// if successful and false otherwise. 
+bool getIndexExpressionsFromGEP(ScalarEvolution &SE, + const GetElementPtrInst *GEP, + SmallVectorImpl<const SCEV *> &Subscripts, + SmallVectorImpl<int> &Sizes); + struct DelinearizationPrinterPass : public PassInfoMixin<DelinearizationPrinterPass> { explicit DelinearizationPrinterPass(raw_ostream &OS); diff --git a/llvm/include/llvm/Analysis/HeatUtils.h b/llvm/include/llvm/Analysis/HeatUtils.h index b665e211c6ac..9ecbbaf318da 100644 --- a/llvm/include/llvm/Analysis/HeatUtils.h +++ b/llvm/include/llvm/Analysis/HeatUtils.h @@ -1,9 +1,8 @@ //===-- HeatUtils.h - Utility for printing heat colors ----------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h b/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h index b623b9ca58d8..51c5c620230b 100644 --- a/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h +++ b/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h @@ -110,7 +110,8 @@ enum InstrType { Legal, Illegal, Invisible }; /// by \ref isSameOperationAs. /// TODO: Handle GetElementPtrInsts, as some of the operands have to be the /// exact same, and some do not. -struct IRInstructionData : ilist_node<IRInstructionData> { +struct IRInstructionData + : ilist_node<IRInstructionData, ilist_sentinel_tracking<true>> { /// The source Instruction that is being wrapped. Instruction *Inst = nullptr; @@ -127,12 +128,41 @@ struct IRInstructionData : ilist_node<IRInstructionData> { /// to a less than form. It is None otherwise. 
Optional<CmpInst::Predicate> RevisedPredicate; + /// This structure holds the distances of how far "ahead of" or "behind" the + /// target blocks of a branch, or the incoming blocks of a phi nodes are. + /// If the value is negative, it means that the block was registered before + /// the block of this instruction in terms of blocks in the function. + /// Code Example: + /// \code + /// block_1: + /// br i1 %0, label %block_2, label %block_3 + /// block_2: + /// br i1 %1, label %block_1, label %block_2 + /// block_3: + /// br i1 %2, label %block_2, label %block_1 + /// ; Replacing the labels with relative values, this becomes: + /// block_1: + /// br i1 %0, distance 1, distance 2 + /// block_2: + /// br i1 %1, distance -1, distance 0 + /// block_3: + /// br i1 %2, distance -1, distance -2 + /// \endcode + /// Taking block_2 as our example, block_1 is "behind" block_2, and block_2 is + /// "ahead" of block_2. + SmallVector<int, 4> RelativeBlockLocations; + /// Gather the information that is difficult to gather for an Instruction, or /// is changed. i.e. the operands of an Instruction and the Types of those /// operands. This extra information allows for similarity matching to make /// assertions that allow for more flexibility when checking for whether an /// Instruction performs the same operation. IRInstructionData(Instruction &I, bool Legality, IRInstructionDataList &IDL); + IRInstructionData(IRInstructionDataList &IDL); + + /// Fills data stuctures for IRInstructionData when it is constructed from a + // reference or a pointer. + void initializeInstruction(); /// Get the predicate that the compare instruction is using for hashing the /// instruction. the IRInstructionData must be wrapping a CmpInst. @@ -145,6 +175,16 @@ struct IRInstructionData : ilist_node<IRInstructionData> { /// \return the consistent comparison predicate. 
static CmpInst::Predicate predicateForConsistency(CmpInst *CI); + /// For an IRInstructionData containing a branch, finds the + /// relative distances from the source basic block to the target by taking + /// the difference of the number assigned to the current basic block and the + /// target basic block of the branch. + /// + /// \param BasicBlockToInteger - The mapping of basic blocks to their location + /// in the module. + void + setBranchSuccessors(DenseMap<BasicBlock *, unsigned> &BasicBlockToInteger); + /// Hashes \p Value based on its opcode, types, and operand types. /// Two IRInstructionData instances produce the same hash when they perform /// the same operation. @@ -198,7 +238,8 @@ struct IRInstructionData : ilist_node<IRInstructionData> { IRInstructionDataList *IDL = nullptr; }; -struct IRInstructionDataList : simple_ilist<IRInstructionData> {}; +struct IRInstructionDataList + : simple_ilist<IRInstructionData, ilist_sentinel_tracking<true>> {}; /// Compare one IRInstructionData class to another IRInstructionData class for /// whether they are performing a the same operation, and can mapped to the @@ -288,6 +329,10 @@ struct IRInstructionMapper { DenseMap<IRInstructionData *, unsigned, IRInstructionDataTraits> InstructionIntegerMap; + /// A mapping for a basic block in a module to its assigned number/location + /// in the module. + DenseMap<BasicBlock *, unsigned> BasicBlockToInteger; + /// Set if we added an illegal number in the previous step. /// Since each illegal number is unique, we only need one of them between /// each range of legal numbers. This lets us make sure we don't add more @@ -322,6 +367,14 @@ struct IRInstructionMapper { IRInstructionData *allocateIRInstructionData(Instruction &I, bool Legality, IRInstructionDataList &IDL); + /// Get an empty allocated IRInstructionData struct using the + /// InstDataAllocator. + /// + /// \param IDL - The InstructionDataList that the IRInstructionData is + /// inserted into. 
+ /// \returns An allocated IRInstructionData struct. + IRInstructionData *allocateIRInstructionData(IRInstructionDataList &IDL); + /// Get an allocated IRInstructionDataList object using the IDLAllocator. /// /// \returns An allocated IRInstructionDataList object. @@ -329,6 +382,24 @@ struct IRInstructionMapper { IRInstructionDataList *IDL = nullptr; + /// Assigns values to all the basic blocks in function \p F starting from + /// integer \p BBNumber. + /// + /// \param F - The function containing the basic blocks to assign numbers to. + /// \param BBNumber - The number to start from. + void initializeForBBs(Function &F, unsigned &BBNumber) { + for (BasicBlock &BB : F) + BasicBlockToInteger.insert(std::make_pair(&BB, BBNumber++)); + } + + /// Assigns values to all the basic blocks in Module \p M. + /// \param M - The module containing the basic blocks to assign numbers to. + void initializeForBBs(Module &M) { + unsigned BBNumber = 0; + for (Function &F : M) + initializeForBBs(F, BBNumber); + } + /// Maps the Instructions in a BasicBlock \p BB to legal or illegal integers /// determined by \p InstrType. Two Instructions are mapped to the same value /// if they are close as defined by the InstructionData class above. @@ -386,7 +457,11 @@ struct IRInstructionMapper { InstructionClassification() {} // TODO: Determine a scheme to resolve when the label is similar enough. - InstrType visitBranchInst(BranchInst &BI) { return Illegal; } + InstrType visitBranchInst(BranchInst &BI) { + if (EnableBranches) + return Legal; + return Illegal; + } // TODO: Determine a scheme to resolve when the labels are similar enough. InstrType visitPHINode(PHINode &PN) { return Illegal; } // TODO: Handle allocas. @@ -419,6 +494,10 @@ struct IRInstructionMapper { // TODO: Handle interblock similarity. 
InstrType visitTerminator(Instruction &I) { return Illegal; } InstrType visitInstruction(Instruction &I) { return Legal; } + + // The flag variable that lets the classifier know whether we should + // allow branches to be checked for similarity. + bool EnableBranches = false; }; /// Maps an Instruction to a member of InstrType. @@ -488,6 +567,12 @@ private: DenseMap<Value *, unsigned> ValueToNumber; /// Stores the mapping of the number to the value assigned this number. DenseMap<unsigned, Value *> NumberToValue; + /// Stores the mapping of a value's number to canonical numbering in the + /// candidate's respective similarity group. + DenseMap<unsigned, unsigned> NumberToCanonNum; + /// Stores the mapping of canonical number in the candidate's respective + /// similarity group to a value number. + DenseMap<unsigned, unsigned> CanonNumToNumber; /// @} public: @@ -506,13 +591,27 @@ public: static bool isSimilar(const IRSimilarityCandidate &A, const IRSimilarityCandidate &B); - /// \param A - The first IRInstructionCandidate to compare. - /// \param B - The second IRInstructionCandidate to compare. + /// \param [in] A - The first IRInstructionCandidate to compare. + /// \param [in] B - The second IRInstructionCandidate to compare. /// \returns True when every IRInstructionData in \p A is structurally similar /// to \p B. static bool compareStructure(const IRSimilarityCandidate &A, const IRSimilarityCandidate &B); + /// \param [in] A - The first IRInstructionCandidate to compare. + /// \param [in] B - The second IRInstructionCandidate to compare. + /// \param [in,out] ValueNumberMappingA - A mapping of value numbers from + /// candidate \p A to candidate \B. + /// \param [in,out] ValueNumberMappingB - A mapping of value numbers from + /// candidate \p B to candidate \A. + /// \returns True when every IRInstructionData in \p A is structurally similar + /// to \p B. 
+ static bool + compareStructure(const IRSimilarityCandidate &A, + const IRSimilarityCandidate &B, + DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingA, + DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingB); + struct OperandMapping { /// The IRSimilarityCandidate that holds the instruction the OperVals were /// pulled from. @@ -526,6 +625,21 @@ public: DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMapping; }; + /// A helper struct to hold the candidate, for a branch instruction, the + /// relative location of a label, and the label itself. This is mostly to + /// group the values together before passing them as a bundle to a function. + struct RelativeLocMapping { + /// The IRSimilarityCandidate that holds the instruction the relative + /// location was pulled from. + const IRSimilarityCandidate &IRSC; + + /// The relative location to be analyzed. + int RelativeLocation; + + /// The corresponding value. + Value *OperVal; + }; + /// Compare the operands in \p A and \p B and check that the current mapping /// of global value numbers from \p A to \p B and \p B to \A is consistent. /// @@ -549,6 +663,94 @@ public: static bool compareCommutativeOperandMapping(OperandMapping A, OperandMapping B); + /// Compare the relative locations in \p A and \p B and check that the + /// distances match if both locations are contained in the region, and that + /// the branches both point outside the region if they do not. + /// Example Region: + /// \code + /// entry: + /// br i1 %0, label %block_1, label %block_3 + /// block_0: + /// br i1 %0, label %block_1, label %block_2 + /// block_1: + /// br i1 %0, label %block_2, label %block_3 + /// block_2: + /// br i1 %1, label %block_1, label %block_4 + /// block_3: + /// br i1 %2, label %block_2, label %block_5 + /// \endcode + /// If we compare the branches in block_0 and block_1 the relative values are + /// 1 and 2 for both, so we consider this a match. 
+ /// + /// If we compare the branches in entry and block_0 the relative values are + /// 2 and 3, and 1 and 2 respectively. Since these are not the same we do not + /// consider them a match. + /// + /// If we compare the branches in block_1 and block_2 the relative values are + /// 1 and 2, and -1 and None respectively. As a result we do not consider + /// these to be the same + /// + /// If we compare the branches in block_2 and block_3 the relative values are + /// -1 and None for both. We do consider these to be a match. + /// + /// \param A - The first IRInstructionCandidate, relative location value, + /// and incoming block. + /// \param B - The second IRInstructionCandidate, relative location value, + /// and incoming block. + /// \returns true if the relative locations match. + static bool checkRelativeLocations(RelativeLocMapping A, + RelativeLocMapping B); + + /// Create a mapping from the value numbering to a different separate set of + /// numbers. This will serve as a guide for relating one candidate to another. + /// The canonical number gives use the ability identify which global value + /// number in one candidate relates to the global value number in the other. + /// + /// \param [in, out] CurrCand - The IRSimilarityCandidate to create a + /// canonical numbering for. + static void createCanonicalMappingFor(IRSimilarityCandidate &CurrCand); + + /// Create a mapping for the value numbering of the calling + /// IRSimilarityCandidate, to a different separate set of numbers, based on + /// the canonical ordering in \p SourceCand. These are defined based on the + /// found mappings in \p ToSourceMapping and \p FromSourceMapping. Both of + /// these relationships should have the same information, just in opposite + /// directions. + /// + /// \param [in, out] SourceCand - The IRSimilarityCandidate to create a + /// canonical numbering from. + /// \param ToSourceMapping - The mapping of value numbers from this candidate + /// to \p SourceCand. 
+ /// \param FromSourceMapping - The mapping of value numbers from \p SoureCand + /// to this candidate. + void createCanonicalRelationFrom( + IRSimilarityCandidate &SourceCand, + DenseMap<unsigned, DenseSet<unsigned>> &ToSourceMapping, + DenseMap<unsigned, DenseSet<unsigned>> &FromSourceMapping); + + /// \param [in,out] BBSet - The set to track the basic blocks. + void getBasicBlocks(DenseSet<BasicBlock *> &BBSet) const { + for (IRInstructionData &ID : *this) { + BasicBlock *BB = ID.Inst->getParent(); + if (BBSet.contains(BB)) + continue; + BBSet.insert(BB); + } + } + + /// \param [in,out] BBSet - The set to track the basic blocks. + /// \param [in,out] BBList - A list in order of use to track the basic blocks. + void getBasicBlocks(DenseSet<BasicBlock *> &BBSet, + SmallVector<BasicBlock *> &BBList) const { + for (IRInstructionData &ID : *this) { + BasicBlock *BB = ID.Inst->getParent(); + if (BBSet.contains(BB)) + continue; + BBSet.insert(BB); + BBList.push_back(BB); + } + } + /// Compare the start and end indices of the two IRSimilarityCandidates for /// whether they overlap. If the start instruction of one /// IRSimilarityCandidate is less than the end instruction of the other, and @@ -611,6 +813,32 @@ public: return VNIt->second; } + /// Find the canonical number from the global value number \p N stored in the + /// candidate. + /// + /// \param N - The global value number to find the canonical number for. + /// \returns An optional containing the value, and None if it could not be + /// found. + Optional<unsigned> getCanonicalNum(unsigned N) { + DenseMap<unsigned, unsigned>::iterator NCIt = NumberToCanonNum.find(N); + if (NCIt == NumberToCanonNum.end()) + return None; + return NCIt->second; + } + + /// Find the global value number from the canonical number \p N stored in the + /// candidate. + /// + /// \param N - The canonical number to find the global vlaue number for. + /// \returns An optional containing the value, and None if it could not be + /// found. 
+ Optional<unsigned> fromCanonicalNum(unsigned N) { + DenseMap<unsigned, unsigned>::iterator CNIt = CanonNumToNumber.find(N); + if (CNIt == CanonNumToNumber.end()) + return None; + return CNIt->second; + } + /// \param RHS -The IRSimilarityCandidate to compare against /// \returns true if the IRSimilarityCandidate is occurs after the /// IRSimilarityCandidate in the program. @@ -623,6 +851,9 @@ public: iterator end() const { return std::next(iterator(back())); } }; +typedef DenseMap<IRSimilarityCandidate *, + DenseMap<unsigned, DenseSet<unsigned>>> + CandidateGVNMapping; typedef std::vector<IRSimilarityCandidate> SimilarityGroup; typedef std::vector<SimilarityGroup> SimilarityGroupList; @@ -651,8 +882,9 @@ typedef std::vector<SimilarityGroup> SimilarityGroupList; /// analyzing the module. class IRSimilarityIdentifier { public: - IRSimilarityIdentifier() - : Mapper(&InstDataAllocator, &InstDataListAllocator) {} + IRSimilarityIdentifier(bool MatchBranches = true) + : Mapper(&InstDataAllocator, &InstDataListAllocator), + EnableBranches(MatchBranches) {} private: /// Map the instructions in the module to unsigned integers, using mapping @@ -728,6 +960,10 @@ private: /// instance of IRInstructionData. IRInstructionMapper Mapper; + /// The flag variable that marks whether we should check branches for + /// similarity, or only look within basic blocks. + bool EnableBranches = true; + /// The SimilarityGroups found with the most recent run of \ref /// findSimilarity. None if there is no recent run. Optional<SimilarityGroupList> SimilarityCandidates; diff --git a/llvm/include/llvm/Analysis/IVDescriptors.h b/llvm/include/llvm/Analysis/IVDescriptors.h index 82e1b14960bd..c26dbc457949 100644 --- a/llvm/include/llvm/Analysis/IVDescriptors.h +++ b/llvm/include/llvm/Analysis/IVDescriptors.h @@ -36,20 +36,24 @@ class DominatorTree; /// These are the kinds of recurrences that we support. enum class RecurKind { - None, ///< Not a recurrence. - Add, ///< Sum of integers. 
- Mul, ///< Product of integers. - Or, ///< Bitwise or logical OR of integers. - And, ///< Bitwise or logical AND of integers. - Xor, ///< Bitwise or logical XOR of integers. - SMin, ///< Signed integer min implemented in terms of select(cmp()). - SMax, ///< Signed integer max implemented in terms of select(cmp()). - UMin, ///< Unisgned integer min implemented in terms of select(cmp()). - UMax, ///< Unsigned integer max implemented in terms of select(cmp()). - FAdd, ///< Sum of floats. - FMul, ///< Product of floats. - FMin, ///< FP min implemented in terms of select(cmp()). - FMax ///< FP max implemented in terms of select(cmp()). + None, ///< Not a recurrence. + Add, ///< Sum of integers. + Mul, ///< Product of integers. + Or, ///< Bitwise or logical OR of integers. + And, ///< Bitwise or logical AND of integers. + Xor, ///< Bitwise or logical XOR of integers. + SMin, ///< Signed integer min implemented in terms of select(cmp()). + SMax, ///< Signed integer max implemented in terms of select(cmp()). + UMin, ///< Unisgned integer min implemented in terms of select(cmp()). + UMax, ///< Unsigned integer max implemented in terms of select(cmp()). + FAdd, ///< Sum of floats. + FMul, ///< Product of floats. + FMin, ///< FP min implemented in terms of select(cmp()). + FMax, ///< FP max implemented in terms of select(cmp()). + SelectICmp, ///< Integer select(icmp(),x,y) where one of (x,y) is loop + ///< invariant + SelectFCmp ///< Integer select(fcmp(),x,y) where one of (x,y) is loop + ///< invariant }; /// The RecurrenceDescriptor is used to identify recurrences variables in a @@ -112,12 +116,14 @@ public: }; /// Returns a struct describing if the instruction 'I' can be a recurrence - /// variable of type 'Kind'. If the recurrence is a min/max pattern of - /// select(icmp()) this function advances the instruction pointer 'I' from the - /// compare instruction to the select instruction and stores this pointer in - /// 'PatternLastInst' member of the returned struct. 
- static InstDesc isRecurrenceInstr(Instruction *I, RecurKind Kind, - InstDesc &Prev, FastMathFlags FMF); + /// variable of type 'Kind' for a Loop \p L and reduction PHI \p Phi. + /// If the recurrence is a min/max pattern of select(icmp()) this function + /// advances the instruction pointer 'I' from the compare instruction to the + /// select instruction and stores this pointer in 'PatternLastInst' member of + /// the returned struct. + static InstDesc isRecurrenceInstr(Loop *L, PHINode *Phi, Instruction *I, + RecurKind Kind, InstDesc &Prev, + FastMathFlags FuncFMF); /// Returns true if instruction I has multiple uses in Insts static bool hasMultipleUsesOf(Instruction *I, @@ -127,20 +133,29 @@ public: /// Returns true if all uses of the instruction I is within the Set. static bool areAllUsesIn(Instruction *I, SmallPtrSetImpl<Instruction *> &Set); - /// Returns a struct describing if the instruction is a - /// Select(ICmp(X, Y), X, Y) instruction pattern corresponding to a min(X, Y) - /// or max(X, Y). \p Prev specifies the description of an already processed - /// select instruction, so its corresponding cmp can be matched to it. - static InstDesc isMinMaxSelectCmpPattern(Instruction *I, - const InstDesc &Prev); + /// Returns a struct describing if the instruction is a llvm.(s/u)(min/max), + /// llvm.minnum/maxnum or a Select(ICmp(X, Y), X, Y) pair of instructions + /// corresponding to a min(X, Y) or max(X, Y), matching the recurrence kind \p + /// Kind. \p Prev specifies the description of an already processed select + /// instruction, so its corresponding cmp can be matched to it. + static InstDesc isMinMaxPattern(Instruction *I, RecurKind Kind, + const InstDesc &Prev); + + /// Returns a struct describing whether the instruction is either a + /// Select(ICmp(A, B), X, Y), or + /// Select(FCmp(A, B), X, Y) + /// where one of (X, Y) is a loop invariant integer and the other is a PHI + /// value. 
\p Prev specifies the description of an already processed select + /// instruction, so its corresponding cmp can be matched to it. + static InstDesc isSelectCmpPattern(Loop *Loop, PHINode *OrigPhi, + Instruction *I, InstDesc &Prev); /// Returns a struct describing if the instruction is a /// Select(FCmp(X, Y), (Z = X op PHINode), PHINode) instruction pattern. static InstDesc isConditionalRdxPattern(RecurKind Kind, Instruction *I); /// Returns identity corresponding to the RecurrenceKind. - static Constant *getRecurrenceIdentity(RecurKind K, Type *Tp, - FastMathFlags FMF); + Value *getRecurrenceIdentity(RecurKind K, Type *Tp, FastMathFlags FMF); /// Returns the opcode corresponding to the RecurrenceKind. static unsigned getOpcode(RecurKind Kind); @@ -150,7 +165,7 @@ public: /// non-null, the minimal bit width needed to compute the reduction will be /// computed. static bool AddReductionVar(PHINode *Phi, RecurKind Kind, Loop *TheLoop, - FastMathFlags FMF, + FastMathFlags FuncFMF, RecurrenceDescriptor &RedDes, DemandedBits *DB = nullptr, AssumptionCache *AC = nullptr, @@ -220,6 +235,12 @@ public: return isIntMinMaxRecurrenceKind(Kind) || isFPMinMaxRecurrenceKind(Kind); } + /// Returns true if the recurrence kind is of the form + /// select(cmp(),x,y) where one of (x,y) is loop invariant. + static bool isSelectCmpRecurrenceKind(RecurKind Kind) { + return Kind == RecurKind::SelectICmp || Kind == RecurKind::SelectFCmp; + } + /// Returns the type of the recurrence. This type can be narrower than the /// actual type of the Phi if the recurrence has been type-promoted. 
Type *getRecurrenceType() const { return RecurrenceType; } @@ -329,6 +350,11 @@ public: : Instruction::BinaryOpsEnd; } + Type *getElementType() const { + assert(IK == IK_PtrInduction && "Only pointer induction has element type"); + return ElementType; + } + /// Returns a reference to the type cast instructions in the induction /// update chain, that are redundant when guarded with a runtime /// SCEV overflow check. @@ -340,6 +366,7 @@ private: /// Private constructor - used by \c isInductionPHI. InductionDescriptor(Value *Start, InductionKind K, const SCEV *Step, BinaryOperator *InductionBinOp = nullptr, + Type *ElementType = nullptr, SmallVectorImpl<Instruction *> *Casts = nullptr); /// Start value. @@ -350,6 +377,9 @@ private: const SCEV *Step = nullptr; // Instruction that advances induction variable. BinaryOperator *InductionBinOp = nullptr; + // Element type for pointer induction variables. + // TODO: This can be dropped once support for typed pointers is removed. + Type *ElementType = nullptr; // Instructions used for type-casts of the induction variable, // that are redundant when guarded with a runtime SCEV overflow check. SmallVector<Instruction *, 2> RedundantCasts; diff --git a/llvm/include/llvm/Analysis/IVUsers.h b/llvm/include/llvm/Analysis/IVUsers.h index f8ea3bcca229..e2026a4d5875 100644 --- a/llvm/include/llvm/Analysis/IVUsers.h +++ b/llvm/include/llvm/Analysis/IVUsers.h @@ -157,9 +157,6 @@ public: /// dump - This method is used for debugging. 
void dump() const; - -protected: - bool AddUsersImpl(Instruction *I, SmallPtrSetImpl<Loop*> &SimpleLoopNests); }; Pass *createIVUsersPass(); diff --git a/llvm/include/llvm/Analysis/InlineAdvisor.h b/llvm/include/llvm/Analysis/InlineAdvisor.h index c27aaf0db8f2..9f9bc3a5e71b 100644 --- a/llvm/include/llvm/Analysis/InlineAdvisor.h +++ b/llvm/include/llvm/Analysis/InlineAdvisor.h @@ -22,6 +22,7 @@ class CallBase; class Function; class Module; class OptimizationRemarkEmitter; +struct ReplayInlinerSettings; /// There are 3 scenarios we can use the InlineAdvisor: /// - Default - use manual heuristics. @@ -143,7 +144,11 @@ public: /// be up-to-date wrt previous inlining decisions. \p MandatoryOnly indicates /// only mandatory (always-inline) call sites should be recommended - this /// allows the InlineAdvisor track such inlininings. - /// Returns an InlineAdvice with the inlining recommendation. + /// Returns: + /// - An InlineAdvice with the inlining recommendation. + /// - Null when no recommendation is made (https://reviews.llvm.org/D110658). + /// TODO: Consider removing the Null return scenario by incorporating the + /// SampleProfile inliner into an InlineAdvisor std::unique_ptr<InlineAdvice> getAdvice(CallBase &CB, bool MandatoryOnly = false); @@ -157,6 +162,12 @@ public: /// to prepare for a partial update. virtual void onPassExit() {} + /// Called when the module is invalidated. We let the advisor implementation + /// decide what to refresh - in the case of the development mode + /// implementation, for example, we wouldn't want to delete the whole object + /// and need to re-load the model evaluator. 
+ virtual void onModuleInvalidated() {} + protected: InlineAdvisor(Module &M, FunctionAnalysisManager &FAM); virtual std::unique_ptr<InlineAdvice> getAdviceImpl(CallBase &CB) = 0; @@ -219,15 +230,18 @@ public: InlineAdvisorAnalysis() = default; struct Result { Result(Module &M, ModuleAnalysisManager &MAM) : M(M), MAM(MAM) {} - bool invalidate(Module &, const PreservedAnalyses &, + bool invalidate(Module &, const PreservedAnalyses &PA, ModuleAnalysisManager::Invalidator &) { - // InlineAdvisor must be preserved across analysis invalidations. - return false; + if (Advisor && !PA.areAllPreserved()) + Advisor->onModuleInvalidated(); + // Check whether the analysis has been explicitly invalidated. Otherwise, + // it's stateless and remains preserved. + auto PAC = PA.getChecker<InlineAdvisorAnalysis>(); + return !PAC.preservedWhenStateless(); } bool tryCreate(InlineParams Params, InliningAdvisorMode Mode, - StringRef ReplayFile); + const ReplayInlinerSettings &ReplaySettings); InlineAdvisor *getAdvisor() const { return Advisor.get(); } - void clear() { Advisor.reset(); } private: Module &M; @@ -263,12 +277,16 @@ shouldInline(CallBase &CB, function_ref<InlineCost(CallBase &CB)> GetInlineCost, /// Emit ORE message. void emitInlinedInto(OptimizationRemarkEmitter &ORE, DebugLoc DLoc, const BasicBlock *Block, const Function &Callee, - const Function &Caller, const InlineCost &IC, - bool ForProfileContext = false, + const Function &Caller, bool IsMandatory, + function_ref<void(OptimizationRemark &)> ExtraContext = {}, const char *PassName = nullptr); -/// get call site location as string -std::string getCallSiteLocation(DebugLoc DLoc); +/// Emit ORE message based in cost (default heuristic). +void emitInlinedIntoBasedOnCost(OptimizationRemarkEmitter &ORE, DebugLoc DLoc, + const BasicBlock *Block, const Function &Callee, + const Function &Caller, const InlineCost &IC, + bool ForProfileContext = false, + const char *PassName = nullptr); /// Add location info to ORE message. 
void addLocationToRemarks(OptimizationRemark &Remark, DebugLoc DLoc); diff --git a/llvm/include/llvm/Analysis/InlineCost.h b/llvm/include/llvm/Analysis/InlineCost.h index 4e1b28d4633f..b22841343b1a 100644 --- a/llvm/include/llvm/Analysis/InlineCost.h +++ b/llvm/include/llvm/Analysis/InlineCost.h @@ -213,6 +213,9 @@ struct InlineParams { /// Indicate whether we should allow inline deferral. Optional<bool> EnableDeferral = true; + + /// Indicate whether we allow inlining for recursive call. + Optional<bool> AllowRecursiveCall = false; }; /// Generate the parameters to tune the inline cost analysis based only on the diff --git a/llvm/include/llvm/Analysis/InlineOrder.h b/llvm/include/llvm/Analysis/InlineOrder.h new file mode 100644 index 000000000000..def3192356f4 --- /dev/null +++ b/llvm/include/llvm/Analysis/InlineOrder.h @@ -0,0 +1,172 @@ +//===- InlineOrder.h - Inlining order abstraction -*- C++ ---*-------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +#ifndef LLVM_ANALYSIS_INLINEORDER_H +#define LLVM_ANALYSIS_INLINEORDER_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include <algorithm> +#include <utility> + +namespace llvm { +class CallBase; +class Function; +class Module; + +template <typename T> class InlineOrder { +public: + using reference = T &; + using const_reference = const T &; + + virtual ~InlineOrder() {} + + virtual size_t size() = 0; + + virtual void push(const T &Elt) = 0; + + virtual T pop() = 0; + + virtual const_reference front() = 0; + + virtual void erase_if(function_ref<bool(T)> Pred) = 0; + + bool empty() { return !size(); } +}; + +template <typename T, typename Container = SmallVector<T, 16>> +class DefaultInlineOrder : public InlineOrder<T> { + using reference = T &; + using const_reference = const T &; + +public: + size_t size() override { return Calls.size() - FirstIndex; } + + void push(const T &Elt) override { Calls.push_back(Elt); } + + T pop() override { + assert(size() > 0); + return Calls[FirstIndex++]; + } + + const_reference front() override { + assert(size() > 0); + return Calls[FirstIndex]; + } + + void erase_if(function_ref<bool(T)> Pred) override { + Calls.erase(std::remove_if(Calls.begin() + FirstIndex, Calls.end(), Pred), + Calls.end()); + } + +private: + Container Calls; + size_t FirstIndex = 0; +}; + +class InlineSizePriority { +public: + InlineSizePriority(int Size) : Size(Size) {} + + static bool isMoreDesirable(const InlineSizePriority &S1, + const InlineSizePriority &S2) { + return S1.Size < S2.Size; + } + + static InlineSizePriority evaluate(CallBase *CB) { + Function *Callee = CB->getCalledFunction(); + return InlineSizePriority(Callee->getInstructionCount()); + } + + int Size; +}; + +template 
<typename PriorityT> +class PriorityInlineOrder : public InlineOrder<std::pair<CallBase *, int>> { + using T = std::pair<CallBase *, int>; + using HeapT = std::pair<CallBase *, PriorityT>; + using reference = T &; + using const_reference = const T &; + + static bool cmp(const HeapT &P1, const HeapT &P2) { + return PriorityT::isMoreDesirable(P2.second, P1.second); + } + + // A call site could become less desirable for inlining because of the size + // growth from prior inlining into the callee. This method is used to lazily + // update the desirability of a call site if it's decreasing. It is only + // called on pop() or front(), not every time the desirability changes. When + // the desirability of the front call site decreases, an updated one would be + // pushed right back into the heap. For simplicity, those cases where + // the desirability of a call site increases are ignored here. + void adjust() { + bool Changed = false; + do { + CallBase *CB = Heap.front().first; + const PriorityT PreviousGoodness = Heap.front().second; + const PriorityT CurrentGoodness = PriorityT::evaluate(CB); + Changed = PriorityT::isMoreDesirable(PreviousGoodness, CurrentGoodness); + if (Changed) { + std::pop_heap(Heap.begin(), Heap.end(), cmp); + Heap.pop_back(); + Heap.push_back({CB, CurrentGoodness}); + std::push_heap(Heap.begin(), Heap.end(), cmp); + } + } while (Changed); + } + +public: + size_t size() override { return Heap.size(); } + + void push(const T &Elt) override { + CallBase *CB = Elt.first; + const int InlineHistoryID = Elt.second; + const PriorityT Goodness = PriorityT::evaluate(CB); + + Heap.push_back({CB, Goodness}); + std::push_heap(Heap.begin(), Heap.end(), cmp); + InlineHistoryMap[CB] = InlineHistoryID; + } + + T pop() override { + assert(size() > 0); + adjust(); + + CallBase *CB = Heap.front().first; + T Result = std::make_pair(CB, InlineHistoryMap[CB]); + InlineHistoryMap.erase(CB); + std::pop_heap(Heap.begin(), Heap.end(), cmp); + Heap.pop_back(); + return 
Result; + } + + const_reference front() override { + assert(size() > 0); + adjust(); + + CallBase *CB = Heap.front().first; + return *InlineHistoryMap.find(CB); + } + + void erase_if(function_ref<bool(T)> Pred) override { + auto PredWrapper = [=](HeapT P) -> bool { + return Pred(std::make_pair(P.first, 0)); + }; + llvm::erase_if(Heap, PredWrapper); + std::make_heap(Heap.begin(), Heap.end(), cmp); + } + +private: + SmallVector<HeapT, 16> Heap; + DenseMap<CallBase *, int> InlineHistoryMap; +}; +} // namespace llvm +#endif // LLVM_ANALYSIS_INLINEORDER_H diff --git a/llvm/include/llvm/Analysis/InstructionSimplify.h b/llvm/include/llvm/Analysis/InstructionSimplify.h index efaf1847276b..f0f8e4bc9175 100644 --- a/llvm/include/llvm/Analysis/InstructionSimplify.h +++ b/llvm/include/llvm/Analysis/InstructionSimplify.h @@ -248,7 +248,7 @@ Value *SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal, const SimplifyQuery &Q); /// Given operands for a GetElementPtrInst, fold the result or return null. -Value *SimplifyGEPInst(Type *SrcTy, ArrayRef<Value *> Ops, +Value *SimplifyGEPInst(Type *SrcTy, ArrayRef<Value *> Ops, bool InBounds, const SimplifyQuery &Q); /// Given operands for an InsertValueInst, fold the result or return null. diff --git a/llvm/include/llvm/Analysis/LazyCallGraph.h b/llvm/include/llvm/Analysis/LazyCallGraph.h index ca276d2f3cf8..0580f4d7b226 100644 --- a/llvm/include/llvm/Analysis/LazyCallGraph.h +++ b/llvm/include/llvm/Analysis/LazyCallGraph.h @@ -145,7 +145,7 @@ public: /// around but clear them. explicit operator bool() const; - /// Returnss the \c Kind of the edge. + /// Returns the \c Kind of the edge. Kind getKind() const; /// Test whether the edge represents a direct call to a function. @@ -307,9 +307,9 @@ public: /// A node in the call graph. /// - /// This represents a single node. 
It's primary roles are to cache the list of - /// callees, de-duplicate and provide fast testing of whether a function is - /// a callee, and facilitate iteration of child nodes in the graph. + /// This represents a single node. Its primary roles are to cache the list of + /// callees, de-duplicate and provide fast testing of whether a function is a + /// callee, and facilitate iteration of child nodes in the graph. /// /// The node works much like an optional in order to lazily populate the /// edges of each node. Until populated, there are no edges. Once populated, @@ -392,7 +392,7 @@ public: /// Internal helper to directly replace the function with a new one. /// - /// This is used to facilitate tranfsormations which need to replace the + /// This is used to facilitate transformations which need to replace the /// formal Function object but directly move the body and users from one to /// the other. void replaceFunction(Function &NewF); @@ -419,7 +419,7 @@ public: /// outer structure. SCCs do not support mutation of the call graph, that /// must be done through the containing \c RefSCC in order to fully reason /// about the ordering and connections of the graph. - class SCC { + class LLVM_EXTERNAL_VISIBILITY SCC { friend class LazyCallGraph; friend class LazyCallGraph::Node; @@ -435,7 +435,7 @@ public: Nodes.clear(); } - /// Print a short descrtiption useful for debugging or logging. + /// Print a short description useful for debugging or logging. /// /// We print the function names in the SCC wrapped in '()'s and skipping /// the middle functions if there are a large number. @@ -467,9 +467,10 @@ public: /// Verify invariants about the SCC. /// /// This will attempt to validate all of the basic invariants within an - /// SCC, but not that it is a strongly connected componet per-se. Primarily - /// useful while building and updating the graph to check that basic - /// properties are in place rather than having inexplicable crashes later. 
+ /// SCC, but not that it is a strongly connected component per se. + /// Primarily useful while building and updating the graph to check that + /// basic properties are in place rather than having inexplicable crashes + /// later. void verify(); #endif @@ -511,7 +512,7 @@ public: /// Provide a short name by printing this SCC to a std::string. /// - /// This copes with the fact that we don't have a name per-se for an SCC + /// This copes with the fact that we don't have a name per se for an SCC /// while still making the use of this in debugging and logging useful. std::string getName() const { std::string Name; @@ -644,7 +645,7 @@ public: /// Provide a short name by printing this RefSCC to a std::string. /// - /// This copes with the fact that we don't have a name per-se for an RefSCC + /// This copes with the fact that we don't have a name per se for an RefSCC /// while still making the use of this in debugging and logging useful. std::string getName() const { std::string Name; @@ -1085,47 +1086,9 @@ public: /// updates that set with every constant visited. /// /// For each defined function, calls \p Callback with that function. - template <typename CallbackT> static void visitReferences(SmallVectorImpl<Constant *> &Worklist, SmallPtrSetImpl<Constant *> &Visited, - CallbackT Callback) { - while (!Worklist.empty()) { - Constant *C = Worklist.pop_back_val(); - - if (Function *F = dyn_cast<Function>(C)) { - if (!F->isDeclaration()) - Callback(*F); - continue; - } - - // The blockaddress constant expression is a weird special case, we can't - // generically walk its operands the way we do for all other constants. - if (BlockAddress *BA = dyn_cast<BlockAddress>(C)) { - // If we've already visited the function referred to by the block - // address, we don't need to revisit it. - if (Visited.count(BA->getFunction())) - continue; - - // If all of the blockaddress' users are instructions within the - // referred to function, we don't need to insert a cycle. 
- if (llvm::all_of(BA->users(), [&](User *U) { - if (Instruction *I = dyn_cast<Instruction>(U)) - return I->getFunction() == BA->getFunction(); - return false; - })) - continue; - - // Otherwise we should go visit the referred to function. - Visited.insert(BA->getFunction()); - Worklist.push_back(BA->getFunction()); - continue; - } - - for (Value *Op : C->operand_values()) - if (Visited.insert(cast<Constant>(Op)).second) - Worklist.push_back(cast<Constant>(Op)); - } - } + function_ref<void(Function &)> Callback); ///@} diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h index 0a0ef1536caf..2b4edfac61fc 100644 --- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h +++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h @@ -177,21 +177,11 @@ public: /// Register the location (instructions are given increasing numbers) /// of a write access. - void addAccess(StoreInst *SI) { - Value *Ptr = SI->getPointerOperand(); - Accesses[MemAccessInfo(Ptr, true)].push_back(AccessIdx); - InstMap.push_back(SI); - ++AccessIdx; - } + void addAccess(StoreInst *SI); /// Register the location (instructions are given increasing numbers) /// of a write access. - void addAccess(LoadInst *LI) { - Value *Ptr = LI->getPointerOperand(); - Accesses[MemAccessInfo(Ptr, false)].push_back(AccessIdx); - InstMap.push_back(LI); - ++AccessIdx; - } + void addAccess(LoadInst *LI); /// Check whether the dependencies between the accesses are safe. /// @@ -664,15 +654,14 @@ Value *stripIntegerCast(Value *V); /// If necessary this method will version the stride of the pointer according /// to \p PtrToStride and therefore add further predicates to \p PSE. /// -/// If \p OrigPtr is not null, use it to look up the stride value instead of \p -/// Ptr. 
\p PtrToStride provides the mapping between the pointer value and its +/// \p PtrToStride provides the mapping between the pointer value and its /// stride as collected by LoopVectorizationLegality::collectStridedAccess. const SCEV *replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE, const ValueToValueMap &PtrToStride, - Value *Ptr, Value *OrigPtr = nullptr); + Value *Ptr); -/// If the pointer has a constant stride return it in units of its -/// element size. Otherwise return zero. +/// If the pointer has a constant stride return it in units of the access type +/// size. Otherwise return zero. /// /// Ensure that it does not wrap in the address space, assuming the predicate /// associated with \p PSE is true. @@ -681,7 +670,8 @@ const SCEV *replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE, /// to \p PtrToStride and therefore add further predicates to \p PSE. /// The \p Assume parameter indicates if we are allowed to make additional /// run-time assumptions. -int64_t getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr, const Loop *Lp, +int64_t getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr, + const Loop *Lp, const ValueToValueMap &StridesMap = ValueToValueMap(), bool Assume = false, bool ShouldCheckWrap = true); diff --git a/llvm/include/llvm/Analysis/LoopAnalysisManager.h b/llvm/include/llvm/Analysis/LoopAnalysisManager.h index 92db1d67fc4e..bc8a1e74e447 100644 --- a/llvm/include/llvm/Analysis/LoopAnalysisManager.h +++ b/llvm/include/llvm/Analysis/LoopAnalysisManager.h @@ -58,6 +58,7 @@ struct LoopStandardAnalysisResults { TargetLibraryInfo &TLI; TargetTransformInfo &TTI; BlockFrequencyInfo *BFI; + BranchProbabilityInfo *BPI; MemorySSA *MSSA; }; diff --git a/llvm/include/llvm/Analysis/LoopInfo.h b/llvm/include/llvm/Analysis/LoopInfo.h index 164ec50e47bc..15c9d911ab80 100644 --- a/llvm/include/llvm/Analysis/LoopInfo.h +++ b/llvm/include/llvm/Analysis/LoopInfo.h @@ -527,7 +527,7 @@ extern template class LoopBase<BasicBlock, 
Loop>; /// Represents a single loop in the control flow graph. Note that not all SCCs /// in the CFG are necessarily loops. -class Loop : public LoopBase<BasicBlock, Loop> { +class LLVM_EXTERNAL_VISIBILITY Loop : public LoopBase<BasicBlock, Loop> { public: /// A range representing the start and end location of a loop. class LocRange { @@ -950,7 +950,7 @@ public: /// /// Note that because loops form a forest of trees, preorder is equivalent to /// reverse postorder. - SmallVector<LoopT *, 4> getLoopsInPreorder(); + SmallVector<LoopT *, 4> getLoopsInPreorder() const; /// Return all of the loops in the function in preorder across the loop /// nests, with siblings in *reverse* program order. @@ -960,7 +960,7 @@ public: /// /// Also note that this is *not* a reverse preorder. Only the siblings are in /// reverse program order. - SmallVector<LoopT *, 4> getLoopsInReverseSiblingPreorder(); + SmallVector<LoopT *, 4> getLoopsInReverseSiblingPreorder() const; /// Return the inner most loop that BB lives in. If a basic block is in no /// loop (for example the entry node), null is returned. @@ -1213,6 +1213,13 @@ public: }; +/// Enable verification of loop info. +/// +/// The flag enables checks which are expensive and are disabled by default +/// unless the `EXPENSIVE_CHECKS` macro is defined. The `-verify-loop-info` +/// flag allows the checks to be enabled selectively without re-compilation. +extern bool VerifyLoopInfo; + // Allow clients to walk the list of nested loops... template <> struct GraphTraits<const Loop *> { typedef const Loop *NodeRef; @@ -1305,6 +1312,10 @@ bool getBooleanLoopAttribute(const Loop *TheLoop, StringRef Name); llvm::Optional<int> getOptionalIntLoopAttribute(const Loop *TheLoop, StringRef Name); +/// Find named metadata for a loop with an integer value. Return \p Default if +/// not set. +int getIntLoopAttribute(const Loop *TheLoop, StringRef Name, int Default = 0); + /// Find string metadata for loop /// /// If it has a value (e.g. 
{"llvm.distribute", 1} return the value as an diff --git a/llvm/include/llvm/Analysis/LoopInfoImpl.h b/llvm/include/llvm/Analysis/LoopInfoImpl.h index 2cc9afb7c2cd..b8b8330d0fe1 100644 --- a/llvm/include/llvm/Analysis/LoopInfoImpl.h +++ b/llvm/include/llvm/Analysis/LoopInfoImpl.h @@ -574,7 +574,8 @@ void LoopInfoBase<BlockT, LoopT>::analyze(const DomTreeBase<BlockT> &DomTree) { } template <class BlockT, class LoopT> -SmallVector<LoopT *, 4> LoopInfoBase<BlockT, LoopT>::getLoopsInPreorder() { +SmallVector<LoopT *, 4> +LoopInfoBase<BlockT, LoopT>::getLoopsInPreorder() const { SmallVector<LoopT *, 4> PreOrderLoops, PreOrderWorklist; // The outer-most loop actually goes into the result in the same relative // order as we walk it. But LoopInfo stores the top level loops in reverse @@ -592,7 +593,7 @@ SmallVector<LoopT *, 4> LoopInfoBase<BlockT, LoopT>::getLoopsInPreorder() { template <class BlockT, class LoopT> SmallVector<LoopT *, 4> -LoopInfoBase<BlockT, LoopT>::getLoopsInReverseSiblingPreorder() { +LoopInfoBase<BlockT, LoopT>::getLoopsInReverseSiblingPreorder() const { SmallVector<LoopT *, 4> PreOrderLoops, PreOrderWorklist; // The outer-most loop actually goes into the result in the same relative // order as we walk it. LoopInfo stores the top level loops in reverse diff --git a/llvm/include/llvm/Analysis/LoopNestAnalysis.h b/llvm/include/llvm/Analysis/LoopNestAnalysis.h index 9a749a1c8eae..3d4a064cf7e3 100644 --- a/llvm/include/llvm/Analysis/LoopNestAnalysis.h +++ b/llvm/include/llvm/Analysis/LoopNestAnalysis.h @@ -21,11 +21,14 @@ namespace llvm { using LoopVectorTy = SmallVector<Loop *, 8>; + class LPMUpdater; /// This class represents a loop nest and can be used to query its properties. -class LoopNest { +class LLVM_EXTERNAL_VISIBILITY LoopNest { public: + using InstrVectorTy = SmallVector<const Instruction *>; + /// Construct a loop nest rooted by loop \p Root. 
LoopNest(Loop &Root, ScalarEvolution &SE); @@ -48,6 +51,12 @@ public: static bool arePerfectlyNested(const Loop &OuterLoop, const Loop &InnerLoop, ScalarEvolution &SE); + /// Return a vector of instructions that prevent the LoopNest given + /// by loops \p OuterLoop and \p InnerLoop from being perfect. + static InstrVectorTy getInterveningInstructions(const Loop &OuterLoop, + const Loop &InnerLoop, + ScalarEvolution &SE); + /// Return the maximum nesting depth of the loop nest rooted by loop \p Root. /// For example given the loop nest: /// \code @@ -150,6 +159,17 @@ public: protected: const unsigned MaxPerfectDepth; // maximum perfect nesting depth level. LoopVectorTy Loops; // the loops in the nest (in breadth first order). + +private: + enum LoopNestEnum { + PerfectLoopNest, + ImperfectLoopNest, + InvalidLoopStructure, + OuterLoopLowerBoundUnknown + }; + static LoopNestEnum analyzeLoopNestForPerfectNest(const Loop &OuterLoop, + const Loop &InnerLoop, + ScalarEvolution &SE); }; raw_ostream &operator<<(raw_ostream &, const LoopNest &); diff --git a/llvm/include/llvm/Analysis/MLInlineAdvisor.h b/llvm/include/llvm/Analysis/MLInlineAdvisor.h index 54edbb823263..a218561e61c7 100644 --- a/llvm/include/llvm/Analysis/MLInlineAdvisor.h +++ b/llvm/include/llvm/Analysis/MLInlineAdvisor.h @@ -38,6 +38,7 @@ public: bool isForcedToStop() const { return ForceStop; } int64_t getLocalCalls(Function &F); const MLModelRunner &getModelRunner() const { return *ModelRunner.get(); } + void onModuleInvalidated() override { Invalid = true; } protected: std::unique_ptr<InlineAdvice> getAdviceImpl(CallBase &CB) override; @@ -55,6 +56,7 @@ protected: private: int64_t getModuleIRSize() const; + bool Invalid = true; std::unique_ptr<CallGraph> CG; int64_t NodeCount = 0; diff --git a/llvm/include/llvm/Analysis/MemorySSA.h b/llvm/include/llvm/Analysis/MemorySSA.h index f40b99968fd3..48aeef371e3d 100644 --- a/llvm/include/llvm/Analysis/MemorySSA.h +++ b/llvm/include/llvm/Analysis/MemorySSA.h @@ 
-106,9 +106,6 @@ namespace llvm { -/// Enables memory ssa as a dependency for loop passes. -extern cl::opt<bool> EnableMSSALoopDependency; - class AllocaInst; class Function; class Instruction; @@ -786,21 +783,22 @@ public: /// dominates Use \p B. bool dominates(const MemoryAccess *A, const Use &B) const; + enum class VerificationLevel { Fast, Full }; /// Verify that MemorySSA is self consistent (IE definitions dominate /// all uses, uses appear in the right places). This is used by unit tests. - void verifyMemorySSA() const; + void verifyMemorySSA(VerificationLevel = VerificationLevel::Fast) const; /// Used in various insertion functions to specify whether we are talking /// about the beginning or end of a block. enum InsertionPlace { Beginning, End, BeforeTerminator }; protected: - // Used by Memory SSA annotater, dumpers, and wrapper pass - friend class MemorySSAAnnotatedWriter; + // Used by Memory SSA dumpers and wrapper pass friend class MemorySSAPrinterLegacyPass; friend class MemorySSAUpdater; - void verifyOrderingDominationAndDefUses(Function &F) const; + void verifyOrderingDominationAndDefUses( + Function &F, VerificationLevel = VerificationLevel::Fast) const; void verifyDominationNumbers(const Function &F) const; void verifyPrevDefInPhis(Function &F) const; @@ -898,6 +896,13 @@ private: unsigned NextID; }; +/// Enables verification of MemorySSA. +/// +/// The checks which this flag enables is exensive and disabled by default +/// unless `EXPENSIVE_CHECKS` is defined. The flag `-verify-memoryssa` can be +/// used to selectively enable the verification without re-compilation. +extern bool VerifyMemorySSA; + // Internal MemorySSA utils, for use by MemorySSA classes and walkers class MemorySSAUtil { protected: @@ -956,6 +961,17 @@ public: PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); }; +/// Printer pass for \c MemorySSA via the walker. 
+class MemorySSAWalkerPrinterPass + : public PassInfoMixin<MemorySSAWalkerPrinterPass> { + raw_ostream &OS; + +public: + explicit MemorySSAWalkerPrinterPass(raw_ostream &OS) : OS(OS) {} + + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); +}; + /// Verifier pass for \c MemorySSA. struct MemorySSAVerifierPass : PassInfoMixin<MemorySSAVerifierPass> { PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); diff --git a/llvm/include/llvm/Analysis/ObjCARCAnalysisUtils.h b/llvm/include/llvm/Analysis/ObjCARCAnalysisUtils.h index 62bdade95d96..17062ab907a6 100644 --- a/llvm/include/llvm/Analysis/ObjCARCAnalysisUtils.h +++ b/llvm/include/llvm/Analysis/ObjCARCAnalysisUtils.h @@ -78,14 +78,17 @@ inline const Value *GetUnderlyingObjCPtr(const Value *V) { } /// A wrapper for GetUnderlyingObjCPtr used for results memoization. -inline const Value * -GetUnderlyingObjCPtrCached(const Value *V, - DenseMap<const Value *, WeakTrackingVH> &Cache) { - if (auto InCache = Cache.lookup(V)) - return InCache; +inline const Value *GetUnderlyingObjCPtrCached( + const Value *V, + DenseMap<const Value *, std::pair<WeakVH, WeakTrackingVH>> &Cache) { + // The entry is invalid if either value handle is null. + auto InCache = Cache.lookup(V); + if (InCache.first && InCache.second) + return InCache.second; const Value *Computed = GetUnderlyingObjCPtr(V); - Cache[V] = const_cast<Value *>(Computed); + Cache[V] = + std::make_pair(const_cast<Value *>(V), const_cast<Value *>(Computed)); return Computed; } @@ -168,8 +171,8 @@ bool IsPotentialRetainableObjPtr(const Value *Op, AAResults &AA); /// Helper for GetARCInstKind. Determines what kind of construct CS /// is. inline ARCInstKind GetCallSiteClass(const CallBase &CB) { - for (auto I = CB.arg_begin(), E = CB.arg_end(); I != E; ++I) - if (IsPotentialRetainableObjPtr(*I)) + for (const Use &U : CB.args()) + if (IsPotentialRetainableObjPtr(U)) return CB.onlyReadsMemory() ? 
ARCInstKind::User : ARCInstKind::CallOrUser; return CB.onlyReadsMemory() ? ARCInstKind::None : ARCInstKind::Call; @@ -204,11 +207,10 @@ inline bool IsObjCIdentifiedObject(const Value *V) { return true; StringRef Section = GV->getSection(); - if (Section.find("__message_refs") != StringRef::npos || - Section.find("__objc_classrefs") != StringRef::npos || - Section.find("__objc_superrefs") != StringRef::npos || - Section.find("__objc_methname") != StringRef::npos || - Section.find("__cstring") != StringRef::npos) + if (Section.contains("__message_refs") || + Section.contains("__objc_classrefs") || + Section.contains("__objc_superrefs") || + Section.contains("__objc_methname") || Section.contains("__cstring")) return true; } } diff --git a/llvm/include/llvm/Analysis/ObjCARCUtil.h b/llvm/include/llvm/Analysis/ObjCARCUtil.h index 2566bfbcf61c..362dd6c29992 100644 --- a/llvm/include/llvm/Analysis/ObjCARCUtil.h +++ b/llvm/include/llvm/Analysis/ObjCARCUtil.h @@ -11,9 +11,11 @@ /// //===----------------------------------------------------------------------===// -#ifndef LLVM_IR_OBJCARCUTIL_H -#define LLVM_IR_OBJCARCUTIL_H +#ifndef LLVM_ANALYSIS_OBJCARCUTIL_H +#define LLVM_ANALYSIS_OBJCARCUTIL_H +#include "llvm/Analysis/ObjCARCInstKind.h" +#include "llvm/IR/Function.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/LLVMContext.h" @@ -24,13 +26,6 @@ inline const char *getRVMarkerModuleFlagStr() { return "clang.arc.retainAutoreleasedReturnValueMarker"; } -enum AttachedCallOperandBundle : unsigned { RVOB_Retain, RVOB_Claim }; - -inline AttachedCallOperandBundle -getAttachedCallOperandBundleEnum(bool IsRetain) { - return IsRetain ? RVOB_Retain : RVOB_Claim; -} - inline bool hasAttachedCallOpBundle(const CallBase *CB) { // Ignore the bundle if the return type is void. Global optimization passes // can turn the called function's return type to void. 
That should happen only @@ -43,14 +38,32 @@ inline bool hasAttachedCallOpBundle(const CallBase *CB) { .hasValue(); } -inline bool hasAttachedCallOpBundle(const CallBase *CB, bool IsRetain) { - assert(hasAttachedCallOpBundle(CB) && - "call doesn't have operand bundle clang_arc_attachedcall"); +/// This function returns operand bundle clang_arc_attachedcall's argument, +/// which is the address of the ARC runtime function. +inline Optional<Function *> getAttachedARCFunction(const CallBase *CB) { auto B = CB->getOperandBundle(LLVMContext::OB_clang_arc_attachedcall); - if (!B.hasValue()) - return false; - return cast<ConstantInt>(B->Inputs[0])->getZExtValue() == - getAttachedCallOperandBundleEnum(IsRetain); + if (!B.hasValue() || B->Inputs.size() == 0) + return None; + + return cast<Function>(B->Inputs[0]); +} + +/// Check whether the function is retainRV/claimRV. +inline bool isRetainOrClaimRV(ARCInstKind Kind) { + return Kind == ARCInstKind::RetainRV || Kind == ARCInstKind::ClaimRV; +} + +/// This function returns the ARCInstKind of the function attached to operand +/// bundle clang_arc_attachedcall. It returns None if the call doesn't have the +/// operand bundle or the operand is null. Otherwise it returns either RetainRV +/// or ClaimRV. 
+inline ARCInstKind getAttachedARCFunctionKind(const CallBase *CB) { + Optional<Function *> Fn = getAttachedARCFunction(CB); + if (!Fn.hasValue()) + return ARCInstKind::None; + auto FnClass = GetFunctionClass(*Fn); + assert(isRetainOrClaimRV(FnClass) && "unexpected ARC runtime function"); + return FnClass; } } // end namespace objcarc diff --git a/llvm/include/llvm/Analysis/ProfileSummaryInfo.h b/llvm/include/llvm/Analysis/ProfileSummaryInfo.h index c95404d96f4e..886800d8a0f5 100644 --- a/llvm/include/llvm/Analysis/ProfileSummaryInfo.h +++ b/llvm/include/llvm/Analysis/ProfileSummaryInfo.h @@ -134,9 +134,13 @@ public: bool isColdCount(uint64_t C) const; /// Returns true if count \p C is considered hot with regard to a given /// hot percentile cutoff value. + /// PercentileCutoff is encoded as a 6 digit decimal fixed point number, where + /// the first two digits are the whole part. E.g. 995000 for 99.5 percentile. bool isHotCountNthPercentile(int PercentileCutoff, uint64_t C) const; /// Returns true if count \p C is considered cold with regard to a given /// cold percentile cutoff value. + /// PercentileCutoff is encoded as a 6 digit decimal fixed point number, where + /// the first two digits are the whole part. E.g. 995000 for 99.5 percentile. bool isColdCountNthPercentile(int PercentileCutoff, uint64_t C) const; /// Returns true if BasicBlock \p BB is considered hot. bool isHotBlock(const BasicBlock *BB, BlockFrequencyInfo *BFI) const; @@ -144,10 +148,14 @@ public: bool isColdBlock(const BasicBlock *BB, BlockFrequencyInfo *BFI) const; /// Returns true if BasicBlock \p BB is considered hot with regard to a given /// hot percentile cutoff value. + /// PercentileCutoff is encoded as a 6 digit decimal fixed point number, where + /// the first two digits are the whole part. E.g. 995000 for 99.5 percentile. 
bool isHotBlockNthPercentile(int PercentileCutoff, const BasicBlock *BB, BlockFrequencyInfo *BFI) const; /// Returns true if BasicBlock \p BB is considered cold with regard to a given /// cold percentile cutoff value. + /// PercentileCutoff is encoded as a 6 digit decimal fixed point number, where + /// the first two digits are the whole part. E.g. 995000 for 99.5 percentile. bool isColdBlockNthPercentile(int PercentileCutoff, const BasicBlock *BB, BlockFrequencyInfo *BFI) const; /// Returns true if the call site \p CB is considered hot. @@ -162,11 +170,11 @@ public: uint64_t getOrCompColdCountThreshold() const; /// Returns HotCountThreshold if set. uint64_t getHotCountThreshold() const { - return HotCountThreshold ? HotCountThreshold.getValue() : 0; + return HotCountThreshold.getValueOr(0); } /// Returns ColdCountThreshold if set. uint64_t getColdCountThreshold() const { - return ColdCountThreshold ? ColdCountThreshold.getValue() : 0; + return ColdCountThreshold.getValueOr(0); } private: diff --git a/llvm/include/llvm/Analysis/ReplayInlineAdvisor.h b/llvm/include/llvm/Analysis/ReplayInlineAdvisor.h index 3018bcc241d8..a0eb9af62205 100644 --- a/llvm/include/llvm/Analysis/ReplayInlineAdvisor.h +++ b/llvm/include/llvm/Analysis/ReplayInlineAdvisor.h @@ -20,6 +20,46 @@ class Function; class Module; class OptimizationRemarkEmitter; +struct CallSiteFormat { + enum class Format : int { + Line, + LineColumn, + LineDiscriminator, + LineColumnDiscriminator + }; + + bool outputColumn() const { + return OutputFormat == Format::LineColumn || + OutputFormat == Format::LineColumnDiscriminator; + } + + bool outputDiscriminator() const { + return OutputFormat == Format::LineDiscriminator || + OutputFormat == Format::LineColumnDiscriminator; + } + + Format OutputFormat; +}; + +/// Replay Inliner Setup +struct ReplayInlinerSettings { + enum class Scope : int { Function, Module }; + enum class Fallback : int { Original, AlwaysInline, NeverInline }; + + StringRef ReplayFile; + Scope 
ReplayScope; + Fallback ReplayFallback; + CallSiteFormat ReplayFormat; +}; + +/// Get call site location as a string with the given format +std::string formatCallSiteLocation(DebugLoc DLoc, const CallSiteFormat &Format); + +std::unique_ptr<InlineAdvisor> getReplayInlineAdvisor( + Module &M, FunctionAnalysisManager &FAM, LLVMContext &Context, + std::unique_ptr<InlineAdvisor> OriginalAdvisor, + const ReplayInlinerSettings &ReplaySettings, bool EmitRemarks); + /// Replay inline advisor that uses optimization remarks from inlining of /// previous build to guide current inlining. This is useful for inliner tuning. class ReplayInlineAdvisor : public InlineAdvisor { @@ -27,15 +67,24 @@ public: ReplayInlineAdvisor(Module &M, FunctionAnalysisManager &FAM, LLVMContext &Context, std::unique_ptr<InlineAdvisor> OriginalAdvisor, - StringRef RemarksFile, bool EmitRemarks); + const ReplayInlinerSettings &ReplaySettings, + bool EmitRemarks); std::unique_ptr<InlineAdvice> getAdviceImpl(CallBase &CB) override; bool areReplayRemarksLoaded() const { return HasReplayRemarks; } private: - StringSet<> InlineSitesFromRemarks; + bool hasInlineAdvice(Function &F) const { + return (ReplaySettings.ReplayScope == + ReplayInlinerSettings::Scope::Module) || + CallersToReplay.contains(F.getName()); + } std::unique_ptr<InlineAdvisor> OriginalAdvisor; bool HasReplayRemarks = false; + const ReplayInlinerSettings ReplaySettings; bool EmitRemarks = false; + + StringMap<bool> InlineSitesFromRemarks; + StringSet<> CallersToReplay; }; } // namespace llvm #endif // LLVM_ANALYSIS_REPLAYINLINEADVISOR_H diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h index ae9c73fede96..a2260688e3d6 100644 --- a/llvm/include/llvm/Analysis/ScalarEvolution.h +++ b/llvm/include/llvm/Analysis/ScalarEvolution.h @@ -25,7 +25,6 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseMapInfo.h" #include "llvm/ADT/FoldingSet.h" -#include "llvm/ADT/Hashing.h" #include 
"llvm/ADT/Optional.h" #include "llvm/ADT/PointerIntPair.h" #include "llvm/ADT/SetVector.h" @@ -112,6 +111,24 @@ public: /// Note that NUW and NSW are also valid properties of a recurrence, and /// either implies NW. For convenience, NW will be set for a recurrence /// whenever either NUW or NSW are set. + /// + /// We require that the flag on a SCEV apply to the entire scope in which + /// that SCEV is defined. A SCEV's scope is set of locations dominated by + /// a defining location, which is in turn described by the following rules: + /// * A SCEVUnknown is at the point of definition of the Value. + /// * A SCEVConstant is defined at all points. + /// * A SCEVAddRec is defined starting with the header of the associated + /// loop. + /// * All other SCEVs are defined at the earlest point all operands are + /// defined. + /// + /// The above rules describe a maximally hoisted form (without regards to + /// potential control dependence). A SCEV is defined anywhere a + /// corresponding instruction could be defined in said maximally hoisted + /// form. Note that SCEVUDivExpr (currently the only expression type which + /// can trap) can be defined per these rules in regions where it would trap + /// at runtime. A SCEV being defined does not require the existence of any + /// instruction within the defined scope. enum NoWrapFlags { FlagAnyWrap = 0, // No guarantee. FlagNW = (1 << 0), // No self-wrap. @@ -472,6 +489,10 @@ public: clearFlags(SCEV::NoWrapFlags Flags, SCEV::NoWrapFlags OffFlags) { return (SCEV::NoWrapFlags)(Flags & ~OffFlags); } + LLVM_NODISCARD static bool hasFlags(SCEV::NoWrapFlags Flags, + SCEV::NoWrapFlags TestFlags) { + return TestFlags == maskFlags(Flags, TestFlags); + }; ScalarEvolution(Function &F, TargetLibraryInfo &TLI, AssumptionCache &AC, DominatorTree &DT, LoopInfo &LI); @@ -498,13 +519,26 @@ public: // Returns a wider type among {Ty1, Ty2}. 
Type *getWiderType(Type *Ty1, Type *Ty2) const; + /// Return true if there exists a point in the program at which both + /// A and B could be operands to the same instruction. + /// SCEV expressions are generally assumed to correspond to instructions + /// which could exists in IR. In general, this requires that there exists + /// a use point in the program where all operands dominate the use. + /// + /// Example: + /// loop { + /// if + /// loop { v1 = load @global1; } + /// else + /// loop { v2 = load @global2; } + /// } + /// No SCEV with operand V1, and v2 can exist in this program. + bool instructionCouldExistWitthOperands(const SCEV *A, const SCEV *B); + /// Return true if the SCEV is a scAddRecExpr or it contains /// scAddRecExpr. The result will be cached in HasRecMap. bool containsAddRecurrence(const SCEV *S); - /// Erase Value from ValueExprMap and ExprValueMap. - void eraseValueFromMap(Value *V); - /// Is operation \p BinOp between \p LHS and \p RHS provably does not have /// a signed/unsigned overflow (\p Signed)? bool willNotOverflow(Instruction::BinaryOps BinOp, bool Signed, @@ -516,6 +550,12 @@ public: std::pair<SCEV::NoWrapFlags, bool /*Deduced*/> getStrengthenedNoWrapFlagsFromBinOp(const OverflowingBinaryOperator *OBO); + /// Notify this ScalarEvolution that \p User directly uses SCEVs in \p Ops. + void registerUser(const SCEV *User, ArrayRef<const SCEV *> Ops); + + /// Return true if the SCEV expression contains an undef value. + bool containsUndefs(const SCEV *S) const; + /// Return a SCEV expression for the full generality of the specified /// expression. const SCEV *getSCEV(Value *V); @@ -700,6 +740,9 @@ public: /// cases do exist. const SCEV *getPointerBase(const SCEV *V); + /// Compute an expression equivalent to S - getPointerBase(S). + const SCEV *removePointerBase(const SCEV *S); + /// Return a SCEV expression for the specified value at the specified scope /// in the program. 
The L value specifies a loop nest to evaluate the /// expression at, where null is the top-level or a specified loop is @@ -735,9 +778,13 @@ public: /// Convert from an "exit count" (i.e. "backedge taken count") to a "trip /// count". A "trip count" is the number of times the header of the loop /// will execute if an exit is taken after the specified number of backedges - /// have been taken. (e.g. TripCount = ExitCount + 1) A zero result - /// must be interpreted as a loop having an unknown trip count. - const SCEV *getTripCountFromExitCount(const SCEV *ExitCount); + /// have been taken. (e.g. TripCount = ExitCount + 1). Note that the + /// expression can overflow if ExitCount = UINT_MAX. \p Extend controls + /// how potential overflow is handled. If true, a wider result type is + /// returned. ex: EC = 255 (i8), TC = 256 (i9). If false, result unsigned + /// wraps with 2s-complement semantics. ex: EC = 255 (i8), TC = 0 (i8) + const SCEV *getTripCountFromExitCount(const SCEV *ExitCount, + bool Extend = true); /// Returns the exact trip count of the loop if we can compute it, and /// the result is a small constant. '0' is used to represent an unknown @@ -762,6 +809,13 @@ public: /// Returns 0 if the trip count is unknown or not constant. unsigned getSmallConstantMaxTripCount(const Loop *L); + /// Returns the upper bound of the loop trip count infered from array size. + /// Can not access bytes starting outside the statically allocated size + /// without being immediate UB. + /// Returns SCEVCouldNotCompute if the trip count could not inferred + /// from array accesses. + const SCEV *getConstantMaxTripCountFromArray(const Loop *L); + /// Returns the largest constant divisor of the trip count as a normal /// unsigned value, if possible. This means that the actual trip count is /// always a multiple of the returned value. 
Returns 1 if the trip count is @@ -988,14 +1042,13 @@ public: /// Test if the given expression is known to satisfy the condition described /// by Pred, LHS, and RHS in the given Context. bool isKnownPredicateAt(ICmpInst::Predicate Pred, const SCEV *LHS, - const SCEV *RHS, const Instruction *Context); + const SCEV *RHS, const Instruction *CtxI); /// Check whether the condition described by Pred, LHS, and RHS is true or /// false in the given \p Context. If we know it, return the evaluation of /// this condition. If neither is proved, return None. Optional<bool> evaluatePredicateAt(ICmpInst::Predicate Pred, const SCEV *LHS, - const SCEV *RHS, - const Instruction *Context); + const SCEV *RHS, const Instruction *CtxI); /// Test if the condition described by Pred, LHS, RHS is known to be true on /// every iteration of the loop of the recurrency LHS. @@ -1045,7 +1098,7 @@ public: getLoopInvariantExitCondDuringFirstIterations(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, const Loop *L, - const Instruction *Context, + const Instruction *CtxI, const SCEV *MaxIter); /// Simplify LHS and RHS in a comparison with predicate Pred. Return true @@ -1092,110 +1145,11 @@ public: /// Return the size of an element read or written by Inst. const SCEV *getElementSize(Instruction *Inst); - /// Compute the array dimensions Sizes from the set of Terms extracted from - /// the memory access function of this SCEVAddRecExpr (second step of - /// delinearization). - void findArrayDimensions(SmallVectorImpl<const SCEV *> &Terms, - SmallVectorImpl<const SCEV *> &Sizes, - const SCEV *ElementSize); - void print(raw_ostream &OS) const; void verify() const; bool invalidate(Function &F, const PreservedAnalyses &PA, FunctionAnalysisManager::Invalidator &Inv); - /// Collect parametric terms occurring in step expressions (first step of - /// delinearization). 
- void collectParametricTerms(const SCEV *Expr, - SmallVectorImpl<const SCEV *> &Terms); - - /// Return in Subscripts the access functions for each dimension in Sizes - /// (third step of delinearization). - void computeAccessFunctions(const SCEV *Expr, - SmallVectorImpl<const SCEV *> &Subscripts, - SmallVectorImpl<const SCEV *> &Sizes); - - /// Gathers the individual index expressions from a GEP instruction. - /// - /// This function optimistically assumes the GEP references into a fixed size - /// array. If this is actually true, this function returns a list of array - /// subscript expressions in \p Subscripts and a list of integers describing - /// the size of the individual array dimensions in \p Sizes. Both lists have - /// either equal length or the size list is one element shorter in case there - /// is no known size available for the outermost array dimension. Returns true - /// if successful and false otherwise. - bool getIndexExpressionsFromGEP(const GetElementPtrInst *GEP, - SmallVectorImpl<const SCEV *> &Subscripts, - SmallVectorImpl<int> &Sizes); - - /// Split this SCEVAddRecExpr into two vectors of SCEVs representing the - /// subscripts and sizes of an array access. - /// - /// The delinearization is a 3 step process: the first two steps compute the - /// sizes of each subscript and the third step computes the access functions - /// for the delinearized array: - /// - /// 1. Find the terms in the step functions - /// 2. Compute the array size - /// 3. Compute the access function: divide the SCEV by the array size - /// starting with the innermost dimensions found in step 2. The Quotient - /// is the SCEV to be divided in the next step of the recursion. The - /// Remainder is the subscript of the innermost dimension. Loop over all - /// array dimensions computed in step 2. 
- /// - /// To compute a uniform array size for several memory accesses to the same - /// object, one can collect in step 1 all the step terms for all the memory - /// accesses, and compute in step 2 a unique array shape. This guarantees - /// that the array shape will be the same across all memory accesses. - /// - /// FIXME: We could derive the result of steps 1 and 2 from a description of - /// the array shape given in metadata. - /// - /// Example: - /// - /// A[][n][m] - /// - /// for i - /// for j - /// for k - /// A[j+k][2i][5i] = - /// - /// The initial SCEV: - /// - /// A[{{{0,+,2*m+5}_i, +, n*m}_j, +, n*m}_k] - /// - /// 1. Find the different terms in the step functions: - /// -> [2*m, 5, n*m, n*m] - /// - /// 2. Compute the array size: sort and unique them - /// -> [n*m, 2*m, 5] - /// find the GCD of all the terms = 1 - /// divide by the GCD and erase constant terms - /// -> [n*m, 2*m] - /// GCD = m - /// divide by GCD -> [n, 2] - /// remove constant terms - /// -> [n] - /// size of the array is A[unknown][n][m] - /// - /// 3. Compute the access function - /// a. Divide {{{0,+,2*m+5}_i, +, n*m}_j, +, n*m}_k by the innermost size m - /// Quotient: {{{0,+,2}_i, +, n}_j, +, n}_k - /// Remainder: {{{0,+,5}_i, +, 0}_j, +, 0}_k - /// The remainder is the subscript of the innermost array dimension: [5i]. - /// - /// b. Divide Quotient: {{{0,+,2}_i, +, n}_j, +, n}_k by next outer size n - /// Quotient: {{{0,+,0}_i, +, 1}_j, +, 1}_k - /// Remainder: {{{0,+,2}_i, +, 0}_j, +, 0}_k - /// The Remainder is the subscript of the next array dimension: [2i]. - /// - /// The subscript of the outermost dimension is the Quotient: [j+k]. - /// - /// Overall, we have: A[][n][m], and the access function: A[j+k][2i][5i]. - void delinearize(const SCEV *Expr, SmallVectorImpl<const SCEV *> &Subscripts, - SmallVectorImpl<const SCEV *> &Sizes, - const SCEV *ElementSize); - /// Return the DataLayout associated with the module this SCEV instance is /// operating on. 
const DataLayout &getDataLayout() const { @@ -1234,6 +1188,18 @@ public: /// Try to apply information from loop guards for \p L to \p Expr. const SCEV *applyLoopGuards(const SCEV *Expr, const Loop *L); + /// Return true if the loop has no abnormal exits. That is, if the loop + /// is not infinite, it must exit through an explicit edge in the CFG. + /// (As opposed to either a) throwing out of the function or b) entering a + /// well defined infinite loop in some callee.) + bool loopHasNoAbnormalExits(const Loop *L) { + return getLoopProperties(L).HasNoAbnormalExits; + } + + /// Return true if this loop is finite by assumption. That is, + /// to be infinite, it must also be undefined. + bool loopIsFiniteByAssumption(const Loop *L); + private: /// A CallbackVH to arrange for ScalarEvolution to be notified whenever a /// Value is deleted. @@ -1532,15 +1498,15 @@ private: LoopDispositions; struct LoopProperties { - /// Set to true if the loop contains no instruction that can have side - /// effects (i.e. via throwing an exception, volatile or atomic access). - bool HasNoAbnormalExits; - /// Set to true if the loop contains no instruction that can abnormally exit /// the loop (i.e. via throwing an exception, by terminating the thread /// cleanly or by infinite looping in a called function). Strictly /// speaking, the last one is not leaving the loop, but is identical to /// leaving the loop for reasoning about undefined behavior. + bool HasNoAbnormalExits; + + /// Set to true if the loop contains no instruction that can have side + /// effects (i.e. via throwing an exception, volatile or atomic access). bool HasNoSideEffects; }; @@ -1554,14 +1520,6 @@ private: return getLoopProperties(L).HasNoSideEffects; } - bool loopHasNoAbnormalExits(const Loop *L) { - return getLoopProperties(L).HasNoAbnormalExits; - } - - /// Return true if this loop is finite by assumption. That is, - /// to be infinite, it must also be undefined. 
- bool loopIsFiniteByAssumption(const Loop *L); - /// Compute a LoopDisposition value. LoopDisposition computeLoopDisposition(const SCEV *S, const Loop *L); @@ -1574,6 +1532,9 @@ private: /// Compute a BlockDisposition value. BlockDisposition computeBlockDisposition(const SCEV *S, const BasicBlock *BB); + /// Stores all SCEV that use a given SCEV as its direct operand. + DenseMap<const SCEV *, SmallPtrSet<const SCEV *, 8> > SCEVUsers; + /// Memoized results from getRange DenseMap<const SCEV *, ConstantRange> UnsignedRanges; @@ -1600,22 +1561,22 @@ private: /// copied if its needed for longer. const ConstantRange &getRangeRef(const SCEV *S, RangeSignHint Hint); - /// Determines the range for the affine SCEVAddRecExpr {\p Start,+,\p Stop}. + /// Determines the range for the affine SCEVAddRecExpr {\p Start,+,\p Step}. /// Helper for \c getRange. - ConstantRange getRangeForAffineAR(const SCEV *Start, const SCEV *Stop, + ConstantRange getRangeForAffineAR(const SCEV *Start, const SCEV *Step, const SCEV *MaxBECount, unsigned BitWidth); /// Determines the range for the affine non-self-wrapping SCEVAddRecExpr {\p - /// Start,+,\p Stop}<nw>. + /// Start,+,\p Step}<nw>. ConstantRange getRangeForAffineNoSelfWrappingAR(const SCEVAddRecExpr *AddRec, const SCEV *MaxBECount, unsigned BitWidth, RangeSignHint SignHint); /// Try to compute a range for the affine SCEVAddRecExpr {\p Start,+,\p - /// Stop} by "factoring out" a ternary expression from the add recurrence. + /// Step} by "factoring out" a ternary expression from the add recurrence. /// Helper called by \c getRange. 
- ConstantRange getRangeViaFactoring(const SCEV *Start, const SCEV *Stop, + ConstantRange getRangeViaFactoring(const SCEV *Start, const SCEV *Step, const SCEV *MaxBECount, unsigned BitWidth); /// If the unknown expression U corresponds to a simple recurrence, return @@ -1761,12 +1722,6 @@ private: BasicBlock *ExitingBB, bool IsSubExpr); - /// Given an exit condition of 'icmp op load X, cst', try to see if we can - /// compute the backedge-taken count. - ExitLimit computeLoadConstantCompareExitLimit(LoadInst *LI, Constant *RHS, - const Loop *L, - ICmpInst::Predicate p); - /// Compute the exit limit of a loop that is controlled by a /// "(IV >> 1) != 0" type comparison. We cannot compute the exact trip /// count in these cases (since SCEV has no way of expressing them), but we @@ -1839,7 +1794,7 @@ private: const SCEV *RHS, ICmpInst::Predicate FoundPred, const SCEV *FoundLHS, const SCEV *FoundRHS, - const Instruction *Context); + const Instruction *CtxI); /// Test whether the condition described by Pred, LHS, and RHS is true /// whenever the condition described by FoundPred, FoundLHS, FoundRHS is @@ -1914,7 +1869,7 @@ private: const SCEV *LHS, const SCEV *RHS, const SCEV *FoundLHS, const SCEV *FoundRHS, - const Instruction *Context); + const Instruction *CtxI); /// Test whether the condition described by Pred, LHS, and RHS is true /// whenever the condition described by Pred, FoundLHS, and FoundRHS is @@ -1956,12 +1911,18 @@ private: bool splitBinaryAdd(const SCEV *Expr, const SCEV *&L, const SCEV *&R, SCEV::NoWrapFlags &Flags); - /// Drop memoized information computed for S. - void forgetMemoizedResults(const SCEV *S); + /// Drop memoized information for all \p SCEVs. + void forgetMemoizedResults(ArrayRef<const SCEV *> SCEVs); + + /// Helper for forgetMemoizedResults. + void forgetMemoizedResultsImpl(const SCEV *S); /// Return an existing SCEV for V if there is one, otherwise return nullptr. 
const SCEV *getExistingSCEV(Value *V); + /// Erase Value from ValueExprMap and ExprValueMap. + void eraseValueFromMap(Value *V); + /// Return false iff given SCEV contains a SCEVUnknown with NULL value- /// pointer. bool checkValidity(const SCEV *S) const; @@ -1995,6 +1956,27 @@ private: /// would trigger undefined behavior on overflow. SCEV::NoWrapFlags getNoWrapFlagsFromUB(const Value *V); + /// Return a scope which provides an upper bound on the defining scope of + /// 'S'. Specifically, return the first instruction in said bounding scope. + /// Return nullptr if the scope is trivial (function entry). + /// (See scope definition rules associated with flag discussion above) + const Instruction *getNonTrivialDefiningScopeBound(const SCEV *S); + + /// Return a scope which provides an upper bound on the defining scope for + /// a SCEV with the operands in Ops. The outparam Precise is set if the + /// bound found is a precise bound (i.e. must be the defining scope.) + const Instruction *getDefiningScopeBound(ArrayRef<const SCEV *> Ops, + bool &Precise); + + /// Wrapper around the above for cases which don't care if the bound + /// is precise. + const Instruction *getDefiningScopeBound(ArrayRef<const SCEV *> Ops); + + /// Given two instructions in the same function, return true if we can + /// prove B must execute given A executes. + bool isGuaranteedToTransferExecutionTo(const Instruction *A, + const Instruction *B); + /// Return true if the SCEV corresponding to \p I is never poison. Proving /// this is more complex than proving that just \p I is never poison, since /// SCEV commons expressions across control flow, and you can have cases @@ -2036,8 +2018,11 @@ private: /// permitted by Start, End, and Stride. This is for loops of the form /// {Start, +, Stride} LT End. /// - /// Precondition: the induction variable is known to be positive. We *don't* - /// assert these preconditions so please be careful. 
+ /// Preconditions: + /// * the induction variable is known to be positive. + /// * the induction variable is assumed not to overflow (i.e. either it + /// actually doesn't, or we'd have to immediately execute UB) + /// We *don't* assert these preconditions so please be careful. const SCEV *computeMaxBECountForLT(const SCEV *Start, const SCEV *Stride, const SCEV *End, unsigned BitWidth, bool IsSigned); @@ -2072,31 +2057,20 @@ private: /// an add rec on said loop. void getUsedLoops(const SCEV *S, SmallPtrSetImpl<const Loop *> &LoopsUsed); - /// Find all of the loops transitively used in \p S, and update \c LoopUsers - /// accordingly. - void addToLoopUseLists(const SCEV *S); - /// Try to match the pattern generated by getURemExpr(A, B). If successful, /// Assign A and B to LHS and RHS, respectively. bool matchURem(const SCEV *Expr, const SCEV *&LHS, const SCEV *&RHS); /// Look for a SCEV expression with type `SCEVType` and operands `Ops` in - /// `UniqueSCEVs`. - /// - /// The first component of the returned tuple is the SCEV if found and null - /// otherwise. The second component is the `FoldingSetNodeID` that was - /// constructed to look up the SCEV and the third component is the insertion - /// point. - std::tuple<SCEV *, FoldingSetNodeID, void *> - findExistingSCEVInCache(SCEVTypes SCEVType, ArrayRef<const SCEV *> Ops); + /// `UniqueSCEVs`. Return if found, else nullptr. + SCEV *findExistingSCEVInCache(SCEVTypes SCEVType, ArrayRef<const SCEV *> Ops); FoldingSet<SCEV> UniqueSCEVs; FoldingSet<SCEVPredicate> UniquePreds; BumpPtrAllocator SCEVAllocator; - /// This maps loops to a list of SCEV expressions that (transitively) use said - /// loop. - DenseMap<const Loop *, SmallVector<const SCEV *, 4>> LoopUsers; + /// This maps loops to a list of addrecs that directly use said loop. 
+ DenseMap<const Loop *, SmallVector<const SCEVAddRecExpr *, 4>> LoopUsers; /// Cache tentative mappings from UnknownSCEVs in a Loop, to a SCEV expression /// they can be rewritten into under certain predicates. diff --git a/llvm/include/llvm/Analysis/StackLifetime.h b/llvm/include/llvm/Analysis/StackLifetime.h index df342a9533ee..239aec4e258b 100644 --- a/llvm/include/llvm/Analysis/StackLifetime.h +++ b/llvm/include/llvm/Analysis/StackLifetime.h @@ -191,6 +191,8 @@ public: StackLifetimePrinterPass(raw_ostream &OS, StackLifetime::LivenessType Type) : Type(Type), OS(OS) {} PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + void printPipeline(raw_ostream &OS, + function_ref<StringRef(StringRef)> MapClassName2PassName); }; } // end namespace llvm diff --git a/llvm/include/llvm/Analysis/StackSafetyAnalysis.h b/llvm/include/llvm/Analysis/StackSafetyAnalysis.h index 59c1e3e3bd56..751735f3e59f 100644 --- a/llvm/include/llvm/Analysis/StackSafetyAnalysis.h +++ b/llvm/include/llvm/Analysis/StackSafetyAnalysis.h @@ -75,7 +75,15 @@ public: StackSafetyGlobalInfo &operator=(StackSafetyGlobalInfo &&); ~StackSafetyGlobalInfo(); + // Whether we can prove that all accesses to this Alloca are in-range and + // during its lifetime. bool isSafe(const AllocaInst &AI) const; + + // Returns true if the instruction can be proven to do only two types of + // memory accesses: + // (1) live stack locations in-bounds or + // (2) non-stack locations. 
+ bool stackAccessIsSafe(const Instruction &I) const; void print(raw_ostream &O) const; void dump() const; }; diff --git a/llvm/include/llvm/Analysis/TargetLibraryInfo.h b/llvm/include/llvm/Analysis/TargetLibraryInfo.h index 22bfeda0efd0..6e3e1380535e 100644 --- a/llvm/include/llvm/Analysis/TargetLibraryInfo.h +++ b/llvm/include/llvm/Analysis/TargetLibraryInfo.h @@ -76,7 +76,7 @@ class TargetLibraryInfoImpl { /// Return true if the function type FTy is valid for the library function /// F, regardless of whether the function is available. bool isValidProtoForLibFunc(const FunctionType &FTy, LibFunc F, - const DataLayout *DL) const; + const Module &M) const; public: /// List of known vector-functions libraries. @@ -115,6 +115,8 @@ public: /// /// If it is one of the known library functions, return true and set F to the /// corresponding value. + /// + /// FDecl is assumed to have a parent Module when using this function. bool getLibFunc(const Function &FDecl, LibFunc &F) const; /// Forces a function to be marked as unavailable. @@ -238,7 +240,7 @@ public: else { // Disable individual libc/libm calls in TargetLibraryInfo. 
LibFunc LF; - AttributeSet FnAttrs = (*F)->getAttributes().getFnAttributes(); + AttributeSet FnAttrs = (*F)->getAttributes().getFnAttrs(); for (const Attribute &Attr : FnAttrs) { if (!Attr.isStringAttribute()) continue; diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h index 628058142e48..170d6b8f35ff 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -21,7 +21,6 @@ #ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFO_H #define LLVM_ANALYSIS_TARGETTRANSFORMINFO_H -#include "llvm/Analysis/IVDescriptors.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Operator.h" #include "llvm/IR/PassManager.h" @@ -31,6 +30,7 @@ #include "llvm/Support/DataTypes.h" #include "llvm/Support/InstructionCost.h" #include <functional> +#include <utility> namespace llvm { @@ -47,12 +47,14 @@ class ExtractElementInst; class Function; class GlobalValue; class InstCombiner; +class OptimizationRemarkEmitter; class IntrinsicInst; class LoadInst; class LoopAccessInfo; class Loop; class LoopInfo; class ProfileSummaryInfo; +class RecurrenceDescriptor; class SCEV; class ScalarEvolution; class StoreInst; @@ -97,7 +99,7 @@ struct HardwareLoopInfo { Loop *L = nullptr; BasicBlock *ExitBlock = nullptr; BranchInst *ExitBranch = nullptr; - const SCEV *TripCount = nullptr; + const SCEV *ExitCount = nullptr; IntegerType *CountType = nullptr; Value *LoopDecrement = nullptr; // Decrement the loop counter by this // value in every iteration. @@ -382,8 +384,15 @@ public: bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const; + /// Return true if globals in this address space can have initializers other + /// than `undef`. 
+ bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const; + unsigned getAssumedAddrSpace(const Value *V) const; + std::pair<const Value *, unsigned> + getPredicatedAddrSpace(const Value *V) const; + /// Rewrite intrinsic call \p II such that \p OldV will be replaced with \p /// NewV, which has a different address space. This should happen for every /// operand index that collectFlatAddressOperands returned for the intrinsic. @@ -506,7 +515,8 @@ public: /// transformation. The caller will initialize UP with the current /// target-independent defaults. void getUnrollingPreferences(Loop *L, ScalarEvolution &, - UnrollingPreferences &UP) const; + UnrollingPreferences &UP, + OptimizationRemarkEmitter *ORE) const; /// Query the target whether it would be profitable to convert the given loop /// into a hardware loop. @@ -660,6 +670,9 @@ public: /// Return true if the target supports masked expand load. bool isLegalMaskedExpandLoad(Type *DataType) const; + /// Return true if we should be enabling ordered reductions for the target. + bool enableOrderedReductions() const; + /// Return true if the target has a unified operation to calculate division /// and remainder. If so, the additional implicit multiplication and /// subtraction required to calculate a remainder from division are free. This @@ -907,6 +920,9 @@ public: /// architectural maximum vector length, and None otherwise. Optional<unsigned> getMaxVScale() const; + /// \return the value of vscale to tune the cost model for. + Optional<unsigned> getVScaleForTuning() const; + /// \return True if the vectorization factor should be chosen to /// make the vector of the smallest element type match the size of a /// vector register. For wider element types, this could result in @@ -1094,8 +1110,8 @@ public: /// is using a compare with the specified predicate as condition. When vector /// types are passed, \p VecPred must be used for all lanes. 
InstructionCost - getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy = nullptr, - CmpInst::Predicate VecPred = CmpInst::BAD_ICMP_PREDICATE, + getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, + CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, const Instruction *I = nullptr) const; @@ -1104,6 +1120,16 @@ public: InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index = -1) const; + /// \return The cost of replication shuffle of \p VF elements typed \p EltTy + /// \p ReplicationFactor times. + /// + /// For example, the mask for \p ReplicationFactor=3 and \p VF=4 is: + /// <0,0,0,1,1,1,2,2,2,3,3,3> + InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, + int VF, + const APInt &DemandedDstElts, + TTI::TargetCostKind CostKind); + /// \return The cost of Load and Store instructions. InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, @@ -1452,13 +1478,18 @@ public: virtual bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes, Intrinsic::ID IID) const = 0; virtual bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const = 0; + virtual bool + canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const = 0; virtual unsigned getAssumedAddrSpace(const Value *V) const = 0; + virtual std::pair<const Value *, unsigned> + getPredicatedAddrSpace(const Value *V) const = 0; virtual Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV, Value *NewV) const = 0; virtual bool isLoweredToCall(const Function *F) = 0; virtual void getUnrollingPreferences(Loop *L, ScalarEvolution &, - UnrollingPreferences &UP) = 0; + UnrollingPreferences &UP, + OptimizationRemarkEmitter *ORE) = 0; virtual void getPeelingPreferences(Loop *L, ScalarEvolution &SE, PeelingPreferences &PP) = 0; virtual bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, @@ -1505,6 +1536,7 @@ public: virtual bool isLegalMaskedGather(Type *DataType, 
Align Alignment) = 0; virtual bool isLegalMaskedCompressStore(Type *DataType) = 0; virtual bool isLegalMaskedExpandLoad(Type *DataType) = 0; + virtual bool enableOrderedReductions() = 0; virtual bool hasDivRemOp(Type *DataType, bool IsSigned) = 0; virtual bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) = 0; virtual bool prefersVectorizedAddressing() = 0; @@ -1563,6 +1595,7 @@ public: virtual TypeSize getRegisterBitWidth(RegisterKind K) const = 0; virtual unsigned getMinVectorRegisterBitWidth() const = 0; virtual Optional<unsigned> getMaxVScale() const = 0; + virtual Optional<unsigned> getVScaleForTuning() const = 0; virtual bool shouldMaximizeVectorBandwidth() const = 0; virtual ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const = 0; @@ -1623,6 +1656,12 @@ public: const Instruction *I) = 0; virtual InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) = 0; + + virtual InstructionCost + getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF, + const APInt &DemandedDstElts, + TTI::TargetCostKind CostKind) = 0; + virtual InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, @@ -1730,8 +1769,8 @@ public: InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef<const Value *> Operands, - enum TargetTransformInfo::TargetCostKind CostKind) override { - return Impl.getGEPCost(PointeeType, Ptr, Operands); + TargetTransformInfo::TargetCostKind CostKind) override { + return Impl.getGEPCost(PointeeType, Ptr, Operands, CostKind); } unsigned getInliningThresholdMultiplier() override { return Impl.getInliningThresholdMultiplier(); @@ -1775,10 +1814,20 @@ public: return Impl.isNoopAddrSpaceCast(FromAS, ToAS); } + bool + canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const override { + return Impl.canHaveNonUndefGlobalInitializerInAddressSpace(AS); + } + unsigned getAssumedAddrSpace(const Value *V) const override { return 
Impl.getAssumedAddrSpace(V); } + std::pair<const Value *, unsigned> + getPredicatedAddrSpace(const Value *V) const override { + return Impl.getPredicatedAddrSpace(V); + } + Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV, Value *NewV) const override { return Impl.rewriteIntrinsicWithAddressSpace(II, OldV, NewV); @@ -1788,8 +1837,9 @@ public: return Impl.isLoweredToCall(F); } void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, - UnrollingPreferences &UP) override { - return Impl.getUnrollingPreferences(L, SE, UP); + UnrollingPreferences &UP, + OptimizationRemarkEmitter *ORE) override { + return Impl.getUnrollingPreferences(L, SE, UP, ORE); } void getPeelingPreferences(Loop *L, ScalarEvolution &SE, PeelingPreferences &PP) override { @@ -1886,6 +1936,9 @@ public: bool isLegalMaskedExpandLoad(Type *DataType) override { return Impl.isLegalMaskedExpandLoad(DataType); } + bool enableOrderedReductions() override { + return Impl.enableOrderedReductions(); + } bool hasDivRemOp(Type *DataType, bool IsSigned) override { return Impl.hasDivRemOp(DataType, IsSigned); } @@ -2015,6 +2068,9 @@ public: Optional<unsigned> getMaxVScale() const override { return Impl.getMaxVScale(); } + Optional<unsigned> getVScaleForTuning() const override { + return Impl.getVScaleForTuning(); + } bool shouldMaximizeVectorBandwidth() const override { return Impl.shouldMaximizeVectorBandwidth(); } @@ -2115,6 +2171,13 @@ public: unsigned Index) override { return Impl.getVectorInstrCost(Opcode, Val, Index); } + InstructionCost + getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF, + const APInt &DemandedDstElts, + TTI::TargetCostKind CostKind) override { + return Impl.getReplicationShuffleCost(EltTy, ReplicationFactor, VF, + DemandedDstElts, CostKind); + } InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h 
b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h index c07a33c9f155..05ef2495475f 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -24,6 +24,7 @@ #include "llvm/IR/Operator.h" #include "llvm/IR/PatternMatch.h" #include "llvm/IR/Type.h" +#include <utility> using namespace llvm::PatternMatch; @@ -47,10 +48,9 @@ public: const DataLayout &getDataLayout() const { return DL; } - InstructionCost - getGEPCost(Type *PointeeType, const Value *Ptr, - ArrayRef<const Value *> Operands, - TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) const { + InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr, + ArrayRef<const Value *> Operands, + TTI::TargetCostKind CostKind) const { // In the basic model, we just assume that all-constant GEPs will be folded // into their uses via addressing modes. for (unsigned Idx = 0, Size = Operands.size(); Idx != Size; ++Idx) @@ -105,9 +105,17 @@ public: } bool isNoopAddrSpaceCast(unsigned, unsigned) const { return false; } + bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const { + return AS == 0; + }; unsigned getAssumedAddrSpace(const Value *V) const { return -1; } + std::pair<const Value *, unsigned> + getPredicatedAddrSpace(const Value *V) const { + return std::make_pair(nullptr, -1); + } + Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV, Value *NewV) const { return nullptr; @@ -187,7 +195,8 @@ public: } void getUnrollingPreferences(Loop *, ScalarEvolution &, - TTI::UnrollingPreferences &) const {} + TTI::UnrollingPreferences &, + OptimizationRemarkEmitter *) const {} void getPeelingPreferences(Loop *, ScalarEvolution &, TTI::PeelingPreferences &) const {} @@ -262,6 +271,8 @@ public: bool isLegalMaskedExpandLoad(Type *DataType) const { return false; } + bool enableOrderedReductions() const { return false; } + bool hasDivRemOp(Type *DataType, bool IsSigned) const { return false; } bool 
hasVolatileVariant(Instruction *I, unsigned AddrSpace) const { @@ -394,6 +405,7 @@ public: unsigned getMinVectorRegisterBitWidth() const { return 128; } Optional<unsigned> getMaxVScale() const { return None; } + Optional<unsigned> getVScaleForTuning() const { return None; } bool shouldMaximizeVectorBandwidth() const { return false; } @@ -539,6 +551,12 @@ public: return 1; } + unsigned getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF, + const APInt &DemandedDstElts, + TTI::TargetCostKind CostKind) { + return 1; + } + InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, @@ -614,7 +632,8 @@ public: return 1; } - unsigned getNumberOfParts(Type *Tp) const { return 0; } + // Assume that we have a register of the right size for the type. + unsigned getNumberOfParts(Type *Tp) const { return 1; } InstructionCost getAddressComputationCost(Type *Tp, ScalarEvolution *, const SCEV *) const { @@ -632,9 +651,10 @@ public: return 1; } - InstructionCost getExtendedAddReductionCost( - bool IsMLA, bool IsUnsigned, Type *ResTy, VectorType *Ty, - TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const { + InstructionCost + getExtendedAddReductionCost(bool IsMLA, bool IsUnsigned, Type *ResTy, + VectorType *Ty, + TTI::TargetCostKind CostKind) const { return 1; } @@ -856,10 +876,9 @@ protected: public: using BaseT::getGEPCost; - InstructionCost - getGEPCost(Type *PointeeType, const Value *Ptr, - ArrayRef<const Value *> Operands, - TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) { + InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr, + ArrayRef<const Value *> Operands, + TTI::TargetCostKind CostKind) { assert(PointeeType && Ptr && "can't get GEPCost of nullptr"); assert(cast<PointerType>(Ptr->getType()->getScalarType()) ->isOpaqueOrPointeeTypeMatches(PointeeType) && @@ -964,10 +983,10 @@ public: return TTI::TCC_Free; break; case Instruction::GetElementPtr: { - const 
GEPOperator *GEP = cast<GEPOperator>(U); + const auto *GEP = cast<GEPOperator>(U); return TargetTTI->getGEPCost(GEP->getSourceElementType(), GEP->getPointerOperand(), - Operands.drop_front()); + Operands.drop_front(), CostKind); } case Instruction::Add: case Instruction::FAdd: @@ -1063,58 +1082,94 @@ public: auto *IE = dyn_cast<InsertElementInst>(U); if (!IE) return TTI::TCC_Basic; // FIXME - auto *CI = dyn_cast<ConstantInt>(IE->getOperand(2)); - unsigned Idx = CI ? CI->getZExtValue() : -1; + unsigned Idx = -1; + if (auto *CI = dyn_cast<ConstantInt>(IE->getOperand(2))) + if (CI->getValue().getActiveBits() <= 32) + Idx = CI->getZExtValue(); return TargetTTI->getVectorInstrCost(Opcode, Ty, Idx); } case Instruction::ShuffleVector: { auto *Shuffle = dyn_cast<ShuffleVectorInst>(U); if (!Shuffle) return TTI::TCC_Basic; // FIXME + auto *VecTy = cast<VectorType>(U->getType()); auto *VecSrcTy = cast<VectorType>(U->getOperand(0)->getType()); + int NumSubElts, SubIndex; + + if (Shuffle->changesLength()) { + // Treat a 'subvector widening' as a free shuffle. 
+ if (Shuffle->increasesLength() && Shuffle->isIdentityWithPadding()) + return 0; + + if (Shuffle->isExtractSubvectorMask(SubIndex)) + return TargetTTI->getShuffleCost(TTI::SK_ExtractSubvector, VecSrcTy, + Shuffle->getShuffleMask(), SubIndex, + VecTy); + + if (Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex)) + return TargetTTI->getShuffleCost( + TTI::SK_InsertSubvector, VecTy, Shuffle->getShuffleMask(), + SubIndex, + FixedVectorType::get(VecTy->getScalarType(), NumSubElts)); + + int ReplicationFactor, VF; + if (Shuffle->isReplicationMask(ReplicationFactor, VF)) { + APInt DemandedDstElts = + APInt::getNullValue(Shuffle->getShuffleMask().size()); + for (auto I : enumerate(Shuffle->getShuffleMask())) { + if (I.value() != UndefMaskElem) + DemandedDstElts.setBit(I.index()); + } + return TargetTTI->getReplicationShuffleCost( + VecSrcTy->getElementType(), ReplicationFactor, VF, + DemandedDstElts, CostKind); + } - // TODO: Identify and add costs for insert subvector, etc. - int SubIndex; - if (Shuffle->isExtractSubvectorMask(SubIndex)) - return TargetTTI->getShuffleCost(TTI::SK_ExtractSubvector, VecSrcTy, - Shuffle->getShuffleMask(), SubIndex, - VecTy); - else if (Shuffle->changesLength()) return CostKind == TTI::TCK_RecipThroughput ? 
-1 : 1; - else if (Shuffle->isIdentity()) + } + + if (Shuffle->isIdentity()) return 0; - else if (Shuffle->isReverse()) + + if (Shuffle->isReverse()) return TargetTTI->getShuffleCost(TTI::SK_Reverse, VecTy, Shuffle->getShuffleMask(), 0, nullptr); - else if (Shuffle->isSelect()) + + if (Shuffle->isSelect()) return TargetTTI->getShuffleCost(TTI::SK_Select, VecTy, Shuffle->getShuffleMask(), 0, nullptr); - else if (Shuffle->isTranspose()) + + if (Shuffle->isTranspose()) return TargetTTI->getShuffleCost(TTI::SK_Transpose, VecTy, Shuffle->getShuffleMask(), 0, nullptr); - else if (Shuffle->isZeroEltSplat()) + + if (Shuffle->isZeroEltSplat()) return TargetTTI->getShuffleCost(TTI::SK_Broadcast, VecTy, Shuffle->getShuffleMask(), 0, nullptr); - else if (Shuffle->isSingleSource()) + + if (Shuffle->isSingleSource()) return TargetTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, VecTy, Shuffle->getShuffleMask(), 0, nullptr); + if (Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex)) + return TargetTTI->getShuffleCost( + TTI::SK_InsertSubvector, VecTy, Shuffle->getShuffleMask(), SubIndex, + FixedVectorType::get(VecTy->getScalarType(), NumSubElts)); + return TargetTTI->getShuffleCost(TTI::SK_PermuteTwoSrc, VecTy, Shuffle->getShuffleMask(), 0, nullptr); } case Instruction::ExtractElement: { - unsigned Idx = -1; auto *EEI = dyn_cast<ExtractElementInst>(U); if (!EEI) return TTI::TCC_Basic; // FIXME - - auto *CI = dyn_cast<ConstantInt>(EEI->getOperand(1)); - if (CI) - Idx = CI->getZExtValue(); - - return TargetTTI->getVectorInstrCost(Opcode, U->getOperand(0)->getType(), - Idx); + unsigned Idx = -1; + if (auto *CI = dyn_cast<ConstantInt>(EEI->getOperand(1))) + if (CI->getValue().getActiveBits() <= 32) + Idx = CI->getZExtValue(); + Type *DstTy = U->getOperand(0)->getType(); + return TargetTTI->getVectorInstrCost(Opcode, DstTy, Idx); } } // By default, just classify everything as 'basic'. 
diff --git a/llvm/include/llvm/Analysis/TypeMetadataUtils.h b/llvm/include/llvm/Analysis/TypeMetadataUtils.h index 3f7603142900..074c40942b06 100644 --- a/llvm/include/llvm/Analysis/TypeMetadataUtils.h +++ b/llvm/include/llvm/Analysis/TypeMetadataUtils.h @@ -22,6 +22,7 @@ namespace llvm { class CallBase; class CallInst; class Constant; +class Function; class DominatorTree; class Instruction; class Module; @@ -56,7 +57,30 @@ void findDevirtualizableCallsForTypeCheckedLoad( SmallVectorImpl<Instruction *> &Preds, bool &HasNonCallUses, const CallInst *CI, DominatorTree &DT); -Constant *getPointerAtOffset(Constant *I, uint64_t Offset, Module &M); -} +/// Processes a Constant recursively looking into elements of arrays, structs +/// and expressions to find a trivial pointer element that is located at the +/// given offset (relative to the beginning of the whole outer Constant). +/// +/// Used for example from GlobalDCE to find an entry in a C++ vtable that +/// matches a vcall offset. +/// +/// To support Swift vtables, getPointerAtOffset can see through "relative +/// pointers", i.e. (sub-)expressions of the form of: +/// +/// @symbol = ... { +/// i32 trunc (i64 sub ( +/// i64 ptrtoint (<type> @target to i64), i64 ptrtoint (... @symbol to i64) +/// ) to i32) +/// } +/// +/// For such (sub-)expressions, getPointerAtOffset returns the @target pointer. +Constant *getPointerAtOffset(Constant *I, uint64_t Offset, Module &M, + Constant *TopLevelGlobal = nullptr); + +/// Finds the same "relative pointer" pattern as described above, where the +/// target is `F`, and replaces the entire pattern with a constant zero. 
+void replaceRelativePointerUsersWithZero(Function *F); + +} // namespace llvm #endif diff --git a/llvm/include/llvm/Analysis/Utils/TFUtils.h b/llvm/include/llvm/Analysis/Utils/TFUtils.h index 47ee23e06000..1f6be0e60eb9 100644 --- a/llvm/include/llvm/Analysis/Utils/TFUtils.h +++ b/llvm/include/llvm/Analysis/Utils/TFUtils.h @@ -104,6 +104,9 @@ Optional<TensorSpec> getTensorSpecFromJSON(LLVMContext &Ctx, struct LoggedFeatureSpec { TensorSpec Spec; Optional<std::string> LoggingName; + const std::string &getLoggingName() const { + return LoggingName ? *LoggingName : Spec.name(); + } }; /// Load the output specs. If SpecFileOverride is not empty, that path is used. @@ -170,7 +173,9 @@ public: // we can consider using bytes. char *addEntryAndGetFloatOrInt64Buffer(size_t FeatureID); - void print(raw_ostream &OS); + // Flush the content of the log to the stream, clearing the stored data in the + // process. + void flush(raw_ostream &OS); private: std::vector<LoggedFeatureSpec> FeatureSpecs; diff --git a/llvm/include/llvm/Analysis/ValueTracking.h b/llvm/include/llvm/Analysis/ValueTracking.h index 90ec742f18e6..b4f38a3e976f 100644 --- a/llvm/include/llvm/Analysis/ValueTracking.h +++ b/llvm/include/llvm/Analysis/ValueTracking.h @@ -203,6 +203,15 @@ constexpr unsigned MaxAnalysisRecursionDepth = 6; const DominatorTree *DT = nullptr, bool UseInstrInfo = true); + /// Get the minimum bit size for this Value \p Op as a signed integer. + /// i.e. x == sext(trunc(x to MinSignedBits) to bitwidth(x)). + /// Similar to the APInt::getMinSignedBits function. + unsigned ComputeMinSignedBits(const Value *Op, const DataLayout &DL, + unsigned Depth = 0, + AssumptionCache *AC = nullptr, + const Instruction *CxtI = nullptr, + const DominatorTree *DT = nullptr); + /// This function computes the integer multiple of Base that equals V. If /// successful, it returns true and returns the multiple in Multiple. If /// unsuccessful, it returns false. 
Also, if V can be simplified to an @@ -549,6 +558,7 @@ constexpr unsigned MaxAnalysisRecursionDepth = 6; ConstantRange computeConstantRange(const Value *V, bool UseInstrInfo = true, AssumptionCache *AC = nullptr, const Instruction *CtxI = nullptr, + const DominatorTree *DT = nullptr, unsigned Depth = 0); /// Return true if this function can prove that the instruction I will @@ -573,6 +583,18 @@ constexpr unsigned MaxAnalysisRecursionDepth = 6; /// instruction variant of this function. bool isGuaranteedToTransferExecutionToSuccessor(const BasicBlock *BB); + /// Return true if every instruction in the range (Begin, End) is + /// guaranteed to transfer execution to its static successor. \p ScanLimit + /// bounds the search to avoid scanning huge blocks. + bool isGuaranteedToTransferExecutionToSuccessor( + BasicBlock::const_iterator Begin, BasicBlock::const_iterator End, + unsigned ScanLimit = 32); + + /// Same as previous, but with range expressed via iterator_range. + bool isGuaranteedToTransferExecutionToSuccessor( + iterator_range<BasicBlock::const_iterator> Range, + unsigned ScanLimit = 32); + /// Return true if this function can prove that the instruction I /// is executed for every iteration of the loop L. /// @@ -624,10 +646,16 @@ constexpr unsigned MaxAnalysisRecursionDepth = 6; /// true. If Op raises immediate UB but never creates poison or undef /// (e.g. sdiv I, 0), canCreatePoison returns false. /// + /// \p ConsiderFlags controls whether poison producing flags on the + /// instruction are considered. This can be used to see if the instruction + /// could still introduce undef or poison even without poison generating flags + /// which might be on the instruction. (i.e. could the result of + /// Op->dropPoisonGeneratingFlags() still create poison or undef) + /// /// canCreatePoison returns true if Op can create poison from non-poison /// operands. 
- bool canCreateUndefOrPoison(const Operator *Op); - bool canCreatePoison(const Operator *Op); + bool canCreateUndefOrPoison(const Operator *Op, bool ConsiderFlags = true); + bool canCreatePoison(const Operator *Op, bool ConsiderFlags = true); /// Return true if V is poison given that ValAssumedPoison is already poison. /// For example, if ValAssumedPoison is `icmp X, 10` and V is `icmp X, 5`, @@ -744,6 +772,10 @@ constexpr unsigned MaxAnalysisRecursionDepth = 6; /// minimum/maximum flavor. CmpInst::Predicate getInverseMinMaxPred(SelectPatternFlavor SPF); + /// Return the minimum or maximum constant value for the specified integer + /// min/max flavor and type. + APInt getMinMaxLimit(SelectPatternFlavor SPF, unsigned BitWidth); + /// Check if the values in \p VL are select instructions that can be converted /// to a min or max (vector) intrinsic. Returns the intrinsic ID, if such a /// conversion is possible, together with a bool indicating whether all select diff --git a/llvm/include/llvm/Analysis/VectorUtils.h b/llvm/include/llvm/Analysis/VectorUtils.h index c890216c9e01..24e2318de48b 100644 --- a/llvm/include/llvm/Analysis/VectorUtils.h +++ b/llvm/include/llvm/Analysis/VectorUtils.h @@ -533,6 +533,12 @@ llvm::SmallVector<int, 16> createStrideMask(unsigned Start, unsigned Stride, llvm::SmallVector<int, 16> createSequentialMask(unsigned Start, unsigned NumInts, unsigned NumUndefs); +/// Given a shuffle mask for a binary shuffle, create the equivalent shuffle +/// mask assuming both operands are identical. This assumes that the unary +/// shuffle will use elements from operand 0 (operand 1 will be unused). +llvm::SmallVector<int, 16> createUnaryMask(ArrayRef<int> Mask, + unsigned NumElts); + /// Concatenate a list of vectors. /// /// This function generates code that concatenate the vectors in \p Vecs into a @@ -686,10 +692,8 @@ public: if (getMember(getFactor() - 1)) return false; - // We have a group with gaps. 
It therefore cannot be a group of stores, - // and it can't be a reversed access, because such groups get invalidated. - assert(!getMember(0)->mayWriteToMemory() && - "Group should have been invalidated"); + // We have a group with gaps. It therefore can't be a reversed access, + // because such groups get invalidated (TODO). assert(!isReverse() && "Group should have been invalidated"); // This is a group of loads, with gaps, and without a last-member diff --git a/llvm/include/llvm/AsmParser/LLLexer.h b/llvm/include/llvm/AsmParser/LLLexer.h index c97d9781c33b..c30165e4a97b 100644 --- a/llvm/include/llvm/AsmParser/LLLexer.h +++ b/llvm/include/llvm/AsmParser/LLLexer.h @@ -10,8 +10,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_LIB_ASMPARSER_LLLEXER_H -#define LLVM_LIB_ASMPARSER_LLLEXER_H +#ifndef LLVM_ASMPARSER_LLLEXER_H +#define LLVM_ASMPARSER_LLLEXER_H #include "LLToken.h" #include "llvm/ADT/APFloat.h" diff --git a/llvm/include/llvm/AsmParser/LLParser.h b/llvm/include/llvm/AsmParser/LLParser.h index 70db9218fa3d..d621c232378c 100644 --- a/llvm/include/llvm/AsmParser/LLParser.h +++ b/llvm/include/llvm/AsmParser/LLParser.h @@ -10,8 +10,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_LIB_ASMPARSER_LLPARSER_H -#define LLVM_LIB_ASMPARSER_LLPARSER_H +#ifndef LLVM_ASMPARSER_LLPARSER_H +#define LLVM_ASMPARSER_LLPARSER_H #include "LLLexer.h" #include "llvm/ADT/Optional.h" @@ -172,9 +172,8 @@ namespace llvm { /// getGlobalVal - Get a value with the specified name or ID, creating a /// forward reference record if needed. This can return null if the value /// exists but does not have the right type. 
- GlobalValue *getGlobalVal(const std::string &N, Type *Ty, LocTy Loc, - bool IsCall); - GlobalValue *getGlobalVal(unsigned ID, Type *Ty, LocTy Loc, bool IsCall); + GlobalValue *getGlobalVal(const std::string &N, Type *Ty, LocTy Loc); + GlobalValue *getGlobalVal(unsigned ID, Type *Ty, LocTy Loc); /// Get a Comdat with the specified name, creating a forward reference /// record if needed. @@ -270,7 +269,6 @@ namespace llvm { bool parseOptionalCommaAlign(MaybeAlign &Alignment, bool &AteExtraComma); bool parseOptionalCommaAddrSpace(unsigned &AddrSpace, LocTy &Loc, bool &AteExtraComma); - bool parseOptionalCommaInAlloca(bool &IsInAlloca); bool parseAllocSizeArguments(unsigned &BaseSizeArg, Optional<unsigned> &HowManyArg); bool parseVScaleRangeArguments(unsigned &MinValue, unsigned &MaxValue); @@ -306,11 +304,10 @@ namespace llvm { unsigned DLLStorageClass, bool DSOLocal, GlobalVariable::ThreadLocalMode TLM, GlobalVariable::UnnamedAddr UnnamedAddr); - bool parseIndirectSymbol(const std::string &Name, LocTy NameLoc, - unsigned L, unsigned Visibility, - unsigned DLLStorageClass, bool DSOLocal, - GlobalVariable::ThreadLocalMode TLM, - GlobalVariable::UnnamedAddr UnnamedAddr); + bool parseAliasOrIFunc(const std::string &Name, LocTy NameLoc, unsigned L, + unsigned Visibility, unsigned DLLStorageClass, + bool DSOLocal, GlobalVariable::ThreadLocalMode TLM, + GlobalVariable::UnnamedAddr UnnamedAddr); bool parseComdat(); bool parseStandaloneMetadata(); bool parseNamedMetadata(); @@ -424,8 +421,8 @@ namespace llvm { /// GetVal - Get a value with the specified name or ID, creating a /// forward reference record if needed. This can return null if the value /// exists but does not have the right type. 
- Value *getVal(const std::string &Name, Type *Ty, LocTy Loc, bool IsCall); - Value *getVal(unsigned ID, Type *Ty, LocTy Loc, bool IsCall); + Value *getVal(const std::string &Name, Type *Ty, LocTy Loc); + Value *getVal(unsigned ID, Type *Ty, LocTy Loc); /// setInstName - After an instruction is parsed and inserted into its /// basic block, this installs its name. @@ -447,10 +444,10 @@ namespace llvm { }; bool convertValIDToValue(Type *Ty, ValID &ID, Value *&V, - PerFunctionState *PFS, bool IsCall); + PerFunctionState *PFS); Value *checkValidVariableType(LocTy Loc, const Twine &Name, Type *Ty, - Value *Val, bool IsCall); + Value *Val); bool parseConstantValue(Type *Ty, Constant *&C); bool parseValue(Type *Ty, Value *&V, PerFunctionState *PFS); diff --git a/llvm/include/llvm/AsmParser/LLToken.h b/llvm/include/llvm/AsmParser/LLToken.h index aa49c68fe924..f8ca054863ac 100644 --- a/llvm/include/llvm/AsmParser/LLToken.h +++ b/llvm/include/llvm/AsmParser/LLToken.h @@ -10,8 +10,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_LIB_ASMPARSER_LLTOKEN_H -#define LLVM_LIB_ASMPARSER_LLTOKEN_H +#ifndef LLVM_ASMPARSER_LLTOKEN_H +#define LLVM_ASMPARSER_LLTOKEN_H namespace llvm { namespace lltok { @@ -190,6 +190,7 @@ enum Kind { kw_convergent, kw_dereferenceable, kw_dereferenceable_or_null, + kw_disable_sanitizer_instrumentation, kw_elementtype, kw_inaccessiblememonly, kw_inaccessiblemem_or_argmemonly, @@ -403,6 +404,9 @@ enum Kind { kw_returnDoesNotAlias, kw_noInline, kw_alwaysInline, + kw_noUnwind, + kw_mayThrow, + kw_hasUnknownCall, kw_calls, kw_callee, kw_params, diff --git a/llvm/include/llvm/BinaryFormat/Dwarf.def b/llvm/include/llvm/BinaryFormat/Dwarf.def index 34f124b5779a..61f3f27ebb47 100644 --- a/llvm/include/llvm/BinaryFormat/Dwarf.def +++ b/llvm/include/llvm/BinaryFormat/Dwarf.def @@ -248,6 +248,9 @@ HANDLE_DW_TAG(0x5103, ALTIUM_rev_carry_type, 0, ALTIUM, DW_KIND_NONE) // M16 __rom qualifier HANDLE_DW_TAG(0x5111, 
ALTIUM_rom, 0, ALTIUM, DW_KIND_NONE) +// LLVM +HANDLE_DW_TAG(0x6000, LLVM_annotation, 0, LLVM, DW_KIND_NONE) + // Green Hills. HANDLE_DW_TAG(0x8004, GHS_namespace, 0, GHS, DW_KIND_NONE) HANDLE_DW_TAG(0x8005, GHS_using_namespace, 0, GHS, DW_KIND_NONE) diff --git a/llvm/include/llvm/BinaryFormat/DynamicTags.def b/llvm/include/llvm/BinaryFormat/DynamicTags.def index c08f8a53bdb5..814d8b113ec4 100644 --- a/llvm/include/llvm/BinaryFormat/DynamicTags.def +++ b/llvm/include/llvm/BinaryFormat/DynamicTags.def @@ -31,6 +31,11 @@ #define PPC64_DYNAMIC_TAG_DEFINED #endif +#ifndef RISCV_DYNAMIC_TAG +#define RISCV_DYNAMIC_TAG(name, value) DYNAMIC_TAG(name, value) +#define RISCV_DYNAMIC_TAG_DEFINED +#endif + #ifndef DYNAMIC_TAG_MARKER #define DYNAMIC_TAG_MARKER(name, value) DYNAMIC_TAG(name, value) #define DYNAMIC_TAG_MARKER_DEFINED @@ -213,6 +218,9 @@ PPC_DYNAMIC_TAG(PPC_OPT, 0x70000001) // Has TLS optimization. PPC64_DYNAMIC_TAG(PPC64_GLINK, 0x70000000) // Address of 32 bytes before the // first glink lazy resolver stub. +// RISC-V specific dynamic array tags. +RISCV_DYNAMIC_TAG(RISCV_VARIANT_CC, 0x70000001) + // Sun machine-independent extensions. DYNAMIC_TAG(AUXILIARY, 0x7FFFFFFD) // Shared object to load before self DYNAMIC_TAG(USED, 0x7FFFFFFE) // Same as DT_NEEDED @@ -243,3 +251,7 @@ DYNAMIC_TAG(FILTER, 0x7FFFFFFF) // Shared object to get values from #undef PPC64_DYNAMIC_TAG #undef PPC64_DYNAMIC_TAG_DEFINED #endif +#ifdef RISCV_DYNAMIC_TAG_DEFINED +#undef RISCV_DYNAMIC_TAG +#undef RISCV_DYNAMIC_TAG_DEFINED +#endif diff --git a/llvm/include/llvm/BinaryFormat/ELF.h b/llvm/include/llvm/BinaryFormat/ELF.h index 6148f968cdba..a270fd399aeb 100644 --- a/llvm/include/llvm/BinaryFormat/ELF.h +++ b/llvm/include/llvm/BinaryFormat/ELF.h @@ -660,6 +660,12 @@ enum { #include "ELFRelocs/RISCV.def" }; +enum { + // Symbol may follow different calling convention than the standard calling + // convention. 
+ STO_RISCV_VARIANT_CC = 0x80 +}; + // ELF Relocation types for S390/zSeries enum { #include "ELFRelocs/SystemZ.def" @@ -1596,6 +1602,16 @@ enum { NT_FREEBSD_PROCSTAT_AUXV = 16, }; +// OpenBSD core note types. +enum { + NT_OPENBSD_PROCINFO = 10, + NT_OPENBSD_AUXV = 11, + NT_OPENBSD_REGS = 20, + NT_OPENBSD_FPREGS = 21, + NT_OPENBSD_XFPREGS = 22, + NT_OPENBSD_WCOOKIE = 23, +}; + // AMDGPU-specific section indices. enum { SHN_AMDGPU_LDS = 0xff00, // Variable in LDS; symbol encoded like SHN_COMMON @@ -1618,6 +1634,13 @@ enum { NT_AMDGPU_METADATA = 32 }; +// LLVMOMPOFFLOAD specific notes. +enum : unsigned { + NT_LLVM_OPENMP_OFFLOAD_VERSION = 1, + NT_LLVM_OPENMP_OFFLOAD_PRODUCER = 2, + NT_LLVM_OPENMP_OFFLOAD_PRODUCER_VERSION = 3 +}; + enum { GNU_ABI_TAG_LINUX = 0, GNU_ABI_TAG_HURD = 1, diff --git a/llvm/include/llvm/BinaryFormat/ELFRelocs/RISCV.def b/llvm/include/llvm/BinaryFormat/ELFRelocs/RISCV.def index 9f2f0540bcbd..454450950444 100644 --- a/llvm/include/llvm/BinaryFormat/ELFRelocs/RISCV.def +++ b/llvm/include/llvm/BinaryFormat/ELFRelocs/RISCV.def @@ -46,10 +46,6 @@ ELF_RELOC(R_RISCV_ALIGN, 43) ELF_RELOC(R_RISCV_RVC_BRANCH, 44) ELF_RELOC(R_RISCV_RVC_JUMP, 45) ELF_RELOC(R_RISCV_RVC_LUI, 46) -ELF_RELOC(R_RISCV_GPREL_I, 47) -ELF_RELOC(R_RISCV_GPREL_S, 48) -ELF_RELOC(R_RISCV_TPREL_I, 49) -ELF_RELOC(R_RISCV_TPREL_S, 50) ELF_RELOC(R_RISCV_RELAX, 51) ELF_RELOC(R_RISCV_SUB6, 52) ELF_RELOC(R_RISCV_SET6, 53) diff --git a/llvm/include/llvm/BinaryFormat/MachO.def b/llvm/include/llvm/BinaryFormat/MachO.def index 76dcc58ba048..f68ecefa6c9e 100644 --- a/llvm/include/llvm/BinaryFormat/MachO.def +++ b/llvm/include/llvm/BinaryFormat/MachO.def @@ -74,6 +74,8 @@ HANDLE_LOAD_COMMAND(LC_VERSION_MIN_TVOS, 0x0000002Fu, version_min_command) HANDLE_LOAD_COMMAND(LC_VERSION_MIN_WATCHOS, 0x00000030u, version_min_command) HANDLE_LOAD_COMMAND(LC_NOTE, 0x00000031u, note_command) HANDLE_LOAD_COMMAND(LC_BUILD_VERSION, 0x00000032u, build_version_command) +HANDLE_LOAD_COMMAND(LC_DYLD_EXPORTS_TRIE, 
0x80000033u, linkedit_data_command) +HANDLE_LOAD_COMMAND(LC_DYLD_CHAINED_FIXUPS, 0x80000034u, linkedit_data_command) #endif diff --git a/llvm/include/llvm/BinaryFormat/Wasm.h b/llvm/include/llvm/BinaryFormat/Wasm.h index c38e64928521..0bc8c4e167d8 100644 --- a/llvm/include/llvm/BinaryFormat/Wasm.h +++ b/llvm/include/llvm/BinaryFormat/Wasm.h @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// // // This file defines manifest constants for the wasm object file format. -// See: https://github.com/WebAssembly/design/blob/master/BinaryEncoding.md +// See: https://github.com/WebAssembly/design/blob/main/BinaryEncoding.md // //===----------------------------------------------------------------------===// @@ -36,12 +36,25 @@ struct WasmObjectHeader { uint32_t Version; }; +struct WasmDylinkImportInfo { + StringRef Module; + StringRef Field; + uint32_t Flags; +}; + +struct WasmDylinkExportInfo { + StringRef Name; + uint32_t Flags; +}; + struct WasmDylinkInfo { uint32_t MemorySize; // Memory size in bytes uint32_t MemoryAlignment; // P2 alignment of memory uint32_t TableSize; // Table size in elements uint32_t TableAlignment; // P2 alignment of table std::vector<StringRef> Needed; // Shared library dependencies + std::vector<WasmDylinkImportInfo> ImportInfo; + std::vector<WasmDylinkExportInfo> ExportInfo; }; struct WasmProducerInfo { @@ -101,15 +114,9 @@ struct WasmGlobal { StringRef SymbolName; // from the "linking" section }; -struct WasmTagType { - // Kind of tag. Currently only WASM_TAG_ATTRIBUTE_EXCEPTION is possible. 
- uint8_t Attribute; - uint32_t SigIndex; -}; - struct WasmTag { uint32_t Index; - WasmTagType Type; + uint32_t SigIndex; StringRef SymbolName; // from the "linking" section }; @@ -122,7 +129,6 @@ struct WasmImport { WasmGlobalType Global; WasmTableType Table; WasmLimits Memory; - WasmTagType Tag; }; }; @@ -133,6 +139,7 @@ struct WasmLocalDecl { struct WasmFunction { uint32_t Index; + uint32_t SigIndex; std::vector<WasmLocalDecl> Locals; ArrayRef<uint8_t> Body; uint32_t CodeSectionOffset; @@ -284,11 +291,14 @@ enum : unsigned { // Opcodes used in synthetic functions. enum : unsigned { - WASM_OPCODE_IF = 0x04, - WASM_OPCODE_ELSE = 0x05, + WASM_OPCODE_BLOCK = 0x02, + WASM_OPCODE_BR = 0x0c, + WASM_OPCODE_BR_TABLE = 0x0e, + WASM_OPCODE_RETURN = 0x0f, WASM_OPCODE_DROP = 0x1a, WASM_OPCODE_MISC_PREFIX = 0xfc, WASM_OPCODE_MEMORY_INIT = 0x08, + WASM_OPCODE_MEMORY_FILL = 0x0b, WASM_OPCODE_DATA_DROP = 0x09, WASM_OPCODE_ATOMICS_PREFIX = 0xfe, WASM_OPCODE_ATOMIC_NOTIFY = 0x00, @@ -339,6 +349,14 @@ enum : unsigned { WASM_SYMBOL_TABLE = 0x8, }; +// Kind codes used in the custom "dylink" section +enum : unsigned { + WASM_DYLINK_MEM_INFO = 0x1, + WASM_DYLINK_NEEDED = 0x2, + WASM_DYLINK_EXPORT_INFO = 0x3, + WASM_DYLINK_IMPORT_INFO = 0x4, +}; + // Kind codes used in the custom "linking" section in the WASM_COMDAT_INFO enum : unsigned { WASM_COMDAT_DATA = 0x0, @@ -379,6 +397,7 @@ const unsigned WASM_SYMBOL_UNDEFINED = 0x10; const unsigned WASM_SYMBOL_EXPORTED = 0x20; const unsigned WASM_SYMBOL_EXPLICIT_NAME = 0x40; const unsigned WASM_SYMBOL_NO_STRIP = 0x80; +const unsigned WASM_SYMBOL_TLS = 0x100; #define WASM_RELOC(name, value) name = value, diff --git a/llvm/include/llvm/BinaryFormat/WasmTraits.h b/llvm/include/llvm/BinaryFormat/WasmTraits.h index 930ee690bcc0..bef9dd3291ca 100644 --- a/llvm/include/llvm/BinaryFormat/WasmTraits.h +++ b/llvm/include/llvm/BinaryFormat/WasmTraits.h @@ -18,10 +18,8 @@ namespace llvm { -template <typename T> struct DenseMapInfo; - // Traits for using 
WasmSignature in a DenseMap. -template <> struct DenseMapInfo<wasm::WasmSignature> { +template <> struct DenseMapInfo<wasm::WasmSignature, void> { static wasm::WasmSignature getEmptyKey() { wasm::WasmSignature Sig; Sig.State = wasm::WasmSignature::Empty; @@ -47,7 +45,7 @@ template <> struct DenseMapInfo<wasm::WasmSignature> { }; // Traits for using WasmGlobalType in a DenseMap -template <> struct DenseMapInfo<wasm::WasmGlobalType> { +template <> struct DenseMapInfo<wasm::WasmGlobalType, void> { static wasm::WasmGlobalType getEmptyKey() { return wasm::WasmGlobalType{1, true}; } @@ -64,7 +62,7 @@ template <> struct DenseMapInfo<wasm::WasmGlobalType> { }; // Traits for using WasmLimits in a DenseMap -template <> struct DenseMapInfo<wasm::WasmLimits> { +template <> struct DenseMapInfo<wasm::WasmLimits, void> { static wasm::WasmLimits getEmptyKey() { return wasm::WasmLimits{0xff, 0xff, 0xff}; } @@ -86,19 +84,19 @@ template <> struct DenseMapInfo<wasm::WasmLimits> { }; // Traits for using WasmTableType in a DenseMap -template <> struct DenseMapInfo<wasm::WasmTableType> { +template <> struct DenseMapInfo<wasm::WasmTableType, void> { static wasm::WasmTableType getEmptyKey() { - return wasm::WasmTableType{0, - DenseMapInfo<wasm::WasmLimits>::getEmptyKey()}; + return wasm::WasmTableType{ + 0, DenseMapInfo<wasm::WasmLimits, void>::getEmptyKey()}; } static wasm::WasmTableType getTombstoneKey() { return wasm::WasmTableType{ - 1, DenseMapInfo<wasm::WasmLimits>::getTombstoneKey()}; + 1, DenseMapInfo<wasm::WasmLimits, void>::getTombstoneKey()}; } static unsigned getHashValue(const wasm::WasmTableType &TableType) { return hash_combine( TableType.ElemType, - DenseMapInfo<wasm::WasmLimits>::getHashValue(TableType.Limits)); + DenseMapInfo<wasm::WasmLimits, void>::getHashValue(TableType.Limits)); } static bool isEqual(const wasm::WasmTableType &LHS, const wasm::WasmTableType &RHS) { diff --git a/llvm/include/llvm/BinaryFormat/XCOFF.h b/llvm/include/llvm/BinaryFormat/XCOFF.h index 
8a42d26f3f4a..cffd8618f1e3 100644 --- a/llvm/include/llvm/BinaryFormat/XCOFF.h +++ b/llvm/include/llvm/BinaryFormat/XCOFF.h @@ -28,9 +28,14 @@ namespace XCOFF { constexpr size_t FileNamePadSize = 6; constexpr size_t NameSize = 8; constexpr size_t FileHeaderSize32 = 20; +constexpr size_t FileHeaderSize64 = 24; +constexpr size_t AuxFileHeaderSize32 = 72; +constexpr size_t AuxFileHeaderSize64 = 110; constexpr size_t SectionHeaderSize32 = 40; +constexpr size_t SectionHeaderSize64 = 72; constexpr size_t SymbolTableEntrySize = 18; constexpr size_t RelocationSerializationSize32 = 10; +constexpr size_t RelocationSerializationSize64 = 14; constexpr uint16_t RelocOverflow = 65535; constexpr uint8_t AllocRegNo = 31; @@ -38,6 +43,17 @@ enum ReservedSectionNum : int16_t { N_DEBUG = -2, N_ABS = -1, N_UNDEF = 0 }; enum MagicNumber : uint16_t { XCOFF32 = 0x01DF, XCOFF64 = 0x01F7 }; +// This field only exists in the XCOFF64 definition. +enum AuxHeaderFlags64 : uint16_t { + SHR_SYMTAB = 0x8000, ///< At exec time, create shared symbol table for program + ///< (main program only). + FORK_POLICY = 0x4000, ///< Forktree policy specified (main program only). + FORK_COR = 0x2000 ///< If _AOUT_FORK_POLICY is set, specify copy-on-reference + ///< if this bit is set. Specify copy-on- write otherwise. + ///< If _AOUT_FORK_POLICY is 0, this bit is reserved for + ///< future use and should be set to 0. +}; + // x_smclas field of x_csect from system header: /usr/include/syms.h /// Storage Mapping Class definitions. enum StorageMappingClass : uint8_t { diff --git a/llvm/include/llvm/Bitcode/BitcodeAnalyzer.h b/llvm/include/llvm/Bitcode/BitcodeAnalyzer.h index de828be3bf1b..f6fc284da33f 100644 --- a/llvm/include/llvm/Bitcode/BitcodeAnalyzer.h +++ b/llvm/include/llvm/Bitcode/BitcodeAnalyzer.h @@ -42,6 +42,8 @@ struct BCDumpOptions { bool Symbolic = false; /// Print binary blobs using hex escapes. bool ShowBinaryBlobs = false; + /// Print BLOCKINFO block details. 
+ bool DumpBlockinfo = false; BCDumpOptions(raw_ostream &OS) : OS(OS) {} }; diff --git a/llvm/include/llvm/Bitcode/BitcodeCommon.h b/llvm/include/llvm/Bitcode/BitcodeCommon.h index 6a3e74550bc4..22d1872fe49c 100644 --- a/llvm/include/llvm/Bitcode/BitcodeCommon.h +++ b/llvm/include/llvm/Bitcode/BitcodeCommon.h @@ -19,10 +19,14 @@ namespace llvm { struct AllocaPackedValues { - using Align = Bitfield::Element<unsigned, 0, 5>; - using UsedWithInAlloca = Bitfield::Element<bool, Align::NextBit, 1>; + // We increased the number of bits needed to represent alignment to be more + // than 5, but to preserve backward compatibility we store the upper bits + // separately. + using AlignLower = Bitfield::Element<unsigned, 0, 5>; + using UsedWithInAlloca = Bitfield::Element<bool, AlignLower::NextBit, 1>; using ExplicitType = Bitfield::Element<bool, UsedWithInAlloca::NextBit, 1>; using SwiftError = Bitfield::Element<bool, ExplicitType::NextBit, 1>; + using AlignUpper = Bitfield::Element<unsigned, SwiftError::NextBit, 3>; }; } // namespace llvm diff --git a/llvm/include/llvm/Bitcode/LLVMBitCodes.h b/llvm/include/llvm/Bitcode/LLVMBitCodes.h index 28870afb2fcb..04eb2739cbd5 100644 --- a/llvm/include/llvm/Bitcode/LLVMBitCodes.h +++ b/llvm/include/llvm/Bitcode/LLVMBitCodes.h @@ -671,6 +671,7 @@ enum AttributeKindCodes { ATTR_KIND_SWIFT_ASYNC = 75, ATTR_KIND_NO_SANITIZE_COVERAGE = 76, ATTR_KIND_ELEMENTTYPE = 77, + ATTR_KIND_DISABLE_SANITIZER_INSTRUMENTATION = 78, }; enum ComdatSelectionKindCodes { diff --git a/llvm/include/llvm/CodeGen/Analysis.h b/llvm/include/llvm/CodeGen/Analysis.h index bdfb416d9bd9..60442326d6c7 100644 --- a/llvm/include/llvm/CodeGen/Analysis.h +++ b/llvm/include/llvm/CodeGen/Analysis.h @@ -104,9 +104,12 @@ ISD::CondCode getFCmpCodeWithoutNaN(ISD::CondCode CC); /// getICmpCondCode - Return the ISD condition code corresponding to /// the given LLVM IR integer condition code. 
-/// ISD::CondCode getICmpCondCode(ICmpInst::Predicate Pred); +/// getICmpCondCode - Return the LLVM IR integer condition code +/// corresponding to the given ISD integer condition code. +ICmpInst::Predicate getICmpCondCode(ISD::CondCode Pred); + /// Test if the given instruction is in a position to be optimized /// with a tail-call. This roughly means that it's in a block with /// a return and there's nothing that needs to be scheduled diff --git a/llvm/include/llvm/CodeGen/AsmPrinter.h b/llvm/include/llvm/CodeGen/AsmPrinter.h index 5dea86e67d64..d7d3692877de 100644 --- a/llvm/include/llvm/CodeGen/AsmPrinter.h +++ b/llvm/include/llvm/CodeGen/AsmPrinter.h @@ -41,7 +41,6 @@ class DIEAbbrev; class DwarfDebug; class GCMetadataPrinter; class GCStrategy; -class GlobalIndirectSymbol; class GlobalObject; class GlobalValue; class GlobalVariable; @@ -708,7 +707,7 @@ public: /// ${:comment}. Targets can override this to add support for their own /// strange codes. virtual void PrintSpecial(const MachineInstr *MI, raw_ostream &OS, - const char *Code) const; + StringRef Code) const; /// Print the MachineOperand as a symbol. Targets with complex handling of /// symbol references should override the base implementation. @@ -795,8 +794,8 @@ private: void emitModuleCommandLines(Module &M); GCMetadataPrinter *GetOrCreateGCPrinter(GCStrategy &S); - /// Emit GlobalAlias or GlobalIFunc. - void emitGlobalIndirectSymbol(Module &M, const GlobalIndirectSymbol &GIS); + void emitGlobalAlias(Module &M, const GlobalAlias &GA); + void emitGlobalIFunc(Module &M, const GlobalIFunc &GI); /// This method decides whether the specified basic block requires a label. 
bool shouldEmitLabelForBasicBlock(const MachineBasicBlock &MBB) const; diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index e3b834ec42c3..324b7dcfb3ac 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -22,6 +22,7 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/TargetTransformInfoImpl.h" #include "llvm/CodeGen/ISDOpcodes.h" @@ -282,6 +283,11 @@ public: return getTLI()->getTargetMachine().getAssumedAddrSpace(V); } + std::pair<const Value *, unsigned> + getPredicatedAddrSpace(const Value *V) const { + return getTLI()->getTargetMachine().getPredicatedAddrSpace(V); + } + Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV, Value *NewV) const { return nullptr; @@ -363,8 +369,9 @@ public: } InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr, - ArrayRef<const Value *> Operands) { - return BaseT::getGEPCost(PointeeType, Ptr, Operands); + ArrayRef<const Value *> Operands, + TTI::TargetCostKind CostKind) { + return BaseT::getGEPCost(PointeeType, Ptr, Operands, CostKind); } unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI, @@ -484,7 +491,8 @@ public: int getInlinerVectorBonusPercent() { return 150; } void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, - TTI::UnrollingPreferences &UP) { + TTI::UnrollingPreferences &UP, + OptimizationRemarkEmitter *ORE) { // This unrolling functionality is target independent, but to provide some // motivation for its intended use, for x86: @@ -526,6 +534,15 @@ public: continue; } + if (ORE) { + ORE->emit([&]() { + return OptimizationRemark("TTI", "DontUnroll", L->getStartLoc(), + L->getHeader()) + << "advising against unrolling the loop because it " + "contains a " + << ore::NV("Call", &I); + }); + } return; } } 
@@ -653,6 +670,7 @@ public: } Optional<unsigned> getMaxVScale() const { return None; } + Optional<unsigned> getVScaleForTuning() const { return None; } /// Estimate the overhead of scalarizing an instruction. Insert and Extract /// are set if the demanded result elements need to be inserted and/or @@ -686,7 +704,7 @@ public: bool Extract) { auto *Ty = cast<FixedVectorType>(InTy); - APInt DemandedElts = APInt::getAllOnesValue(Ty->getNumElements()); + APInt DemandedElts = APInt::getAllOnes(Ty->getNumElements()); return thisT()->getScalarizationOverhead(Ty, DemandedElts, Insert, Extract); } @@ -737,8 +755,7 @@ public: unsigned getMaxInterleaveFactor(unsigned VF) { return 1; } InstructionCost getArithmeticInstrCost( - unsigned Opcode, Type *Ty, - TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, + unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue, TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue, TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None, @@ -1102,6 +1119,39 @@ public: return LT.first; } + InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, + int VF, + const APInt &DemandedDstElts, + TTI::TargetCostKind CostKind) { + assert(DemandedDstElts.getBitWidth() == (unsigned)VF * ReplicationFactor && + "Unexpected size of DemandedDstElts."); + + InstructionCost Cost; + + auto *SrcVT = FixedVectorType::get(EltTy, VF); + auto *ReplicatedVT = FixedVectorType::get(EltTy, VF * ReplicationFactor); + + // The Mask shuffling cost is extract all the elements of the Mask + // and insert each of them Factor times into the wide vector: + // + // E.g. 
an interleaved group with factor 3: + // %mask = icmp ult <8 x i32> %vec1, %vec2 + // %interleaved.mask = shufflevector <8 x i1> %mask, <8 x i1> undef, + // <24 x i32> <0,0,0,1,1,1,2,2,2,3,3,3,4,4,4,5,5,5,6,6,6,7,7,7> + // The cost is estimated as extract all mask elements from the <8xi1> mask + // vector and insert them factor times into the <24xi1> shuffled mask + // vector. + APInt DemandedSrcElts = APIntOps::ScaleBitMask(DemandedDstElts, VF); + Cost += thisT()->getScalarizationOverhead(SrcVT, DemandedSrcElts, + /*Insert*/ false, + /*Extract*/ true); + Cost += + thisT()->getScalarizationOverhead(ReplicatedVT, DemandedDstElts, + /*Insert*/ true, /*Extract*/ false); + + return Cost; + } + InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, @@ -1201,9 +1251,9 @@ public: // used (those corresponding to elements [0:1] and [8:9] of the unlegalized // type). The other loads are unused. // - // We only scale the cost of loads since interleaved store groups aren't - // allowed to have gaps. - if (Opcode == Instruction::Load && VecTySize > VecTyLTSize) { + // TODO: Note that legalization can turn masked loads/stores into unmasked + // (legalized) loads/stores. This can be reflected in the cost. + if (Cost.isValid() && VecTySize > VecTyLTSize) { // The number of loads of a legal type it will take to represent a load // of the unlegalized vector type. unsigned NumLegalInsts = divideCeil(VecTySize, VecTyLTSize); @@ -1220,10 +1270,24 @@ public: // Scale the cost of the load by the fraction of legal instructions that // will be used. - Cost *= UsedInsts.count() / NumLegalInsts; + Cost = divideCeil(UsedInsts.count() * Cost.getValue().getValue(), + NumLegalInsts); } // Then plus the cost of interleave operation. 
+ assert(Indices.size() <= Factor && + "Interleaved memory op has too many members"); + + const APInt DemandedAllSubElts = APInt::getAllOnes(NumSubElts); + const APInt DemandedAllResultElts = APInt::getAllOnes(NumElts); + + APInt DemandedLoadStoreElts = APInt::getZero(NumElts); + for (unsigned Index : Indices) { + assert(Index < Factor && "Invalid index for interleaved memory op"); + for (unsigned Elm = 0; Elm < NumSubElts; Elm++) + DemandedLoadStoreElts.setBit(Index + Elm * Factor); + } + if (Opcode == Instruction::Load) { // The interleave cost is similar to extract sub vectors' elements // from the wide vector, and insert them into sub vectors. @@ -1233,79 +1297,56 @@ public: // %v0 = shuffle %vec, undef, <0, 2, 4, 6> ; Index 0 // The cost is estimated as extract elements at 0, 2, 4, 6 from the // <8 x i32> vector and insert them into a <4 x i32> vector. - - assert(Indices.size() <= Factor && - "Interleaved memory op has too many members"); - - for (unsigned Index : Indices) { - assert(Index < Factor && "Invalid index for interleaved memory op"); - - // Extract elements from loaded vector for each sub vector. - for (unsigned i = 0; i < NumSubElts; i++) - Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, VT, - Index + i * Factor); - } - - InstructionCost InsSubCost = 0; - for (unsigned i = 0; i < NumSubElts; i++) - InsSubCost += - thisT()->getVectorInstrCost(Instruction::InsertElement, SubVT, i); - + InstructionCost InsSubCost = + thisT()->getScalarizationOverhead(SubVT, DemandedAllSubElts, + /*Insert*/ true, /*Extract*/ false); Cost += Indices.size() * InsSubCost; + Cost += + thisT()->getScalarizationOverhead(VT, DemandedLoadStoreElts, + /*Insert*/ false, /*Extract*/ true); } else { - // The interleave cost is extract all elements from sub vectors, and + // The interleave cost is extract elements from sub vectors, and // insert them into the wide vector. // - // E.g. 
An interleaved store of factor 2: - // %v0_v1 = shuffle %v0, %v1, <0, 4, 1, 5, 2, 6, 3, 7> - // store <8 x i32> %interleaved.vec, <8 x i32>* %ptr - // The cost is estimated as extract all elements from both <4 x i32> - // vectors and insert into the <8 x i32> vector. - - InstructionCost ExtSubCost = 0; - for (unsigned i = 0; i < NumSubElts; i++) - ExtSubCost += - thisT()->getVectorInstrCost(Instruction::ExtractElement, SubVT, i); - Cost += ExtSubCost * Factor; - - for (unsigned i = 0; i < NumElts; i++) - Cost += static_cast<T *>(this) - ->getVectorInstrCost(Instruction::InsertElement, VT, i); + // E.g. An interleaved store of factor 3 with 2 members at indices 0,1: + // (using VF=4): + // %v0_v1 = shuffle %v0, %v1, <0,4,undef,1,5,undef,2,6,undef,3,7,undef> + // %gaps.mask = <true, true, false, true, true, false, + // true, true, false, true, true, false> + // call llvm.masked.store <12 x i32> %v0_v1, <12 x i32>* %ptr, + // i32 Align, <12 x i1> %gaps.mask + // The cost is estimated as extract all elements (of actual members, + // excluding gaps) from both <4 x i32> vectors and insert into the <12 x + // i32> vector. + InstructionCost ExtSubCost = + thisT()->getScalarizationOverhead(SubVT, DemandedAllSubElts, + /*Insert*/ false, /*Extract*/ true); + Cost += ExtSubCost * Indices.size(); + Cost += thisT()->getScalarizationOverhead(VT, DemandedLoadStoreElts, + /*Insert*/ true, + /*Extract*/ false); } if (!UseMaskForCond) return Cost; Type *I8Type = Type::getInt8Ty(VT->getContext()); - auto *MaskVT = FixedVectorType::get(I8Type, NumElts); - SubVT = FixedVectorType::get(I8Type, NumSubElts); - - // The Mask shuffling cost is extract all the elements of the Mask - // and insert each of them Factor times into the wide vector: - // - // E.g. 
an interleaved group with factor 3: - // %mask = icmp ult <8 x i32> %vec1, %vec2 - // %interleaved.mask = shufflevector <8 x i1> %mask, <8 x i1> undef, - // <24 x i32> <0,0,0,1,1,1,2,2,2,3,3,3,4,4,4,5,5,5,6,6,6,7,7,7> - // The cost is estimated as extract all mask elements from the <8xi1> mask - // vector and insert them factor times into the <24xi1> shuffled mask - // vector. - for (unsigned i = 0; i < NumSubElts; i++) - Cost += - thisT()->getVectorInstrCost(Instruction::ExtractElement, SubVT, i); - for (unsigned i = 0; i < NumElts; i++) - Cost += - thisT()->getVectorInstrCost(Instruction::InsertElement, MaskVT, i); + Cost += thisT()->getReplicationShuffleCost( + I8Type, Factor, NumSubElts, + UseMaskForGaps ? DemandedLoadStoreElts : DemandedAllResultElts, + CostKind); // The Gaps mask is invariant and created outside the loop, therefore the // cost of creating it is not accounted for here. However if we have both // a MaskForGaps and some other mask that guards the execution of the // memory access, we need to account for the cost of And-ing the two masks // inside the loop. 
- if (UseMaskForGaps) + if (UseMaskForGaps) { + auto *MaskVT = FixedVectorType::get(I8Type, NumElts); Cost += thisT()->getArithmeticInstrCost(BinaryOperator::And, MaskVT, CostKind); + } return Cost; } @@ -1460,10 +1501,10 @@ public: Type *CondTy = RetTy->getWithNewBitWidth(1); Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy, - CmpInst::BAD_ICMP_PREDICATE, CostKind); + CmpInst::ICMP_EQ, CostKind); Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy, - CmpInst::BAD_ICMP_PREDICATE, CostKind); + CmpInst::ICMP_EQ, CostKind); } return Cost; } @@ -1689,26 +1730,34 @@ public: return thisT()->getMinMaxReductionCost( VecOpTy, cast<VectorType>(CmpInst::makeCmpResultType(VecOpTy)), /*IsUnsigned=*/true, CostKind); - case Intrinsic::abs: + case Intrinsic::abs: { + // abs(X) = select(icmp(X,0),X,sub(0,X)) + Type *CondTy = RetTy->getWithNewBitWidth(1); + CmpInst::Predicate Pred = CmpInst::ICMP_SGT; + InstructionCost Cost = 0; + Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy, + Pred, CostKind); + Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy, + Pred, CostKind); + // TODO: Should we add an OperandValueProperties::OP_Zero property? + Cost += thisT()->getArithmeticInstrCost( + BinaryOperator::Sub, RetTy, CostKind, TTI::OK_UniformConstantValue); + return Cost; + } case Intrinsic::smax: case Intrinsic::smin: case Intrinsic::umax: case Intrinsic::umin: { - // abs(X) = select(icmp(X,0),X,sub(0,X)) // minmax(X,Y) = select(icmp(X,Y),X,Y) Type *CondTy = RetTy->getWithNewBitWidth(1); + bool IsUnsigned = IID == Intrinsic::umax || IID == Intrinsic::umin; + CmpInst::Predicate Pred = + IsUnsigned ? CmpInst::ICMP_UGT : CmpInst::ICMP_SGT; InstructionCost Cost = 0; - // TODO: Ideally getCmpSelInstrCost would accept an icmp condition code. 
- Cost += - thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy, - CmpInst::BAD_ICMP_PREDICATE, CostKind); - Cost += - thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy, - CmpInst::BAD_ICMP_PREDICATE, CostKind); - // TODO: Should we add an OperandValueProperties::OP_Zero property? - if (IID == Intrinsic::abs) - Cost += thisT()->getArithmeticInstrCost( - BinaryOperator::Sub, RetTy, CostKind, TTI::OK_UniformConstantValue); + Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy, + Pred, CostKind); + Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy, + Pred, CostKind); return Cost; } case Intrinsic::sadd_sat: @@ -1719,6 +1768,7 @@ public: Intrinsic::ID OverflowOp = IID == Intrinsic::sadd_sat ? Intrinsic::sadd_with_overflow : Intrinsic::ssub_with_overflow; + CmpInst::Predicate Pred = CmpInst::ICMP_SGT; // SatMax -> Overflow && SumDiff < 0 // SatMin -> Overflow && SumDiff >= 0 @@ -1726,12 +1776,10 @@ public: IntrinsicCostAttributes Attrs(OverflowOp, OpTy, {RetTy, RetTy}, FMF, nullptr, ScalarizationCostPassed); Cost += thisT()->getIntrinsicInstrCost(Attrs, CostKind); - Cost += - thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy, - CmpInst::BAD_ICMP_PREDICATE, CostKind); - Cost += 2 * thisT()->getCmpSelInstrCost( - BinaryOperator::Select, RetTy, CondTy, - CmpInst::BAD_ICMP_PREDICATE, CostKind); + Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy, + Pred, CostKind); + Cost += 2 * thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, + CondTy, Pred, CostKind); return Cost; } case Intrinsic::uadd_sat: @@ -1784,23 +1832,16 @@ public: ? 
BinaryOperator::Add : BinaryOperator::Sub; - // LHSSign -> LHS >= 0 - // RHSSign -> RHS >= 0 - // SumSign -> Sum >= 0 - // // Add: - // Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign) + // Overflow -> (Result < LHS) ^ (RHS < 0) // Sub: - // Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign) + // Overflow -> (Result < LHS) ^ (RHS > 0) InstructionCost Cost = 0; Cost += thisT()->getArithmeticInstrCost(Opcode, SumTy, CostKind); - Cost += 3 * thisT()->getCmpSelInstrCost( - Instruction::ICmp, SumTy, OverflowTy, - CmpInst::BAD_ICMP_PREDICATE, CostKind); Cost += 2 * thisT()->getCmpSelInstrCost( - Instruction::Select, OverflowTy, OverflowTy, - CmpInst::BAD_ICMP_PREDICATE, CostKind); - Cost += thisT()->getArithmeticInstrCost(BinaryOperator::And, OverflowTy, + Instruction::ICmp, SumTy, OverflowTy, + CmpInst::ICMP_SGT, CostKind); + Cost += thisT()->getArithmeticInstrCost(BinaryOperator::Xor, OverflowTy, CostKind); return Cost; } @@ -1811,12 +1852,15 @@ public: unsigned Opcode = IID == Intrinsic::uadd_with_overflow ? BinaryOperator::Add : BinaryOperator::Sub; + CmpInst::Predicate Pred = IID == Intrinsic::uadd_with_overflow + ? CmpInst::ICMP_ULT + : CmpInst::ICMP_UGT; InstructionCost Cost = 0; Cost += thisT()->getArithmeticInstrCost(Opcode, SumTy, CostKind); Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy, OverflowTy, - CmpInst::BAD_ICMP_PREDICATE, CostKind); + Pred, CostKind); return Cost; } case Intrinsic::smul_with_overflow: @@ -1825,9 +1869,9 @@ public: Type *OverflowTy = RetTy->getContainedType(1); unsigned ExtSize = MulTy->getScalarSizeInBits() * 2; Type *ExtTy = MulTy->getWithNewBitWidth(ExtSize); + bool IsSigned = IID == Intrinsic::smul_with_overflow; - unsigned ExtOp = - IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt; + unsigned ExtOp = IsSigned ? 
Instruction::SExt : Instruction::ZExt; TTI::CastContextHint CCH = TTI::CastContextHint::None; InstructionCost Cost = 0; @@ -1836,18 +1880,17 @@ public: thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind); Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, MulTy, ExtTy, CCH, CostKind); - Cost += thisT()->getArithmeticInstrCost(Instruction::LShr, MulTy, + Cost += thisT()->getArithmeticInstrCost(Instruction::LShr, ExtTy, CostKind, TTI::OK_AnyValue, TTI::OK_UniformConstantValue); - if (IID == Intrinsic::smul_with_overflow) + if (IsSigned) Cost += thisT()->getArithmeticInstrCost(Instruction::AShr, MulTy, CostKind, TTI::OK_AnyValue, TTI::OK_UniformConstantValue); - Cost += - thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, MulTy, OverflowTy, - CmpInst::BAD_ICMP_PREDICATE, CostKind); + Cost += thisT()->getCmpSelInstrCost( + BinaryOperator::ICmp, MulTy, OverflowTy, CmpInst::ICMP_NE, CostKind); return Cost; } case Intrinsic::ctpop: @@ -1974,16 +2017,16 @@ public: /// \param RetTy Return value types. /// \param Tys Argument types. /// \returns The cost of Call instruction. - InstructionCost - getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys, - TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) { + InstructionCost getCallInstrCost(Function *F, Type *RetTy, + ArrayRef<Type *> Tys, + TTI::TargetCostKind CostKind) { return 10; } unsigned getNumberOfParts(Type *Tp) { std::pair<InstructionCost, MVT> LT = getTLI()->getTypeLegalizationCost(DL, Tp); - return *LT.first.getValue(); + return LT.first.isValid() ? *LT.first.getValue() : 0; } InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *, @@ -2060,7 +2103,8 @@ public: // By default reductions need one shuffle per reduction level. 
ShuffleCost += NumReduxLevels * thisT()->getShuffleCost( TTI::SK_PermuteSingleSrc, Ty, None, 0, Ty); - ArithCost += NumReduxLevels * thisT()->getArithmeticInstrCost(Opcode, Ty); + ArithCost += + NumReduxLevels * thisT()->getArithmeticInstrCost(Opcode, Ty, CostKind); return ShuffleCost + ArithCost + thisT()->getVectorInstrCost(Instruction::ExtractElement, Ty, 0); } diff --git a/llvm/include/llvm/CodeGen/CodeGenCommonISel.h b/llvm/include/llvm/CodeGen/CodeGenCommonISel.h new file mode 100644 index 000000000000..270f935b6738 --- /dev/null +++ b/llvm/include/llvm/CodeGen/CodeGenCommonISel.h @@ -0,0 +1,219 @@ +//===- CodeGenCommonISel.h - Common code between ISels ---------*- C++ -*--===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file declares common utilities that are shared between SelectionDAG and +// GlobalISel frameworks. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_CODEGENCOMMONISEL_H +#define LLVM_CODEGEN_CODEGENCOMMONISEL_H + +#include "llvm/CodeGen/MachineBasicBlock.h" +#include <cassert> +namespace llvm { + +class BasicBlock; +class MachineBasicBlock; +/// Encapsulates all of the information needed to generate a stack protector +/// check, and signals to isel when initialized that one needs to be generated. +/// +/// *NOTE* The following is a high level documentation of SelectionDAG Stack +/// Protector Generation. This is now also ported be shared with GlobalISel, +/// but without any significant changes. +/// +/// High Level Overview of ISel Stack Protector Generation: +/// +/// Previously, the "stack protector" IR pass handled stack protector +/// generation. 
This necessitated splitting basic blocks at the IR level to +/// create the success/failure basic blocks in the tail of the basic block in +/// question. As a result of this, calls that would have qualified for the +/// sibling call optimization were no longer eligible for optimization since +/// said calls were no longer right in the "tail position" (i.e. the immediate +/// predecessor of a ReturnInst instruction). +/// +/// Since the sibling call optimization causes the callee to reuse the caller's +/// stack, if we could delay the generation of the stack protector check until +/// later in CodeGen after the sibling call decision was made, we get both the +/// tail call optimization and the stack protector check! +/// +/// A few goals in solving this problem were: +/// +/// 1. Preserve the architecture independence of stack protector generation. +/// +/// 2. Preserve the normal IR level stack protector check for platforms like +/// OpenBSD for which we support platform-specific stack protector +/// generation. +/// +/// The main problem that guided the present solution is that one can not +/// solve this problem in an architecture independent manner at the IR level +/// only. This is because: +/// +/// 1. The decision on whether or not to perform a sibling call on certain +/// platforms (for instance i386) requires lower level information +/// related to available registers that can not be known at the IR level. +/// +/// 2. Even if the previous point were not true, the decision on whether to +/// perform a tail call is done in LowerCallTo in SelectionDAG (or +/// CallLowering in GlobalISel) which occurs after the Stack Protector +/// Pass. As a result, one would need to put the relevant callinst into the +/// stack protector check success basic block (where the return inst is +/// placed) and then move it back later at ISel/MI time before the +/// stack protector check if the tail call optimization failed. 
The MI +/// level option was nixed immediately since it would require +/// platform-specific pattern matching. The ISel level option was +/// nixed because SelectionDAG only processes one IR level basic block at a +/// time implying one could not create a DAG Combine to move the callinst. +/// +/// To get around this problem: +/// +/// 1. SelectionDAG can only process one block at a time, we can generate +/// multiple machine basic blocks for one IR level basic block. +/// This is how we handle bit tests and switches. +/// +/// 2. At the MI level, tail calls are represented via a special return +/// MIInst called "tcreturn". Thus if we know the basic block in which we +/// wish to insert the stack protector check, we get the correct behavior +/// by always inserting the stack protector check right before the return +/// statement. This is a "magical transformation" since no matter where +/// the stack protector check intrinsic is, we always insert the stack +/// protector check code at the end of the BB. +/// +/// Given the aforementioned constraints, the following solution was devised: +/// +/// 1. On platforms that do not support ISel stack protector check +/// generation, allow for the normal IR level stack protector check +/// generation to continue. +/// +/// 2. On platforms that do support ISel stack protector check +/// generation: +/// +/// a. Use the IR level stack protector pass to decide if a stack +/// protector is required/which BB we insert the stack protector check +/// in by reusing the logic already therein. +/// +/// b. After we finish selecting the basic block, we produce the validation +/// code with one of these techniques: +/// 1) with a call to a guard check function +/// 2) with inlined instrumentation +/// +/// 1) We insert a call to the check function before the terminator. 
+/// +/// 2) We first find a splice point in the parent basic block +/// before the terminator and then splice the terminator of said basic +/// block into the success basic block. Then we code-gen a new tail for +/// the parent basic block consisting of the two loads, the comparison, +/// and finally two branches to the success/failure basic blocks. We +/// conclude by code-gening the failure basic block if we have not +/// code-gened it already (all stack protector checks we generate in +/// the same function, use the same failure basic block). +class StackProtectorDescriptor { +public: + StackProtectorDescriptor() = default; + + /// Returns true if all fields of the stack protector descriptor are + /// initialized implying that we should/are ready to emit a stack protector. + bool shouldEmitStackProtector() const { + return ParentMBB && SuccessMBB && FailureMBB; + } + + bool shouldEmitFunctionBasedCheckStackProtector() const { + return ParentMBB && !SuccessMBB && !FailureMBB; + } + + /// Initialize the stack protector descriptor structure for a new basic + /// block. + void initialize(const BasicBlock *BB, MachineBasicBlock *MBB, + bool FunctionBasedInstrumentation) { + // Make sure we are not initialized yet. + assert(!shouldEmitStackProtector() && "Stack Protector Descriptor is " + "already initialized!"); + ParentMBB = MBB; + if (!FunctionBasedInstrumentation) { + SuccessMBB = addSuccessorMBB(BB, MBB, /* IsLikely */ true); + FailureMBB = addSuccessorMBB(BB, MBB, /* IsLikely */ false, FailureMBB); + } + } + + /// Reset state that changes when we handle different basic blocks. + /// + /// This currently includes: + /// + /// 1. The specific basic block we are generating a + /// stack protector for (ParentMBB). + /// + /// 2. The successor machine basic block that will contain the tail of + /// parent mbb after we create the stack protector check (SuccessMBB). This + /// BB is visited only on stack protector check success. 
+ void resetPerBBState() { + ParentMBB = nullptr; + SuccessMBB = nullptr; + } + + /// Reset state that only changes when we switch functions. + /// + /// This currently includes: + /// + /// 1. FailureMBB since we reuse the failure code path for all stack + /// protector checks created in an individual function. + /// + /// 2.The guard variable since the guard variable we are checking against is + /// always the same. + void resetPerFunctionState() { FailureMBB = nullptr; } + + MachineBasicBlock *getParentMBB() { return ParentMBB; } + MachineBasicBlock *getSuccessMBB() { return SuccessMBB; } + MachineBasicBlock *getFailureMBB() { return FailureMBB; } + +private: + /// The basic block for which we are generating the stack protector. + /// + /// As a result of stack protector generation, we will splice the + /// terminators of this basic block into the successor mbb SuccessMBB and + /// replace it with a compare/branch to the successor mbbs + /// SuccessMBB/FailureMBB depending on whether or not the stack protector + /// was violated. + MachineBasicBlock *ParentMBB = nullptr; + + /// A basic block visited on stack protector check success that contains the + /// terminators of ParentMBB. + MachineBasicBlock *SuccessMBB = nullptr; + + /// This basic block visited on stack protector check failure that will + /// contain a call to __stack_chk_fail(). + MachineBasicBlock *FailureMBB = nullptr; + + /// Add a successor machine basic block to ParentMBB. If the successor mbb + /// has not been created yet (i.e. if SuccMBB = 0), then the machine basic + /// block will be created. Assign a large weight if IsLikely is true. + MachineBasicBlock *addSuccessorMBB(const BasicBlock *BB, + MachineBasicBlock *ParentMBB, + bool IsLikely, + MachineBasicBlock *SuccMBB = nullptr); +}; + +/// Find the split point at which to splice the end of BB into its success stack +/// protector check machine basic block. 
+/// +/// On many platforms, due to ABI constraints, terminators, even before register +/// allocation, use physical registers. This creates an issue for us since +/// physical registers at this point can not travel across basic +/// blocks. Luckily, selectiondag always moves physical registers into vregs +/// when they enter functions and moves them through a sequence of copies back +/// into the physical registers right before the terminator creating a +/// ``Terminator Sequence''. This function is searching for the beginning of the +/// terminator sequence so that we can ensure that we splice off not just the +/// terminator, but additionally the copies that move the vregs into the +/// physical registers. +MachineBasicBlock::iterator +findSplitPointForStackProtector(MachineBasicBlock *BB, + const TargetInstrInfo &TII); + +} // namespace llvm + +#endif // LLVM_CODEGEN_CODEGENCOMMONISEL_H diff --git a/llvm/include/llvm/CodeGen/CommandFlags.h b/llvm/include/llvm/CodeGen/CommandFlags.h index 5a4351756297..ed3cd54df272 100644 --- a/llvm/include/llvm/CodeGen/CommandFlags.h +++ b/llvm/include/llvm/CodeGen/CommandFlags.h @@ -48,7 +48,6 @@ Optional<CodeModel::Model> getExplicitCodeModel(); llvm::ExceptionHandling getExceptionModel(); -CodeGenFileType getFileType(); Optional<CodeGenFileType> getExplicitFileType(); CodeGenFileType getFileType(); @@ -74,6 +73,8 @@ llvm::FloatABI::ABIType getFloatABIForCalls(); llvm::FPOpFusion::FPOpFusionMode getFuseFPOps(); +SwiftAsyncFramePointerMode getSwiftAsyncFramePointer(); + bool getDontPlaceZerosInBSS(); bool getEnableGuaranteedTailCallOpt(); @@ -128,8 +129,6 @@ bool getEnableMachineFunctionSplitter(); bool getEnableDebugEntryValues(); -bool getPseudoProbeForProfiling(); - bool getValueTrackingVariableLocations(); bool getForceDwarfFrameSection(); @@ -138,6 +137,8 @@ bool getXRayOmitFunctionIndex(); bool getDebugStrictDwarf(); +unsigned getAlignLoops(); + /// Create this object with static storage to register codegen-related 
command /// line options. struct RegisterCodeGenFlags { diff --git a/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h b/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h index b6bde0249f88..524730d53694 100644 --- a/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h +++ b/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h @@ -17,7 +17,6 @@ #include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/IndexedMap.h" -#include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/ISDOpcodes.h" diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h b/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h index 6bdaddd9c6f5..9c878d4b087b 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h @@ -116,6 +116,9 @@ public: /// vreg that the swifterror should be copied into after the call. Register SwiftErrorVReg; + /// Original IR callsite corresponding to this call, if available. + const CallBase *CB = nullptr; + MDNode *KnownCallees = nullptr; /// True if the call must be tail call optimized. @@ -259,7 +262,7 @@ public: /// handle the appropriate COPY (either to or from) and mark any /// relevant uses/defines as needed. virtual void assignValueToReg(Register ValVReg, Register PhysReg, - CCValAssign &VA) = 0; + CCValAssign VA) = 0; /// The specified value has been assigned to a stack /// location. Load or store it there, with appropriate extension @@ -279,11 +282,14 @@ public: } /// Handle custom values, which may be passed into one or more of \p VAs. + /// \p If the handler wants the assignments to be delayed until after + /// mem loc assignments, then it sets \p Thunk to the thunk to do the + /// assignment. /// \return The number of \p VAs that have been assigned after the first /// one, and which should therefore be skipped from further /// processing. 
- virtual unsigned assignCustomValue(ArgInfo &Arg, - ArrayRef<CCValAssign> VAs) { + virtual unsigned assignCustomValue(ArgInfo &Arg, ArrayRef<CCValAssign> VAs, + std::function<void()> *Thunk = nullptr) { // This is not a pure virtual method because not all targets need to worry // about custom values. llvm_unreachable("Custom values not supported"); @@ -315,7 +321,7 @@ public: /// Provides a default implementation for argument handling. void assignValueToReg(Register ValVReg, Register PhysReg, - CCValAssign &VA) override; + CCValAssign VA) override; }; /// Base class for ValueHandlers used for arguments passed to a function call, diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h index 56459b68dce0..ff4ad4b72636 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h @@ -36,7 +36,10 @@ class GISelKnownBits; class MachineDominatorTree; class LegalizerInfo; struct LegalityQuery; +class RegisterBank; +class RegisterBankInfo; class TargetLowering; +class TargetRegisterInfo; struct PreferredTuple { LLT Ty; // The result type of the extend. 
@@ -54,6 +57,7 @@ struct IndexedLoadStoreMatchInfo { struct PtrAddChain { int64_t Imm; Register Base; + const RegisterBank *Bank; }; struct RegisterImmPair { @@ -68,6 +72,16 @@ struct ShiftOfShiftedLogic { uint64_t ValSum; }; +using BuildFnTy = std::function<void(MachineIRBuilder &)>; + +struct MergeTruncStoresInfo { + SmallVector<GStore *> FoundStores; + GStore *LowestIdxStore = nullptr; + Register WideSrcVal; + bool NeedBSwap = false; + bool NeedRotate = false; +}; + using OperandBuildSteps = SmallVector<std::function<void(MachineInstrBuilder &)>, 4>; struct InstructionBuildSteps { @@ -95,6 +109,8 @@ protected: GISelKnownBits *KB; MachineDominatorTree *MDT; const LegalizerInfo *LI; + const RegisterBankInfo *RBI; + const TargetRegisterInfo *TRI; public: CombinerHelper(GISelChangeObserver &Observer, MachineIRBuilder &B, @@ -120,6 +136,22 @@ public: void replaceRegOpWith(MachineRegisterInfo &MRI, MachineOperand &FromRegOp, Register ToReg) const; + /// Replace the opcode in instruction with a new opcode and inform the + /// observer of the changes. + void replaceOpcodeWith(MachineInstr &FromMI, unsigned ToOpcode) const; + + /// Get the register bank of \p Reg. + /// If Reg has not been assigned a register, a register class, + /// or a register bank, then this returns nullptr. + /// + /// \pre Reg.isValid() + const RegisterBank *getRegBank(Register Reg) const; + + /// Set the register bank of \p Reg. + /// Does nothing if the RegBank is null. + /// This is the counterpart to getRegBank. + void setRegBank(Register Reg, const RegisterBank *RegBank); + /// If \p MI is COPY, try to combine it. /// Returns true if MI changed. 
bool tryCombineCopy(MachineInstr &MI); @@ -144,6 +176,9 @@ public: bool matchCombineExtendingLoads(MachineInstr &MI, PreferredTuple &MatchInfo); void applyCombineExtendingLoads(MachineInstr &MI, PreferredTuple &MatchInfo); + /// Match (and (load x), mask) -> zextload x + bool matchCombineLoadWithAndMask(MachineInstr &MI, BuildFnTy &MatchInfo); + /// Combine \p MI into a pre-indexed or post-indexed load/store operation if /// legal and the surrounding code makes it useful. bool tryCombineIndexedLoadStore(MachineInstr &MI); @@ -341,6 +376,9 @@ public: bool matchCombineFAbsOfFAbs(MachineInstr &MI, Register &Src); void applyCombineFAbsOfFAbs(MachineInstr &MI, Register &Src); + /// Transform fabs(fneg(x)) to fabs(x). + bool matchCombineFAbsOfFNeg(MachineInstr &MI, BuildFnTy &MatchInfo); + /// Transform trunc ([asz]ext x) to x or ([asz]ext x) or (trunc x). bool matchCombineTruncOfExt(MachineInstr &MI, std::pair<Register, unsigned> &MatchInfo); @@ -445,7 +483,7 @@ public: /// Fold and(and(x, C1), C2) -> C1&C2 ? and(x, C1&C2) : 0 bool matchOverlappingAnd(MachineInstr &MI, - std::function<void(MachineIRBuilder &)> &MatchInfo); + BuildFnTy &MatchInfo); /// \return true if \p MI is a G_AND instruction whose operands are x and y /// where x & y == x or x & y == y. (E.g., one of operands is all-ones value.) @@ -501,8 +539,10 @@ public: /// /// And check if the tree can be replaced with a M-bit load + possibly a /// bswap. 
- bool matchLoadOrCombine(MachineInstr &MI, - std::function<void(MachineIRBuilder &)> &MatchInfo); + bool matchLoadOrCombine(MachineInstr &MI, BuildFnTy &MatchInfo); + + bool matchTruncStoreMerge(MachineInstr &MI, MergeTruncStoresInfo &MatchInfo); + void applyTruncStoreMerge(MachineInstr &MI, MergeTruncStoresInfo &MatchInfo); bool matchExtendThroughPhis(MachineInstr &MI, MachineInstr *&ExtMI); void applyExtendThroughPhis(MachineInstr &MI, MachineInstr *&ExtMI); @@ -519,12 +559,10 @@ public: /// Use a function which takes in a MachineIRBuilder to perform a combine. /// By default, it erases the instruction \p MI from the function. - void applyBuildFn(MachineInstr &MI, - std::function<void(MachineIRBuilder &)> &MatchInfo); + void applyBuildFn(MachineInstr &MI, BuildFnTy &MatchInfo); /// Use a function which takes in a MachineIRBuilder to perform a combine. /// This variant does not erase \p MI after calling the build function. - void applyBuildFnNoErase(MachineInstr &MI, - std::function<void(MachineIRBuilder &)> &MatchInfo); + void applyBuildFnNoErase(MachineInstr &MI, BuildFnTy &MatchInfo); bool matchFunnelShiftToRotate(MachineInstr &MI); void applyFunnelShiftToRotate(MachineInstr &MI); @@ -535,21 +573,57 @@ public: /// or false constant based off of KnownBits information. bool matchICmpToTrueFalseKnownBits(MachineInstr &MI, int64_t &MatchInfo); - bool matchBitfieldExtractFromSExtInReg( - MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo); - /// Match: and (lshr x, cst), mask -> ubfx x, cst, width - bool matchBitfieldExtractFromAnd( - MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo); + /// \returns true if a G_ICMP \p MI can be replaced with its LHS based off of + /// KnownBits information. 
+ bool + matchICmpToLHSKnownBits(MachineInstr &MI, + BuildFnTy &MatchInfo); + + /// \returns true if (and (or x, c1), c2) can be replaced with (and x, c2) + bool matchAndOrDisjointMask(MachineInstr &MI, BuildFnTy &MatchInfo); + bool matchBitfieldExtractFromSExtInReg(MachineInstr &MI, + BuildFnTy &MatchInfo); + /// Match: and (lshr x, cst), mask -> ubfx x, cst, width + bool matchBitfieldExtractFromAnd(MachineInstr &MI, BuildFnTy &MatchInfo); + + /// Match: shr (shl x, n), k -> sbfx/ubfx x, pos, width + bool matchBitfieldExtractFromShr(MachineInstr &MI, BuildFnTy &MatchInfo); + + /// Match: shr (and x, n), k -> ubfx x, pos, width + bool matchBitfieldExtractFromShrAnd(MachineInstr &MI, BuildFnTy &MatchInfo); + + // Helpers for reassociation: + bool matchReassocConstantInnerRHS(GPtrAdd &MI, MachineInstr *RHS, + BuildFnTy &MatchInfo); + bool matchReassocFoldConstantsInSubTree(GPtrAdd &MI, MachineInstr *LHS, + MachineInstr *RHS, + BuildFnTy &MatchInfo); + bool matchReassocConstantInnerLHS(GPtrAdd &MI, MachineInstr *LHS, + MachineInstr *RHS, BuildFnTy &MatchInfo); /// Reassociate pointer calculations with G_ADD involved, to allow better /// addressing mode usage. - bool matchReassocPtrAdd(MachineInstr &MI, - std::function<void(MachineIRBuilder &)> &MatchInfo); - + bool matchReassocPtrAdd(MachineInstr &MI, BuildFnTy &MatchInfo); /// Do constant folding when opportunities are exposed after MIR building. bool matchConstantFold(MachineInstr &MI, APInt &MatchInfo); + /// \returns true if it is possible to narrow the width of a scalar binop + /// feeding a G_AND instruction \p MI. + bool matchNarrowBinopFeedingAnd(MachineInstr &MI, BuildFnTy &MatchInfo); + + /// Given an G_UDIV \p MI expressing a divide by constant, return an + /// expression that implements it by multiplying by a magic number. + /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide". 
+ MachineInstr *buildUDivUsingMul(MachineInstr &MI); + /// Combine G_UDIV by constant into a multiply by magic constant. + bool matchUDivByConst(MachineInstr &MI); + void applyUDivByConst(MachineInstr &MI); + + // G_UMULH x, (1 << c)) -> x >> (bitwidth - c) + bool matchUMulHToLShr(MachineInstr &MI); + void applyUMulHToLShr(MachineInstr &MI); + /// Try to transform \p MI by using all of the above /// combine functions. Returns true if changed. bool tryCombine(MachineInstr &MI); @@ -560,20 +634,21 @@ public: /// and rename: s/bool tryEmit/void emit/ bool tryEmitMemcpyInline(MachineInstr &MI); -private: - // Memcpy family optimization helpers. - bool tryEmitMemcpyInline(MachineInstr &MI, Register Dst, Register Src, - uint64_t KnownLen, Align DstAlign, Align SrcAlign, - bool IsVolatile); - bool optimizeMemcpy(MachineInstr &MI, Register Dst, Register Src, - uint64_t KnownLen, uint64_t Limit, Align DstAlign, - Align SrcAlign, bool IsVolatile); - bool optimizeMemmove(MachineInstr &MI, Register Dst, Register Src, - uint64_t KnownLen, Align DstAlign, Align SrcAlign, - bool IsVolatile); - bool optimizeMemset(MachineInstr &MI, Register Dst, Register Val, - uint64_t KnownLen, Align DstAlign, bool IsVolatile); + /// Match: + /// (G_UMULO x, 2) -> (G_UADDO x, x) + /// (G_SMULO x, 2) -> (G_SADDO x, x) + bool matchMulOBy2(MachineInstr &MI, BuildFnTy &MatchInfo); + /// Transform (fadd x, fneg(y)) -> (fsub x, y) + /// (fadd fneg(x), y) -> (fsub y, x) + /// (fsub x, fneg(y)) -> (fadd x, y) + /// (fmul fneg(x), fneg(y)) -> (fmul x, y) + /// (fdiv fneg(x), fneg(y)) -> (fdiv x, y) + /// (fmad fneg(x), fneg(y), z) -> (fmad x, y, z) + /// (fma fneg(x), fneg(y), z) -> (fma x, y, z) + bool matchRedundantNegOperands(MachineInstr &MI, BuildFnTy &MatchInfo); + +private: /// Given a non-indexed load or store instruction \p MI, find an offset that /// can be usefully and legally folded into it as a post-indexing operation. 
/// diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h index 1162134b2ad2..7103656365b1 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h @@ -57,9 +57,9 @@ public: bool isUnordered() const { return getMMO().isUnordered(); } /// Returns the size in bytes of the memory access. - uint64_t getMemSize() { return getMMO().getSize(); + uint64_t getMemSize() const { return getMMO().getSize(); } /// Returns the size in bits of the memory access. - uint64_t getMemSizeInBits() { return getMMO().getSizeInBits(); } + uint64_t getMemSizeInBits() const { return getMMO().getSizeInBits(); } static bool classof(const MachineInstr *MI) { switch (MI->getOpcode()) { @@ -195,6 +195,37 @@ public: } }; +/// Represents a G_PTR_ADD. +class GPtrAdd : public GenericMachineInstr { +public: + Register getBaseReg() const { return getReg(1); } + Register getOffsetReg() const { return getReg(2); } + + static bool classof(const MachineInstr *MI) { + return MI->getOpcode() == TargetOpcode::G_PTR_ADD; + } +}; + +/// Represents a G_IMPLICIT_DEF. +class GImplicitDef : public GenericMachineInstr { +public: + static bool classof(const MachineInstr *MI) { + return MI->getOpcode() == TargetOpcode::G_IMPLICIT_DEF; + } +}; + +/// Represents a G_SELECT. +class GSelect : public GenericMachineInstr { +public: + Register getCondReg() const { return getReg(1); } + Register getTrueReg() const { return getReg(2); } + Register getFalseReg() const { return getReg(3); } + + static bool classof(const MachineInstr *MI) { + return MI->getOpcode() == TargetOpcode::G_SELECT; + } +}; + } // namespace llvm -#endif // LLVM_CODEGEN_GLOBALISEL_GENERICMACHINEINSTRS_H
\ No newline at end of file +#endif // LLVM_CODEGEN_GLOBALISEL_GENERICMACHINEINSTRS_H diff --git a/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h b/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h index 8eab8a5846a7..ebe16cd4f58c 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h @@ -20,6 +20,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/CodeGenCommonISel.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -466,9 +467,8 @@ private: bool translateSIToFP(const User &U, MachineIRBuilder &MIRBuilder) { return translateCast(TargetOpcode::G_SITOFP, U, MIRBuilder); } - bool translateUnreachable(const User &U, MachineIRBuilder &MIRBuilder) { - return true; - } + bool translateUnreachable(const User &U, MachineIRBuilder &MIRBuilder); + bool translateSExt(const User &U, MachineIRBuilder &MIRBuilder) { return translateCast(TargetOpcode::G_SEXT, U, MIRBuilder); } @@ -586,6 +586,8 @@ private: /// stop translating such blocks early. bool HasTailCall = false; + StackProtectorDescriptor SPDescriptor; + /// Switch analysis and optimization. class GISelSwitchLowering : public SwitchCG::SwitchLowering { public: @@ -614,8 +616,34 @@ private: // * Clear the different maps. void finalizeFunction(); - // Handle emitting jump tables for each basic block. - void finalizeBasicBlock(); + // Processing steps done per block. E.g. emitting jump tables, stack + // protectors etc. Returns true if no errors, false if there was a problem + // that caused an abort. + bool finalizeBasicBlock(const BasicBlock &BB, MachineBasicBlock &MBB); + + /// Codegen a new tail for a stack protector check ParentMBB which has had its + /// tail spliced into a stack protector check success bb. 
+ /// + /// For a high level explanation of how this fits into the stack protector + /// generation see the comment on the declaration of class + /// StackProtectorDescriptor. + /// + /// \return true if there were no problems. + bool emitSPDescriptorParent(StackProtectorDescriptor &SPD, + MachineBasicBlock *ParentBB); + + /// Codegen the failure basic block for a stack protector check. + /// + /// A failure stack protector machine basic block consists simply of a call to + /// __stack_chk_fail(). + /// + /// For a high level explanation of how this fits into the stack protector + /// generation see the comment on the declaration of class + /// StackProtectorDescriptor. + /// + /// \return true if there were no problems. + bool emitSPDescriptorFailure(StackProtectorDescriptor &SPD, + MachineBasicBlock *FailureBB); /// Get the VRegs that represent \p Val. /// Non-aggregate types have just one corresponding VReg and the list can be diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegacyLegalizerInfo.h b/llvm/include/llvm/CodeGen/GlobalISel/LegacyLegalizerInfo.h index b1f2103da309..f6704df3f49d 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/LegacyLegalizerInfo.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/LegacyLegalizerInfo.h @@ -478,4 +478,4 @@ private: } // end namespace llvm -#endif // define LLVM_CODEGEN_GLOBALISEL_LEGACYLEGALIZERINFO_H +#endif // LLVM_CODEGEN_GLOBALISEL_LEGACYLEGALIZERINFO_H diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h index 44a48927d35a..8a603de2f91d 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h @@ -15,6 +15,7 @@ #define LLVM_CODEGEN_GLOBALISEL_LEGALIZATIONARTIFACTCOMBINER_H #include "llvm/ADT/SmallBitVector.h" +#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h" #include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h" 
#include "llvm/CodeGen/GlobalISel/Legalizer.h" #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" @@ -22,6 +23,7 @@ #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" #include "llvm/CodeGen/GlobalISel/Utils.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Register.h" #include "llvm/Support/Debug.h" #define DEBUG_TYPE "legalizer" @@ -52,7 +54,8 @@ public: bool tryCombineAnyExt(MachineInstr &MI, SmallVectorImpl<MachineInstr *> &DeadInsts, - SmallVectorImpl<Register> &UpdatedDefs) { + SmallVectorImpl<Register> &UpdatedDefs, + GISelObserverWrapper &Observer) { assert(MI.getOpcode() == TargetOpcode::G_ANYEXT); Builder.setInstrAndDebugLoc(MI); @@ -63,7 +66,11 @@ public: Register TruncSrc; if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc)))) { LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI;); - Builder.buildAnyExtOrTrunc(DstReg, TruncSrc); + if (MRI.getType(DstReg) == MRI.getType(TruncSrc)) + replaceRegOrBuildCopy(DstReg, TruncSrc, MRI, Builder, UpdatedDefs, + Observer); + else + Builder.buildAnyExtOrTrunc(DstReg, TruncSrc); UpdatedDefs.push_back(DstReg); markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts); return true; @@ -120,12 +127,14 @@ public: return false; LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI;); LLT SrcTy = MRI.getType(SrcReg); - APInt MaskVal = APInt::getAllOnesValue(SrcTy.getScalarSizeInBits()); + APInt MaskVal = APInt::getAllOnes(SrcTy.getScalarSizeInBits()); auto Mask = Builder.buildConstant( DstTy, MaskVal.zext(DstTy.getScalarSizeInBits())); - auto Extended = SextSrc ? Builder.buildSExtOrTrunc(DstTy, SextSrc) : - Builder.buildAnyExtOrTrunc(DstTy, TruncSrc); - Builder.buildAnd(DstReg, Extended, Mask); + if (SextSrc && (DstTy != MRI.getType(SextSrc))) + SextSrc = Builder.buildSExtOrTrunc(DstTy, SextSrc).getReg(0); + if (TruncSrc && (DstTy != MRI.getType(TruncSrc))) + TruncSrc = Builder.buildAnyExtOrTrunc(DstTy, TruncSrc).getReg(0); + Builder.buildAnd(DstReg, SextSrc ? 
SextSrc : TruncSrc, Mask); markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts); return true; } @@ -176,9 +185,9 @@ public: LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI;); LLT SrcTy = MRI.getType(SrcReg); uint64_t SizeInBits = SrcTy.getScalarSizeInBits(); - Builder.buildInstr( - TargetOpcode::G_SEXT_INREG, {DstReg}, - {Builder.buildAnyExtOrTrunc(DstTy, TruncSrc), SizeInBits}); + if (DstTy != MRI.getType(TruncSrc)) + TruncSrc = Builder.buildAnyExtOrTrunc(DstTy, TruncSrc).getReg(0); + Builder.buildSExtInReg(DstReg, TruncSrc, SizeInBits); markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts); return true; } @@ -544,12 +553,14 @@ public: MachineIRBuilder &MIB; const LegalizerInfo &LI; - private: + // Stores the best register found in the current query so far. + Register CurrentBest = Register(); + /// Given an concat_vector op \p Concat and a start bit and size, try to /// find the origin of the value defined by that start position and size. /// - /// \returns A register if a value can be found, otherwise an empty - /// Register. + /// \returns a register with the requested size, or the current best + /// register found during the current query. Register findValueFromConcat(GConcatVectors &Concat, unsigned StartBit, unsigned Size) { assert(Size > 0); @@ -566,22 +577,22 @@ public: // FIXME: we might be able return multiple sources? Or create an // appropriate concat to make it fit. if (InRegOffset + Size > SrcSize) - return Register(); + return CurrentBest; - // If the bits exactly cover a single source, then return the operand as - // our value reg. Register SrcReg = Concat.getReg(StartSrcIdx); - if (InRegOffset == 0 && Size == SrcSize) - return SrcReg; // A source operand matches exactly. 
+ if (InRegOffset == 0 && Size == SrcSize) { + CurrentBest = SrcReg; + return findValueFromDefImpl(SrcReg, 0, Size); + } - return findValueFromDef(SrcReg, InRegOffset, Size); + return findValueFromDefImpl(SrcReg, InRegOffset, Size); } /// Given an build_vector op \p BV and a start bit and size, try to find /// the origin of the value defined by that start position and size. /// - /// \returns A register if a value can be found, otherwise an empty - /// Register. + /// \returns a register with the requested size, or the current best + /// register found during the current query. Register findValueFromBuildVector(GBuildVector &BV, unsigned StartBit, unsigned Size) { assert(Size > 0); @@ -596,17 +607,21 @@ public: unsigned InRegOffset = StartBit % SrcSize; if (InRegOffset != 0) - return Register(); // Give up, bits don't start at a scalar source. + return CurrentBest; // Give up, bits don't start at a scalar source. if (Size < SrcSize) - return Register(); // Scalar source is too large for requested bits. + return CurrentBest; // Scalar source is too large for requested bits. // If the bits cover multiple sources evenly, then create a new // build_vector to synthesize the required size, if that's been requested. if (Size > SrcSize) { if (Size % SrcSize > 0) - return Register(); // Isn't covered exactly by sources. + return CurrentBest; // Isn't covered exactly by sources. unsigned NumSrcsUsed = Size / SrcSize; + // If we're requesting all of the sources, just return this def. 
+ if (NumSrcsUsed == BV.getNumSources()) + return BV.getReg(0); + LLT SrcTy = MRI.getType(Src1Reg); LLT NewBVTy = LLT::fixed_vector(NumSrcsUsed, SrcTy); @@ -614,7 +629,7 @@ public: LegalizeActionStep ActionStep = LI.getAction({TargetOpcode::G_BUILD_VECTOR, {NewBVTy, SrcTy}}); if (ActionStep.Action != LegalizeActions::Legal) - return Register(); + return CurrentBest; SmallVector<Register> NewSrcs; for (unsigned SrcIdx = StartSrcIdx; SrcIdx < StartSrcIdx + NumSrcsUsed; @@ -630,8 +645,8 @@ public: /// Given an G_INSERT op \p MI and a start bit and size, try to find /// the origin of the value defined by that start position and size. /// - /// \returns A register if a value can be found, otherwise an empty - /// Register. + /// \returns a register with the requested size, or the current best + /// register found during the current query. Register findValueFromInsert(MachineInstr &MI, unsigned StartBit, unsigned Size) { assert(MI.getOpcode() == TargetOpcode::G_INSERT); @@ -685,28 +700,25 @@ public: if (EndBit <= InsertOffset || InsertedEndBit <= StartBit) { SrcRegToUse = ContainerSrcReg; NewStartBit = StartBit; - return findValueFromDef(SrcRegToUse, NewStartBit, Size); + return findValueFromDefImpl(SrcRegToUse, NewStartBit, Size); } if (InsertOffset <= StartBit && EndBit <= InsertedEndBit) { SrcRegToUse = InsertedReg; NewStartBit = StartBit - InsertOffset; - return findValueFromDef(SrcRegToUse, NewStartBit, Size); + if (NewStartBit == 0 && + Size == MRI.getType(SrcRegToUse).getSizeInBits()) + CurrentBest = SrcRegToUse; + return findValueFromDefImpl(SrcRegToUse, NewStartBit, Size); } // The bit range spans both the inserted and container regions. return Register(); } - public: - ArtifactValueFinder(MachineRegisterInfo &Mri, MachineIRBuilder &Builder, - const LegalizerInfo &Info) - : MRI(Mri), MIB(Builder), LI(Info) {} - - /// Try to find a source of the value defined in the def \p DefReg, starting - /// at position \p StartBit with size \p Size. 
- /// \returns an empty Register if no value could be found, or \p DefReg if - /// if that was the best we could do. - Register findValueFromDef(Register DefReg, unsigned StartBit, - unsigned Size) { + /// Internal implementation for findValueFromDef(). findValueFromDef() + /// initializes some data like the CurrentBest register, which this method + /// and its callees rely upon. + Register findValueFromDefImpl(Register DefReg, unsigned StartBit, + unsigned Size) { MachineInstr *Def = getDefIgnoringCopies(DefReg, MRI); // If the instruction has a single def, then simply delegate the search. // For unmerge however with multiple defs, we need to compute the offset @@ -724,7 +736,7 @@ public: } Register SrcReg = Def->getOperand(Def->getNumOperands() - 1).getReg(); Register SrcOriginReg = - findValueFromDef(SrcReg, StartBit + DefStartBit, Size); + findValueFromDefImpl(SrcReg, StartBit + DefStartBit, Size); if (SrcOriginReg) return SrcOriginReg; // Failed to find a further value. If the StartBit and Size perfectly @@ -732,7 +744,7 @@ public: // nothing. 
if (StartBit == 0 && Size == DefSize) return DefReg; - return Register(); + return CurrentBest; } case TargetOpcode::G_BUILD_VECTOR: return findValueFromBuildVector(cast<GBuildVector>(*Def), StartBit, @@ -740,41 +752,48 @@ public: case TargetOpcode::G_INSERT: return findValueFromInsert(*Def, StartBit, Size); default: - return Register(); + return CurrentBest; } } - }; - bool tryCombineUnmergeValues(GUnmerge &MI, - SmallVectorImpl<MachineInstr *> &DeadInsts, - SmallVectorImpl<Register> &UpdatedDefs, - GISelChangeObserver &Observer) { - unsigned NumDefs = MI.getNumDefs(); - Register SrcReg = MI.getSourceReg(); - MachineInstr *SrcDef = getDefIgnoringCopies(SrcReg, MRI); - if (!SrcDef) - return false; - - LLT OpTy = MRI.getType(SrcReg); - LLT DestTy = MRI.getType(MI.getReg(0)); - unsigned SrcDefIdx = getDefIndex(*SrcDef, SrcReg); + public: + ArtifactValueFinder(MachineRegisterInfo &Mri, MachineIRBuilder &Builder, + const LegalizerInfo &Info) + : MRI(Mri), MIB(Builder), LI(Info) {} - Builder.setInstrAndDebugLoc(MI); + /// Try to find a source of the value defined in the def \p DefReg, starting + /// at position \p StartBit with size \p Size. + /// \returns a register with the requested size, or an empty Register if no + /// better value could be found. + Register findValueFromDef(Register DefReg, unsigned StartBit, + unsigned Size) { + CurrentBest = Register(); + Register FoundReg = findValueFromDefImpl(DefReg, StartBit, Size); + return FoundReg != DefReg ? FoundReg : Register(); + } - auto tryCombineViaValueFinder = [&]() { - ArtifactValueFinder ValueFinder(MRI, Builder, LI); + /// Try to combine the defs of an unmerge \p MI by attempting to find + /// values that provides the bits for each def reg. + /// \returns true if all the defs of the unmerge have been made dead. 
+ bool tryCombineUnmergeDefs(GUnmerge &MI, GISelChangeObserver &Observer, + SmallVectorImpl<Register> &UpdatedDefs) { + unsigned NumDefs = MI.getNumDefs(); + LLT DestTy = MRI.getType(MI.getReg(0)); SmallBitVector DeadDefs(NumDefs); for (unsigned DefIdx = 0; DefIdx < NumDefs; ++DefIdx) { Register DefReg = MI.getReg(DefIdx); - Register FoundVal = - ValueFinder.findValueFromDef(DefReg, 0, DestTy.getSizeInBits()); - if (!FoundVal || FoundVal == DefReg) + if (MRI.use_nodbg_empty(DefReg)) { + DeadDefs[DefIdx] = true; + continue; + } + Register FoundVal = findValueFromDef(DefReg, 0, DestTy.getSizeInBits()); + if (!FoundVal) continue; if (MRI.getType(FoundVal) != DestTy) continue; - replaceRegOrBuildCopy(DefReg, FoundVal, MRI, Builder, UpdatedDefs, + replaceRegOrBuildCopy(DefReg, FoundVal, MRI, MIB, UpdatedDefs, Observer); // We only want to replace the uses, not the def of the old reg. Observer.changingInstr(MI); @@ -782,12 +801,31 @@ public: Observer.changedInstr(MI); DeadDefs[DefIdx] = true; } - if (DeadDefs.all()) { - markInstAndDefDead(MI, *SrcDef, DeadInsts, SrcDefIdx); - return true; - } + return DeadDefs.all(); + } + }; + + bool tryCombineUnmergeValues(GUnmerge &MI, + SmallVectorImpl<MachineInstr *> &DeadInsts, + SmallVectorImpl<Register> &UpdatedDefs, + GISelChangeObserver &Observer) { + unsigned NumDefs = MI.getNumDefs(); + Register SrcReg = MI.getSourceReg(); + MachineInstr *SrcDef = getDefIgnoringCopies(SrcReg, MRI); + if (!SrcDef) return false; - }; + + LLT OpTy = MRI.getType(SrcReg); + LLT DestTy = MRI.getType(MI.getReg(0)); + unsigned SrcDefIdx = getDefIndex(*SrcDef, SrcReg); + + Builder.setInstrAndDebugLoc(MI); + + ArtifactValueFinder Finder(MRI, Builder, LI); + if (Finder.tryCombineUnmergeDefs(MI, Observer, UpdatedDefs)) { + markInstAndDefDead(MI, *SrcDef, DeadInsts, SrcDefIdx); + return true; + } if (auto *SrcUnmerge = dyn_cast<GUnmerge>(SrcDef)) { // %0:_(<4 x s16>) = G_FOO @@ -813,7 +851,7 @@ public: return false; break; default: - return 
tryCombineViaValueFinder(); + return false; } auto NewUnmerge = Builder.buildUnmerge(DestTy, SrcUnmergeSrc); @@ -845,11 +883,7 @@ public: ConvertOp, OpTy, DestTy)) { // We might have a chance to combine later by trying to combine // unmerge(cast) first - if (tryFoldUnmergeCast(MI, *SrcDef, DeadInsts, UpdatedDefs)) - return true; - - // Try using the value finder. - return tryCombineViaValueFinder(); + return tryFoldUnmergeCast(MI, *SrcDef, DeadInsts, UpdatedDefs); } const unsigned NumMergeRegs = MergeI->getNumOperands() - 1; @@ -1042,7 +1076,7 @@ public: default: return false; case TargetOpcode::G_ANYEXT: - Changed = tryCombineAnyExt(MI, DeadInsts, UpdatedDefs); + Changed = tryCombineAnyExt(MI, DeadInsts, UpdatedDefs, WrapperObserver); break; case TargetOpcode::G_ZEXT: Changed = tryCombineZExt(MI, DeadInsts, UpdatedDefs, WrapperObserver); diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h index 67141f3a6326..74615c73741a 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h @@ -256,6 +256,20 @@ private: LLT SrcTy, LLT NarrowTy, unsigned ScalarOpc); + // Memcpy family legalization helpers. + LegalizeResult lowerMemset(MachineInstr &MI, Register Dst, Register Val, + uint64_t KnownLen, Align Alignment, + bool IsVolatile); + LegalizeResult lowerMemcpyInline(MachineInstr &MI, Register Dst, Register Src, + uint64_t KnownLen, Align DstAlign, + Align SrcAlign, bool IsVolatile); + LegalizeResult lowerMemcpy(MachineInstr &MI, Register Dst, Register Src, + uint64_t KnownLen, uint64_t Limit, Align DstAlign, + Align SrcAlign, bool IsVolatile); + LegalizeResult lowerMemmove(MachineInstr &MI, Register Dst, Register Src, + uint64_t KnownLen, Align DstAlign, Align SrcAlign, + bool IsVolatile); + public: /// Return the alignment to use for a stack temporary object with the given /// type. 
@@ -402,6 +416,9 @@ public: LegalizeResult lowerDIVREM(MachineInstr &MI); LegalizeResult lowerAbsToAddXor(MachineInstr &MI); LegalizeResult lowerAbsToMaxNeg(MachineInstr &MI); + LegalizeResult lowerVectorReduction(MachineInstr &MI); + LegalizeResult lowerMemcpyInline(MachineInstr &MI); + LegalizeResult lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen = 0); }; /// Helper function that creates a libcall to the given \p Name using the given diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h index 4fdfabbfb161..68c14240ebc7 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h @@ -15,8 +15,6 @@ #define LLVM_CODEGEN_GLOBALISEL_LEGALIZERINFO_H #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/None.h" -#include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallBitVector.h" #include "llvm/ADT/SmallVector.h" @@ -113,6 +111,14 @@ struct LegalityQuery { LLT MemoryTy; uint64_t AlignInBits; AtomicOrdering Ordering; + + MemDesc() = default; + MemDesc(LLT MemoryTy, uint64_t AlignInBits, AtomicOrdering Ordering) + : MemoryTy(MemoryTy), AlignInBits(AlignInBits), Ordering(Ordering) {} + MemDesc(const MachineMemOperand &MMO) + : MemoryTy(MMO.getMemoryType()), + AlignInBits(MMO.getAlign().value() * 8), + Ordering(MMO.getSuccessOrdering()) {} }; /// Operations which require memory can use this to place requirements on the @@ -293,6 +299,10 @@ LegalityPredicate scalarOrEltNarrowerThan(unsigned TypeIdx, unsigned Size); /// type that's wider than the given size. LegalityPredicate scalarOrEltWiderThan(unsigned TypeIdx, unsigned Size); +/// True iff the specified type index is a scalar whose size is not a multiple +/// of Size. +LegalityPredicate sizeNotMultipleOf(unsigned TypeIdx, unsigned Size); + /// True iff the specified type index is a scalar whose size is not a power of /// 2. 
LegalityPredicate sizeNotPow2(unsigned TypeIdx); @@ -348,6 +358,11 @@ LegalizeMutation changeElementSizeTo(unsigned TypeIdx, unsigned FromTypeIdx); /// next power of 2. LegalizeMutation widenScalarOrEltToNextPow2(unsigned TypeIdx, unsigned Min = 0); +/// Widen the scalar type or vector element type for the given type index to +/// next multiple of \p Size. +LegalizeMutation widenScalarOrEltToNextMultipleOf(unsigned TypeIdx, + unsigned Size); + /// Add more elements to the type for the given type index to the next power of /// 2. LegalizeMutation moreElementsToNextPow2(unsigned TypeIdx, unsigned Min = 0); @@ -828,6 +843,16 @@ public: LegalizeMutations::widenScalarOrEltToNextPow2(TypeIdx, MinSize)); } + /// Widen the scalar to the next multiple of Size. No effect if the + /// type is not a scalar or is a multiple of Size. + LegalizeRuleSet &widenScalarToNextMultipleOf(unsigned TypeIdx, + unsigned Size) { + using namespace LegalityPredicates; + return actionIf( + LegalizeAction::WidenScalar, sizeNotMultipleOf(typeIdx(TypeIdx), Size), + LegalizeMutations::widenScalarOrEltToNextMultipleOf(TypeIdx, Size)); + } + /// Widen the scalar or vector element type to the next power of two that is /// at least MinSize. No effect if the scalar size is a power of two. LegalizeRuleSet &widenScalarOrEltToNextPow2(unsigned TypeIdx, diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LoadStoreOpt.h b/llvm/include/llvm/CodeGen/GlobalISel/LoadStoreOpt.h new file mode 100644 index 000000000000..29575f386d7a --- /dev/null +++ b/llvm/include/llvm/CodeGen/GlobalISel/LoadStoreOpt.h @@ -0,0 +1,165 @@ +//== llvm/CodeGen/GlobalISel/LoadStoreOpt.h - LoadStoreOpt -------*- C++ -*-==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// This is an optimization pass for GlobalISel generic memory operations. +/// Specifically, it focuses on merging stores and loads to consecutive +/// addresses. +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_GLOBALISEL_LOADSTOREOPT_H +#define LLVM_CODEGEN_GLOBALISEL_LOADSTOREOPT_H + +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h" +#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" +#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" +#include "llvm/CodeGen/GlobalISel/Utils.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" + +namespace llvm { +// Forward declarations. +class MachineRegisterInfo; +class TargetTransformInfo; +namespace GISelAddressing { +/// Helper struct to store a base, index and offset that forms an address +struct BaseIndexOffset { + Register BaseReg; + Register IndexReg; + int64_t Offset = 0; + bool IsIndexSignExt = false; +}; + +/// Returns a BaseIndexOffset which describes the pointer in \p Ptr. +BaseIndexOffset getPointerInfo(Register Ptr, MachineRegisterInfo &MRI); + +/// Compute whether or not a memory access at \p MI1 aliases with an access at +/// \p MI2 \returns true if either alias/no-alias is known. Sets \p IsAlias +/// accordingly. +bool aliasIsKnownForLoadStore(const MachineInstr &MI1, const MachineInstr &MI2, + bool &IsAlias, MachineRegisterInfo &MRI); + +/// Returns true if the instruction \p MI may alias \p Other. 
+/// This function uses multiple strategies to detect aliasing, whereas +/// aliasIsKnownForLoadStore just looks at the addresses of load/stores and is +/// tries to reason about base/index/offsets. +bool instMayAlias(const MachineInstr &MI, const MachineInstr &Other, + MachineRegisterInfo &MRI, AliasAnalysis *AA); +} // namespace GISelAddressing + +using namespace GISelAddressing; + +class LoadStoreOpt : public MachineFunctionPass { +public: + static char ID; + +private: + /// An input function to decide if the pass should run or not + /// on the given MachineFunction. + std::function<bool(const MachineFunction &)> DoNotRunPass; + + MachineRegisterInfo *MRI; + const TargetLowering *TLI; + MachineFunction *MF; + AliasAnalysis *AA; + const LegalizerInfo *LI; + + MachineIRBuilder Builder; + + /// Initialize the field members using \p MF. + void init(MachineFunction &MF); + + class StoreMergeCandidate { + public: + // The base pointer used as the base for all stores in this candidate. + Register BasePtr; + // Our algorithm is very simple at the moment. We assume that in instruction + // order stores are writing to incremeneting consecutive addresses. So when + // we walk the block in reverse order, the next eligible store must write to + // an offset one store width lower than CurrentLowestOffset. + uint64_t CurrentLowestOffset; + SmallVector<GStore *> Stores; + // A vector of MachineInstr/unsigned pairs to denote potential aliases that + // need to be checked before the candidate is considered safe to merge. The + // unsigned value is an index into the Stores vector. The indexed store is + // the highest-indexed store that has already been checked to not have an + // alias with the instruction. We record this so we don't have to repeat + // alias checks that have been already done, only those with stores added + // after the potential alias is recorded. 
+ SmallVector<std::pair<MachineInstr *, unsigned>> PotentialAliases; + + void addPotentialAlias(MachineInstr &MI); + + /// Reset this candidate back to an empty one. + void reset() { + Stores.clear(); + PotentialAliases.clear(); + CurrentLowestOffset = 0; + BasePtr = Register(); + } + }; + + bool isLegalOrBeforeLegalizer(const LegalityQuery &Query, + MachineFunction &MF) const; + /// If the given store is valid to be a member of the candidate, add it and + /// return true. Otherwise, returns false. + bool addStoreToCandidate(GStore &MI, StoreMergeCandidate &C); + /// Returns true if the instruction \p MI would potentially alias with any + /// stores in the candidate \p C. + bool operationAliasesWithCandidate(MachineInstr &MI, StoreMergeCandidate &C); + /// Merges the stores in the given vector into a wide store. + /// \p returns true if at least some of the stores were merged. + /// This may decide not to merge stores if heuristics predict it will not be + /// worth it. + bool mergeStores(SmallVectorImpl<GStore *> &StoresToMerge); + /// Perform a merge of all the stores in \p Stores into a single store. + /// Erases the old stores from the block when finished. + /// \returns true if merging was done. It may fail to perform a merge if + /// there are issues with materializing legal wide values. + bool doSingleStoreMerge(SmallVectorImpl<GStore *> &Stores); + bool processMergeCandidate(StoreMergeCandidate &C); + bool mergeBlockStores(MachineBasicBlock &MBB); + bool mergeFunctionStores(MachineFunction &MF); + + /// Initialize some target-specific data structures for the store merging + /// optimization. \p AddrSpace indicates which address space to use when + /// probing the legalizer info for legal stores. + void initializeStoreMergeTargetInfo(unsigned AddrSpace = 0); + /// A map between address space numbers and a bitvector of supported stores + /// sizes. Each bit in the bitvector represents whether a store size of + /// that bit's value is legal. E.g. 
if bit 64 is set, then 64 bit scalar + /// stores are legal. + DenseMap<unsigned, BitVector> LegalStoreSizes; + bool IsPreLegalizer; + /// Contains instructions to be erased at the end of a block scan. + SmallSet<MachineInstr *, 16> InstsToErase; + +public: + LoadStoreOpt(); + LoadStoreOpt(std::function<bool(const MachineFunction &)>); + + StringRef getPassName() const override { return "LoadStoreOpt"; } + + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties() + .set(MachineFunctionProperties::Property::IsSSA); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override; + + bool runOnMachineFunction(MachineFunction &MF) override; +}; + +} // End namespace llvm. + +#endif diff --git a/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h b/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h index 4c6b47ab9bc8..e813d030eec3 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h @@ -63,7 +63,7 @@ struct ConstantMatch { int64_t &CR; ConstantMatch(int64_t &C) : CR(C) {} bool match(const MachineRegisterInfo &MRI, Register Reg) { - if (auto MaybeCst = getConstantVRegSExtVal(Reg, MRI)) { + if (auto MaybeCst = getIConstantVRegSExtVal(Reg, MRI)) { CR = *MaybeCst; return true; } @@ -73,21 +73,46 @@ struct ConstantMatch { inline ConstantMatch m_ICst(int64_t &Cst) { return ConstantMatch(Cst); } -struct ICstRegMatch { - Register &CR; - ICstRegMatch(Register &C) : CR(C) {} +struct GCstAndRegMatch { + Optional<ValueAndVReg> &ValReg; + GCstAndRegMatch(Optional<ValueAndVReg> &ValReg) : ValReg(ValReg) {} bool match(const MachineRegisterInfo &MRI, Register Reg) { - if (auto MaybeCst = getConstantVRegValWithLookThrough( - Reg, MRI, /*LookThroughInstrs*/ true, - /*HandleFConstants*/ false)) { - CR = MaybeCst->VReg; - return true; - } - return false; + ValReg = getIConstantVRegValWithLookThrough(Reg, MRI); + return ValReg ? 
true : false; } }; -inline ICstRegMatch m_ICst(Register &Reg) { return ICstRegMatch(Reg); } +inline GCstAndRegMatch m_GCst(Optional<ValueAndVReg> &ValReg) { + return GCstAndRegMatch(ValReg); +} + +struct GFCstAndRegMatch { + Optional<FPValueAndVReg> &FPValReg; + GFCstAndRegMatch(Optional<FPValueAndVReg> &FPValReg) : FPValReg(FPValReg) {} + bool match(const MachineRegisterInfo &MRI, Register Reg) { + FPValReg = getFConstantVRegValWithLookThrough(Reg, MRI); + return FPValReg ? true : false; + } +}; + +inline GFCstAndRegMatch m_GFCst(Optional<FPValueAndVReg> &FPValReg) { + return GFCstAndRegMatch(FPValReg); +} + +struct GFCstOrSplatGFCstMatch { + Optional<FPValueAndVReg> &FPValReg; + GFCstOrSplatGFCstMatch(Optional<FPValueAndVReg> &FPValReg) + : FPValReg(FPValReg) {} + bool match(const MachineRegisterInfo &MRI, Register Reg) { + return (FPValReg = getFConstantSplat(Reg, MRI)) || + (FPValReg = getFConstantVRegValWithLookThrough(Reg, MRI)); + }; +}; + +inline GFCstOrSplatGFCstMatch +m_GFCstOrSplat(Optional<FPValueAndVReg> &FPValReg) { + return GFCstOrSplatGFCstMatch(FPValReg); +} /// Matcher for a specific constant value. 
struct SpecificConstantMatch { diff --git a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h index 9b652d8e16bc..069f71b54328 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h @@ -1537,6 +1537,14 @@ public: return buildInstr(TargetOpcode::G_XOR, {Dst}, {Src0, NegOne}); } + /// Build and insert integer negation + /// \p Zero = G_CONSTANT 0 + /// \p Res = G_SUB Zero, \p Op0 + MachineInstrBuilder buildNeg(const DstOp &Dst, const SrcOp &Src0) { + auto Zero = buildConstant(Dst.getLLTTy(*getMRI()), 0); + return buildInstr(TargetOpcode::G_SUB, {Dst}, {Zero, Src0}); + } + /// Build and insert \p Res = G_CTPOP \p Op0, \p Src0 MachineInstrBuilder buildCTPOP(const DstOp &Dst, const SrcOp &Src0) { return buildInstr(TargetOpcode::G_CTPOP, {Dst}, {Src0}); diff --git a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h index 818475a48abb..86545b976b8d 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h @@ -14,6 +14,9 @@ #ifndef LLVM_CODEGEN_GLOBALISEL_UTILS_H #define LLVM_CODEGEN_GLOBALISEL_UTILS_H +#include "GISelWorkList.h" +#include "LostDebugLocObserver.h" +#include "llvm/ADT/APFloat.h" #include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/Register.h" @@ -44,6 +47,7 @@ class TargetRegisterClass; class ConstantInt; class ConstantFP; class APFloat; +class MachineIRBuilder; // Convenience macros for dealing with vector reduction opcodes. #define GISEL_VECREDUCE_CASES_ALL \ @@ -162,13 +166,12 @@ void reportGISelWarning(MachineFunction &MF, const TargetPassConfig &TPC, MachineOptimizationRemarkMissed &R); /// If \p VReg is defined by a G_CONSTANT, return the corresponding value. 
-Optional<APInt> getConstantVRegVal(Register VReg, - const MachineRegisterInfo &MRI); +Optional<APInt> getIConstantVRegVal(Register VReg, + const MachineRegisterInfo &MRI); -/// If \p VReg is defined by a G_CONSTANT fits in int64_t -/// returns it. -Optional<int64_t> getConstantVRegSExtVal(Register VReg, - const MachineRegisterInfo &MRI); +/// If \p VReg is defined by a G_CONSTANT fits in int64_t returns it. +Optional<int64_t> getIConstantVRegSExtVal(Register VReg, + const MachineRegisterInfo &MRI); /// Simple struct used to hold a constant integer value and a virtual /// register. @@ -176,22 +179,32 @@ struct ValueAndVReg { APInt Value; Register VReg; }; -/// If \p VReg is defined by a statically evaluable chain of -/// instructions rooted on a G_F/CONSTANT (\p LookThroughInstrs == true) -/// and that constant fits in int64_t, returns its value as well as the -/// virtual register defined by this G_F/CONSTANT. -/// When \p LookThroughInstrs == false this function behaves like -/// getConstantVRegVal. -/// When \p HandleFConstants == false the function bails on G_FCONSTANTs. -/// When \p LookThroughAnyExt == true the function treats G_ANYEXT same as -/// G_SEXT. + +/// If \p VReg is defined by a statically evaluable chain of instructions rooted +/// on a G_CONSTANT returns its APInt value and def register. Optional<ValueAndVReg> -getConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, - bool LookThroughInstrs = true, - bool HandleFConstants = true, - bool LookThroughAnyExt = false); -const ConstantInt *getConstantIntVRegVal(Register VReg, - const MachineRegisterInfo &MRI); +getIConstantVRegValWithLookThrough(Register VReg, + const MachineRegisterInfo &MRI, + bool LookThroughInstrs = true); + +/// If \p VReg is defined by a statically evaluable chain of instructions rooted +/// on a G_CONSTANT or G_FCONSTANT returns its value as APInt and def register. 
+Optional<ValueAndVReg> getAnyConstantVRegValWithLookThrough( + Register VReg, const MachineRegisterInfo &MRI, + bool LookThroughInstrs = true, bool LookThroughAnyExt = false); + +struct FPValueAndVReg { + APFloat Value; + Register VReg; +}; + +/// If \p VReg is defined by a statically evaluable chain of instructions rooted +/// on a G_FCONSTANT returns its APFloat value and def register. +Optional<FPValueAndVReg> +getFConstantVRegValWithLookThrough(Register VReg, + const MachineRegisterInfo &MRI, + bool LookThroughInstrs = true); + const ConstantFP* getConstantFPVRegVal(Register VReg, const MachineRegisterInfo &MRI); @@ -254,6 +267,14 @@ Optional<APFloat> ConstantFoldFPBinOp(unsigned Opcode, const Register Op1, const Register Op2, const MachineRegisterInfo &MRI); +/// Tries to constant fold a vector binop with sources \p Op1 and \p Op2. +/// If successful, returns the G_BUILD_VECTOR representing the folded vector +/// constant. \p MIB should have an insertion point already set to create new +/// G_CONSTANT instructions as needed. +Optional<MachineInstr *> +ConstantFoldVectorBinop(unsigned Opcode, const Register Op1, const Register Op2, + const MachineRegisterInfo &MRI, MachineIRBuilder &MIB); + Optional<APInt> ConstantFoldExtOp(unsigned Opcode, const Register Op1, uint64_t Imm, const MachineRegisterInfo &MRI); @@ -261,6 +282,11 @@ Optional<APFloat> ConstantFoldIntToFloat(unsigned Opcode, LLT DstTy, Register Src, const MachineRegisterInfo &MRI); +/// Tries to constant fold a G_CTLZ operation on \p Src. If \p Src is a vector +/// then it tries to do an element-wise constant fold. +Optional<SmallVector<unsigned>> +ConstantFoldCTLZ(Register Src, const MachineRegisterInfo &MRI); + /// Test if the given value is known to have exactly one bit set. This differs /// from computeKnownBits in that it doesn't necessarily determine which bit is /// set. 
@@ -346,15 +372,23 @@ Optional<int> getSplatIndex(MachineInstr &MI); Optional<int64_t> getBuildVectorConstantSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI); +/// Returns a floating point scalar constant of a build vector splat if it +/// exists. When \p AllowUndef == true some elements can be undef but not all. +Optional<FPValueAndVReg> getFConstantSplat(Register VReg, + const MachineRegisterInfo &MRI, + bool AllowUndef = true); + /// Return true if the specified instruction is a G_BUILD_VECTOR or /// G_BUILD_VECTOR_TRUNC where all of the elements are 0 or undef. bool isBuildVectorAllZeros(const MachineInstr &MI, - const MachineRegisterInfo &MRI); + const MachineRegisterInfo &MRI, + bool AllowUndef = false); /// Return true if the specified instruction is a G_BUILD_VECTOR or /// G_BUILD_VECTOR_TRUNC where all of the elements are ~0 or undef. bool isBuildVectorAllOnes(const MachineInstr &MI, - const MachineRegisterInfo &MRI); + const MachineRegisterInfo &MRI, + bool AllowUndef = false); /// \returns a value when \p MI is a vector splat. The splat can be either a /// Register or a constant. @@ -378,6 +412,17 @@ bool isBuildVectorAllOnes(const MachineInstr &MI, Optional<RegOrConstant> getVectorSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI); +/// Determines if \p MI defines a constant integer or a build vector of +/// constant integers. Treats undef values as constants. +bool isConstantOrConstantVector(MachineInstr &MI, + const MachineRegisterInfo &MRI); + +/// Determines if \p MI defines a constant integer or a splat vector of +/// constant integers. +/// \returns the scalar constant or None. +Optional<APInt> isConstantOrConstantSplatVector(MachineInstr &MI, + const MachineRegisterInfo &MRI); + /// Attempt to match a unary predicate against a scalar/splat constant or every /// element of a constant G_BUILD_VECTOR. If \p ConstVal is null, the source /// value was undef. 
@@ -398,5 +443,14 @@ int64_t getICmpTrueVal(const TargetLowering &TLI, bool IsVector, bool IsFP); bool shouldOptForSize(const MachineBasicBlock &MBB, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI); +using SmallInstListTy = GISelWorkList<4>; +void saveUsesAndErase(MachineInstr &MI, MachineRegisterInfo &MRI, + LostDebugLocObserver *LocObserver, + SmallInstListTy &DeadInstChain); +void eraseInstrs(ArrayRef<MachineInstr *> DeadInstrs, MachineRegisterInfo &MRI, + LostDebugLocObserver *LocObserver = nullptr); +void eraseInstr(MachineInstr &MI, MachineRegisterInfo &MRI, + LostDebugLocObserver *LocObserver = nullptr); + } // End namespace llvm. #endif diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h index 6803f4d76cf0..fd106f55a43d 100644 --- a/llvm/include/llvm/CodeGen/ISDOpcodes.h +++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h @@ -1260,6 +1260,11 @@ static const int FIRST_TARGET_STRICTFP_OPCODE = BUILTIN_OP_END + 400; /// be used with SelectionDAG::getMemIntrinsicNode. static const int FIRST_TARGET_MEMORY_OPCODE = BUILTIN_OP_END + 500; +/// Whether this is bitwise logic opcode. +inline bool isBitwiseLogicOp(unsigned Opcode) { + return Opcode == ISD::AND || Opcode == ISD::OR || Opcode == ISD::XOR; +} + /// Get underlying scalar opcode for VECREDUCE opcode. /// For example ISD::AND for ISD::VECREDUCE_AND. NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode); @@ -1267,6 +1272,12 @@ NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode); /// Whether this is a vector-predicated Opcode. bool isVPOpcode(unsigned Opcode); +/// Whether this is a vector-predicated binary operation opcode. +bool isVPBinaryOp(unsigned Opcode); + +/// Whether this is a vector-predicated reduction opcode. +bool isVPReduction(unsigned Opcode); + /// The operand position of the vector mask. 
Optional<unsigned> getVPMaskIdx(unsigned Opcode); diff --git a/llvm/include/llvm/CodeGen/IndirectThunks.h b/llvm/include/llvm/CodeGen/IndirectThunks.h index 74973f38bc79..90f9912f0ee0 100644 --- a/llvm/include/llvm/CodeGen/IndirectThunks.h +++ b/llvm/include/llvm/CodeGen/IndirectThunks.h @@ -62,7 +62,7 @@ void ThunkInserter<Derived>::createThunkFunction(MachineModuleInfo &MMI, AttrBuilder B; B.addAttribute(llvm::Attribute::NoUnwind); B.addAttribute(llvm::Attribute::Naked); - F->addAttributes(llvm::AttributeList::FunctionIndex, B); + F->addFnAttrs(B); // Populate our function a bit so that we can verify. BasicBlock *Entry = BasicBlock::Create(Ctx, "entry", F); diff --git a/llvm/include/llvm/CodeGen/LinkAllAsmWriterComponents.h b/llvm/include/llvm/CodeGen/LinkAllAsmWriterComponents.h index 81b0025fdddc..c22f9d49f374 100644 --- a/llvm/include/llvm/CodeGen/LinkAllAsmWriterComponents.h +++ b/llvm/include/llvm/CodeGen/LinkAllAsmWriterComponents.h @@ -24,6 +24,9 @@ namespace { // delete it all as dead code, even with whole program optimization, // yet is effectively a NO-OP. As the compiler isn't smart enough // to know that getenv() never returns -1, this will do the job. + // This is so that globals in the translation units where these functions + // are defined are forced to be initialized, populating various + // registries. if (std::getenv("bar") != (char*) -1) return; diff --git a/llvm/include/llvm/CodeGen/LinkAllCodegenComponents.h b/llvm/include/llvm/CodeGen/LinkAllCodegenComponents.h index 1b13ff53ac85..d615a5db4504 100644 --- a/llvm/include/llvm/CodeGen/LinkAllCodegenComponents.h +++ b/llvm/include/llvm/CodeGen/LinkAllCodegenComponents.h @@ -27,6 +27,9 @@ namespace { // delete it all as dead code, even with whole program optimization, // yet is effectively a NO-OP. As the compiler isn't smart enough // to know that getenv() never returns -1, this will do the job. 
+ // This is so that globals in the translation units where these functions + // are defined are forced to be initialized, populating various + // registries. if (std::getenv("bar") != (char*) -1) return; diff --git a/llvm/include/llvm/CodeGen/LiveInterval.h b/llvm/include/llvm/CodeGen/LiveInterval.h index c2b158ac1b7f..923a45821dd4 100644 --- a/llvm/include/llvm/CodeGen/LiveInterval.h +++ b/llvm/include/llvm/CodeGen/LiveInterval.h @@ -521,11 +521,11 @@ namespace llvm { removeSegment(S.start, S.end, RemoveDeadValNo); } - /// Remove segment pointed to by iterator @p I from this range. This does - /// not remove dead value numbers. - iterator removeSegment(iterator I) { - return segments.erase(I); - } + /// Remove segment pointed to by iterator @p I from this range. + iterator removeSegment(iterator I, bool RemoveDeadValNo = false); + + /// Mark \p ValNo for deletion if no segments in this range use it. + void removeValNoIfDead(VNInfo *ValNo); /// Query Liveness at Idx. /// The sub-instruction slot of Idx doesn't matter, only the instruction diff --git a/llvm/include/llvm/CodeGen/LiveIntervalUnion.h b/llvm/include/llvm/CodeGen/LiveIntervalUnion.h index 4ebe0f2dcfd8..3b6a4a379d72 100644 --- a/llvm/include/llvm/CodeGen/LiveIntervalUnion.h +++ b/llvm/include/llvm/CodeGen/LiveIntervalUnion.h @@ -114,12 +114,19 @@ public: const LiveRange *LR = nullptr; LiveRange::const_iterator LRI; ///< current position in LR ConstSegmentIter LiveUnionI; ///< current position in LiveUnion - Optional<SmallVector<LiveInterval *, 4>> InterferingVRegs; + SmallVector<LiveInterval *, 4> InterferingVRegs; bool CheckedFirstInterference = false; bool SeenAllInterferences = false; unsigned Tag = 0; unsigned UserTag = 0; + // Count the virtual registers in this union that interfere with this + // query's live virtual register, up to maxInterferingRegs. + unsigned collectInterferingVRegs(unsigned MaxInterferingRegs); + + // Was this virtual register visited during collectInterferingVRegs? 
+ bool isSeenInterference(LiveInterval *VirtReg) const; + public: Query() = default; Query(const LiveRange &LR, const LiveIntervalUnion &LIU) @@ -131,7 +138,7 @@ public: const LiveIntervalUnion &NewLiveUnion) { LiveUnion = &NewLiveUnion; LR = &NewLR; - InterferingVRegs = None; + InterferingVRegs.clear(); CheckedFirstInterference = false; SeenAllInterferences = false; Tag = NewLiveUnion.getTag(); @@ -151,20 +158,12 @@ public: // Does this live virtual register interfere with the union? bool checkInterference() { return collectInterferingVRegs(1); } - // Count the virtual registers in this union that interfere with this - // query's live virtual register, up to maxInterferingRegs. - unsigned collectInterferingVRegs( - unsigned MaxInterferingRegs = std::numeric_limits<unsigned>::max()); - - // Was this virtual register visited during collectInterferingVRegs? - bool isSeenInterference(LiveInterval *VirtReg) const; - - // Did collectInterferingVRegs collect all interferences? - bool seenAllInterferences() const { return SeenAllInterferences; } - // Vector generated by collectInterferingVRegs. 
- const SmallVectorImpl<LiveInterval*> &interferingVRegs() const { - return *InterferingVRegs; + const SmallVectorImpl<LiveInterval *> &interferingVRegs( + unsigned MaxInterferingRegs = std::numeric_limits<unsigned>::max()) { + if (!SeenAllInterferences || MaxInterferingRegs < InterferingVRegs.size()) + collectInterferingVRegs(MaxInterferingRegs); + return InterferingVRegs; } }; diff --git a/llvm/include/llvm/CodeGen/LiveVariables.h b/llvm/include/llvm/CodeGen/LiveVariables.h index 9b0667bbbeb0..dee316677b25 100644 --- a/llvm/include/llvm/CodeGen/LiveVariables.h +++ b/llvm/include/llvm/CodeGen/LiveVariables.h @@ -188,6 +188,12 @@ public: //===--------------------------------------------------------------------===// // API to update live variable information + /// Recompute liveness from scratch for a virtual register \p Reg that is + /// known to have a single def that dominates all uses. This can be useful + /// after removing some uses of \p Reg. It is not necessary for the whole + /// machine function to be in SSA form. + void recomputeForSingleDefVirtReg(Register Reg); + /// replaceKillInstruction - Update register kill info by replacing a kill /// instruction with a new one. void replaceKillInstruction(Register Reg, MachineInstr &OldMI, diff --git a/llvm/include/llvm/CodeGen/LowLevelType.h b/llvm/include/llvm/CodeGen/LowLevelType.h index 40985e16b37a..922f93d2e598 100644 --- a/llvm/include/llvm/CodeGen/LowLevelType.h +++ b/llvm/include/llvm/CodeGen/LowLevelType.h @@ -16,8 +16,8 @@ #ifndef LLVM_CODEGEN_LOWLEVELTYPE_H #define LLVM_CODEGEN_LOWLEVELTYPE_H +#include "llvm/CodeGen/ValueTypes.h" #include "llvm/Support/LowLevelTypeImpl.h" -#include "llvm/Support/MachineValueType.h" namespace llvm { @@ -31,6 +31,7 @@ LLT getLLTForType(Type &Ty, const DataLayout &DL); /// Get a rough equivalent of an MVT for a given LLT. MVT can't distinguish /// pointers, so these will convert to a plain integer. 
MVT getMVTForLLT(LLT Ty); +EVT getApproximateEVTForLLT(LLT Ty, const DataLayout &DL, LLVMContext &Ctx); /// Get a rough equivalent of an LLT for a given MVT. LLT does not yet support /// scalarable vector types, and will assert if used. diff --git a/llvm/include/llvm/CodeGen/MIRFSDiscriminator.h b/llvm/include/llvm/CodeGen/MIRFSDiscriminator.h index 6137411b6dba..deb6b37a9bcf 100644 --- a/llvm/include/llvm/CodeGen/MIRFSDiscriminator.h +++ b/llvm/include/llvm/CodeGen/MIRFSDiscriminator.h @@ -57,6 +57,10 @@ public: assert(LowBit < HighBit && "HighBit needs to be greater than Lowbit"); } + StringRef getPassName() const override { + return "Add FS discriminators in MIR"; + } + /// getNumFSBBs() - Return the number of machine BBs that have FS samples. unsigned getNumFSBBs(); diff --git a/llvm/include/llvm/CodeGen/MIRFormatter.h b/llvm/include/llvm/CodeGen/MIRFormatter.h index 9cb92091db50..12c90600f6df 100644 --- a/llvm/include/llvm/CodeGen/MIRFormatter.h +++ b/llvm/include/llvm/CodeGen/MIRFormatter.h @@ -1,9 +1,8 @@ //===-- llvm/CodeGen/MIRFormatter.h -----------------------------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/llvm/include/llvm/CodeGen/MIRSampleProfile.h b/llvm/include/llvm/CodeGen/MIRSampleProfile.h new file mode 100644 index 000000000000..2503524ccfdf --- /dev/null +++ b/llvm/include/llvm/CodeGen/MIRSampleProfile.h @@ -0,0 +1,76 @@ +//===----- MIRSampleProfile.h: SampleFDO Support in MIR ---*- c++ -*-------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains the supoorting functions for machine level Sample FDO +// loader. This is used in Flow Sensitive SampelFDO. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_MIRSAMPLEPROFILE_H +#define LLVM_CODEGEN_MIRSAMPLEPROFILE_H + +#include "llvm/Analysis/ProfileSummaryInfo.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" +#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" +#include "llvm/CodeGen/MachinePostDominators.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Module.h" +#include "llvm/InitializePasses.h" +#include "llvm/ProfileData/InstrProf.h" +#include "llvm/ProfileData/SampleProf.h" +#include "llvm/ProfileData/SampleProfReader.h" + +#include <cassert> + +namespace llvm { + +using namespace sampleprof; + +class MIRProfileLoader; +class MIRProfileLoaderPass : public MachineFunctionPass { + MachineFunction *MF; + std::string ProfileFileName; + FSDiscriminatorPass P; + unsigned LowBit; + unsigned HighBit; + +public: + static char ID; + /// FS bits will only use the '1' bits in the Mask. + MIRProfileLoaderPass(std::string FileName = "", + std::string RemappingFileName = "", + FSDiscriminatorPass P = FSDiscriminatorPass::Pass1); + + /// getMachineFunction - Return the last machine function computed. 
+ const MachineFunction *getMachineFunction() const { return MF; } + + StringRef getPassName() const override { return "SampleFDO loader in MIR"; } + +private: + void init(MachineFunction &MF); + bool runOnMachineFunction(MachineFunction &) override; + bool doInitialization(Module &M) override; + void getAnalysisUsage(AnalysisUsage &AU) const override; + + std::unique_ptr<MIRProfileLoader> MIRSampleLoader; + /// Hold the information of the basic block frequency. + MachineBlockFrequencyInfo *MBFI; +}; + +} // namespace llvm + +#endif // LLVM_CODEGEN_MIRSAMPLEPROFILE_H diff --git a/llvm/include/llvm/CodeGen/MIRYamlMapping.h b/llvm/include/llvm/CodeGen/MIRYamlMapping.h index e7428e7ad260..b6d7c2487126 100644 --- a/llvm/include/llvm/CodeGen/MIRYamlMapping.h +++ b/llvm/include/llvm/CodeGen/MIRYamlMapping.h @@ -694,6 +694,7 @@ struct MachineFunction { // Register information bool TracksRegLiveness = false; bool HasWinCFI = false; + bool FailsVerification = false; std::vector<VirtualRegisterDefinition> VirtualRegisters; std::vector<MachineFunctionLiveIn> LiveIns; Optional<std::vector<FlowStringValue>> CalleeSavedRegisters; @@ -722,6 +723,7 @@ template <> struct MappingTraits<MachineFunction> { YamlIO.mapOptional("failedISel", MF.FailedISel, false); YamlIO.mapOptional("tracksRegLiveness", MF.TracksRegLiveness, false); YamlIO.mapOptional("hasWinCFI", MF.HasWinCFI, false); + YamlIO.mapOptional("failsVerification", MF.FailsVerification, false); YamlIO.mapOptional("registers", MF.VirtualRegisters, std::vector<VirtualRegisterDefinition>()); YamlIO.mapOptional("liveins", MF.LiveIns, diff --git a/llvm/include/llvm/CodeGen/MachineCombinerPattern.h b/llvm/include/llvm/CodeGen/MachineCombinerPattern.h index ac0cc70744d1..67544779f34c 100644 --- a/llvm/include/llvm/CodeGen/MachineCombinerPattern.h +++ b/llvm/include/llvm/CodeGen/MachineCombinerPattern.h @@ -153,7 +153,18 @@ enum class MachineCombinerPattern { FMLSv4f32_OP1, FMLSv4f32_OP2, FMLSv4i32_indexed_OP1, - 
FMLSv4i32_indexed_OP2 + FMLSv4i32_indexed_OP2, + + FMULv2i32_indexed_OP1, + FMULv2i32_indexed_OP2, + FMULv2i64_indexed_OP1, + FMULv2i64_indexed_OP2, + FMULv4i16_indexed_OP1, + FMULv4i16_indexed_OP2, + FMULv4i32_indexed_OP1, + FMULv4i32_indexed_OP2, + FMULv8i16_indexed_OP1, + FMULv8i16_indexed_OP2, }; } // end namespace llvm diff --git a/llvm/include/llvm/CodeGen/MachineDominators.h b/llvm/include/llvm/CodeGen/MachineDominators.h index 46bf73cdd7b6..f749e9ff7e0a 100644 --- a/llvm/include/llvm/CodeGen/MachineDominators.h +++ b/llvm/include/llvm/CodeGen/MachineDominators.h @@ -36,6 +36,7 @@ extern template class DomTreeNodeBase<MachineBasicBlock>; extern template class DominatorTreeBase<MachineBasicBlock, false>; // DomTree extern template class DominatorTreeBase<MachineBasicBlock, true>; // PostDomTree +using MachineDomTree = DomTreeBase<MachineBasicBlock>; using MachineDomTreeNode = DomTreeNodeBase<MachineBasicBlock>; //===------------------------------------- @@ -43,8 +44,6 @@ using MachineDomTreeNode = DomTreeNodeBase<MachineBasicBlock>; /// compute a normal dominator tree. /// class MachineDominatorTree : public MachineFunctionPass { - using DomTreeT = DomTreeBase<MachineBasicBlock>; - /// Helper structure used to hold all the basic blocks /// involved in the split of a critical edge. struct CriticalEdge { @@ -67,7 +66,7 @@ class MachineDominatorTree : public MachineFunctionPass { mutable SmallSet<MachineBasicBlock *, 32> NewBBs; /// The DominatorTreeBase that is used to compute a normal dominator tree. - std::unique_ptr<DomTreeT> DT; + std::unique_ptr<MachineDomTree> DT; /// Apply all the recorded critical edges to the DT. 
/// This updates the underlying DT information in a way that uses @@ -84,8 +83,9 @@ public: calculate(MF); } - DomTreeT &getBase() { - if (!DT) DT.reset(new DomTreeT()); + MachineDomTree &getBase() { + if (!DT) + DT.reset(new MachineDomTree()); applySplitCriticalEdges(); return *DT; } @@ -112,6 +112,12 @@ public: return DT->dominates(A, B); } + void getDescendants(MachineBasicBlock *A, + SmallVectorImpl<MachineBasicBlock *> &Result) { + applySplitCriticalEdges(); + DT->getDescendants(A, Result); + } + bool dominates(const MachineBasicBlock *A, const MachineBasicBlock *B) const { applySplitCriticalEdges(); return DT->dominates(A, B); diff --git a/llvm/include/llvm/CodeGen/MachineFrameInfo.h b/llvm/include/llvm/CodeGen/MachineFrameInfo.h index 28a59703dc60..5df468102a8a 100644 --- a/llvm/include/llvm/CodeGen/MachineFrameInfo.h +++ b/llvm/include/llvm/CodeGen/MachineFrameInfo.h @@ -342,6 +342,8 @@ public: : StackAlignment(assumeAligned(StackAlignment)), StackRealignable(StackRealignable), ForcedRealign(ForcedRealign) {} + MachineFrameInfo(const MachineFrameInfo &) = delete; + /// Return true if there are any stack objects in this function. bool hasStackObjects() const { return !Objects.empty(); } diff --git a/llvm/include/llvm/CodeGen/MachineFunction.h b/llvm/include/llvm/CodeGen/MachineFunction.h index 786fe908f68f..dcbd19ac6b5a 100644 --- a/llvm/include/llvm/CodeGen/MachineFunction.h +++ b/llvm/include/llvm/CodeGen/MachineFunction.h @@ -149,6 +149,9 @@ public: // all sizes attached to them have been eliminated. // TiedOpsRewritten: The twoaddressinstruction pass will set this flag, it // means that tied-def have been rewritten to meet the RegConstraint. + // FailsVerification: Means that the function is not expected to pass machine + // verification. This can be set by passes that introduce known problems that + // have not been fixed yet. 
enum class Property : unsigned { IsSSA, NoPHIs, @@ -159,7 +162,8 @@ public: RegBankSelected, Selected, TiedOpsRewritten, - LastProperty = TiedOpsRewritten, + FailsVerification, + LastProperty = FailsVerification, }; bool hasProperty(Property P) const { @@ -227,7 +231,7 @@ struct LandingPadInfo { : LandingPadBlock(MBB) {} }; -class MachineFunction { +class LLVM_EXTERNAL_VISIBILITY MachineFunction { Function &F; const LLVMTargetMachine &Target; const TargetSubtargetInfo *STI; @@ -536,6 +540,14 @@ public: /// (or DBG_PHI). void finalizeDebugInstrRefs(); + /// Returns true if the function's variable locations should be tracked with + /// instruction referencing. + bool useDebugInstrRef() const; + + /// A reserved operand number representing the instructions memory operand, + /// for instructions that have a stack spill fused into them. + const static unsigned int DebugOperandMemNumber; + MachineFunction(Function &F, const LLVMTargetMachine &Target, const TargetSubtargetInfo &STI, unsigned FunctionNum, MachineModuleInfo &MMI); diff --git a/llvm/include/llvm/CodeGen/MachineInstr.h b/llvm/include/llvm/CodeGen/MachineInstr.h index 757907f6d887..0ac934e208b6 100644 --- a/llvm/include/llvm/CodeGen/MachineInstr.h +++ b/llvm/include/llvm/CodeGen/MachineInstr.h @@ -517,7 +517,7 @@ public: SmallSet<Register, 4> getUsedDebugRegs() const { assert(isDebugValue() && "not a DBG_VALUE*"); SmallSet<Register, 4> UsedRegs; - for (auto MO : debug_operands()) + for (const auto &MO : debug_operands()) if (MO.isReg() && MO.getReg()) UsedRegs.insert(MO.getReg()); return UsedRegs; @@ -1331,6 +1331,7 @@ public: case TargetOpcode::LIFETIME_START: case TargetOpcode::LIFETIME_END: case TargetOpcode::PSEUDO_PROBE: + case TargetOpcode::ARITH_FENCE: return true; } } @@ -1859,17 +1860,6 @@ public: } } - PseudoProbeAttributes getPseudoProbeAttribute() const { - assert(isPseudoProbe() && "Must be a pseudo probe instruction"); - return (PseudoProbeAttributes)getOperand(3).getImm(); - } - - void 
addPseudoProbeAttribute(PseudoProbeAttributes Attr) { - assert(isPseudoProbe() && "Must be a pseudo probe instruction"); - MachineOperand &AttrOperand = getOperand(3); - AttrOperand.setImm(AttrOperand.getImm() | (uint32_t)Attr); - } - private: /// If this instruction is embedded into a MachineFunction, return the /// MachineRegisterInfo object for the current function, otherwise diff --git a/llvm/include/llvm/CodeGen/MachineMemOperand.h b/llvm/include/llvm/CodeGen/MachineMemOperand.h index 07b8e5ebcc1d..00080b171974 100644 --- a/llvm/include/llvm/CodeGen/MachineMemOperand.h +++ b/llvm/include/llvm/CodeGen/MachineMemOperand.h @@ -282,17 +282,7 @@ public: /// success and failure orderings for an atomic operation. (For operations /// other than cmpxchg, this is equivalent to getSuccessOrdering().) AtomicOrdering getMergedOrdering() const { - AtomicOrdering Ordering = getSuccessOrdering(); - AtomicOrdering FailureOrdering = getFailureOrdering(); - if (FailureOrdering == AtomicOrdering::SequentiallyConsistent) - return AtomicOrdering::SequentiallyConsistent; - if (FailureOrdering == AtomicOrdering::Acquire) { - if (Ordering == AtomicOrdering::Monotonic) - return AtomicOrdering::Acquire; - if (Ordering == AtomicOrdering::Release) - return AtomicOrdering::AcquireRelease; - } - return Ordering; + return getMergedAtomicOrdering(getSuccessOrdering(), getFailureOrdering()); } bool isLoad() const { return FlagVals & MOLoad; } diff --git a/llvm/include/llvm/CodeGen/MachineOptimizationRemarkEmitter.h b/llvm/include/llvm/CodeGen/MachineOptimizationRemarkEmitter.h index 8cc5909c40b7..285b858c96cb 100644 --- a/llvm/include/llvm/CodeGen/MachineOptimizationRemarkEmitter.h +++ b/llvm/include/llvm/CodeGen/MachineOptimizationRemarkEmitter.h @@ -118,6 +118,12 @@ public: : DiagnosticInfoMIROptimization(DK_MachineOptimizationRemarkAnalysis, PassName, RemarkName, Loc, MBB) {} + MachineOptimizationRemarkAnalysis(const char *PassName, StringRef RemarkName, + const MachineInstr *MI) + : 
DiagnosticInfoMIROptimization(DK_MachineOptimizationRemarkAnalysis, + PassName, RemarkName, MI->getDebugLoc(), + MI->getParent()) {} + static bool classof(const DiagnosticInfo *DI) { return DI->getKind() == DK_MachineOptimizationRemarkAnalysis; } diff --git a/llvm/include/llvm/CodeGen/MachineRegisterInfo.h b/llvm/include/llvm/CodeGen/MachineRegisterInfo.h index ca3dd992bbd5..dbabfe5f0f32 100644 --- a/llvm/include/llvm/CodeGen/MachineRegisterInfo.h +++ b/llvm/include/llvm/CodeGen/MachineRegisterInfo.h @@ -821,7 +821,7 @@ public: /// deleted during LiveDebugVariables analysis. void markUsesInDebugValueAsUndef(Register Reg) const; - /// updateDbgUsersToReg - Update a collection of DBG_VALUE instructions + /// updateDbgUsersToReg - Update a collection of debug instructions /// to refer to the designated register. void updateDbgUsersToReg(MCRegister OldReg, MCRegister NewReg, ArrayRef<MachineInstr *> Users) const { @@ -829,21 +829,34 @@ public: for (MCRegUnitIterator RUI(OldReg, getTargetRegisterInfo()); RUI.isValid(); ++RUI) OldRegUnits.insert(*RUI); - for (MachineInstr *MI : Users) { - assert(MI->isDebugValue()); - for (auto &Op : MI->debug_operands()) { - if (Op.isReg()) { - for (MCRegUnitIterator RUI(OldReg, getTargetRegisterInfo()); - RUI.isValid(); ++RUI) { - if (OldRegUnits.contains(*RUI)) { - Op.setReg(NewReg); - break; - } + + // If this operand is a register, check whether it overlaps with OldReg. + // If it does, replace with NewReg. + auto UpdateOp = [this, &NewReg, &OldReg, &OldRegUnits](MachineOperand &Op) { + if (Op.isReg()) { + for (MCRegUnitIterator RUI(OldReg, getTargetRegisterInfo()); + RUI.isValid(); ++RUI) { + if (OldRegUnits.contains(*RUI)) { + Op.setReg(NewReg); + break; } } } - assert(MI->hasDebugOperandForReg(NewReg) && - "Expected debug value to have some overlap with OldReg"); + }; + + // Iterate through (possibly several) operands to DBG_VALUEs and update + // each. For DBG_PHIs, only one operand will be present. 
+ for (MachineInstr *MI : Users) { + if (MI->isDebugValue()) { + for (auto &Op : MI->debug_operands()) + UpdateOp(Op); + assert(MI->hasDebugOperandForReg(NewReg) && + "Expected debug value to have some overlap with OldReg"); + } else if (MI->isDebugPHI()) { + UpdateOp(MI->getOperand(0)); + } else { + llvm_unreachable("Non-DBG_VALUE, Non-DBG_PHI debug instr updated"); + } } } @@ -964,7 +977,7 @@ public: MCRegister getLiveInPhysReg(Register VReg) const; /// getLiveInVirtReg - If PReg is a live-in physical register, return the - /// corresponding live-in physical register. + /// corresponding live-in virtual register. Register getLiveInVirtReg(MCRegister PReg) const; /// EmitLiveInCopies - Emit copies to initialize livein virtual registers diff --git a/llvm/include/llvm/CodeGen/MacroFusion.h b/llvm/include/llvm/CodeGen/MacroFusion.h index 3a140fe63fde..ea2c7a5faae3 100644 --- a/llvm/include/llvm/CodeGen/MacroFusion.h +++ b/llvm/include/llvm/CodeGen/MacroFusion.h @@ -23,6 +23,8 @@ class MachineInstr; class ScheduleDAGMutation; class TargetInstrInfo; class TargetSubtargetInfo; +class ScheduleDAGInstrs; +class SUnit; /// Check if the instr pair, FirstMI and SecondMI, should be fused /// together. Given SecondMI, when FirstMI is unspecified, then check if @@ -32,6 +34,18 @@ using ShouldSchedulePredTy = std::function<bool(const TargetInstrInfo &TII, const MachineInstr *FirstMI, const MachineInstr &SecondMI)>; +/// Checks if the number of cluster edges between SU and its predecessors is +/// less than FuseLimit +bool hasLessThanNumFused(const SUnit &SU, unsigned FuseLimit); + +/// Create an artificial edge between FirstSU and SecondSU. +/// Make data dependencies from the FirstSU also dependent on the SecondSU to +/// prevent them from being scheduled between the FirstSU and the SecondSU +/// and vice-versa. +/// Fusing more than 2 instructions is not currently supported. 
+bool fuseInstructionPair(ScheduleDAGInstrs &DAG, SUnit &FirstSU, + SUnit &SecondSU); + /// Create a DAG scheduling mutation to pair instructions back to back /// for instructions that benefit according to the target-specific /// shouldScheduleAdjacent predicate function. diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h index da1bab718948..d5ad12fadfa0 100644 --- a/llvm/include/llvm/CodeGen/Passes.h +++ b/llvm/include/llvm/CodeGen/Passes.h @@ -37,6 +37,10 @@ class raw_ostream; // List of target independent CodeGen pass IDs. namespace llvm { + + /// AtomicExpandPass - At IR level this pass replaces atomic instructions with + /// __atomic_* library calls, or target specific instructions which implement the + /// same semantics in a way which better fits the target backend. FunctionPass *createAtomicExpandPass(); /// createUnreachableBlockEliminationPass - The LLVM code generator does not @@ -171,6 +175,9 @@ namespace llvm { /// This pass adds flow sensitive discriminators. extern char &MIRAddFSDiscriminatorsID; + /// This pass reads flow sensitive profile. + extern char &MIRProfileLoaderPassID; + /// FastRegisterAllocation Pass - This pass register allocates as fast as /// possible. It is best suited for debug code where live ranges are short. /// @@ -513,6 +520,11 @@ namespace llvm { FunctionPass * createMIRAddFSDiscriminatorsPass(sampleprof::FSDiscriminatorPass P); + /// Read Flow Sensitive Profile. + FunctionPass *createMIRProfileLoaderPass(std::string File, + std::string RemappingFile, + sampleprof::FSDiscriminatorPass P); + /// Creates MIR Debugify pass. 
\see MachineDebugify.cpp ModulePass *createDebugifyMachineModulePass(); diff --git a/llvm/include/llvm/CodeGen/RegAllocCommon.h b/llvm/include/llvm/CodeGen/RegAllocCommon.h index 39b77d919370..757ca8e112ee 100644 --- a/llvm/include/llvm/CodeGen/RegAllocCommon.h +++ b/llvm/include/llvm/CodeGen/RegAllocCommon.h @@ -1,9 +1,8 @@ //===- RegAllocCommon.h - Utilities shared between allocators ---*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/llvm/include/llvm/CodeGen/RegisterScavenging.h b/llvm/include/llvm/CodeGen/RegisterScavenging.h index 4f48ea2dc8e8..218e05f6eb6b 100644 --- a/llvm/include/llvm/CodeGen/RegisterScavenging.h +++ b/llvm/include/llvm/CodeGen/RegisterScavenging.h @@ -211,9 +211,6 @@ private: /// Initialize RegisterScavenger. void init(MachineBasicBlock &MBB); - /// Mark live-in registers of basic block as used. - void setLiveInsUsed(const MachineBasicBlock &MBB); - /// Spill a register after position \p After and reload it before position /// \p UseMI. 
ScavengedInfo &spill(Register Reg, const TargetRegisterClass &RC, int SPAdj, diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h index 948a4763b872..5a3f4e9a23ff 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAG.h +++ b/llvm/include/llvm/CodeGen/SelectionDAG.h @@ -531,7 +531,7 @@ public: } #ifndef NDEBUG - void VerifyDAGDiverence(); + void VerifyDAGDivergence(); #endif /// This iterates over the nodes in the SelectionDAG, folding @@ -621,8 +621,8 @@ public: SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget = false, bool IsOpaque = false) { - return getConstant(APInt::getAllOnesValue(VT.getScalarSizeInBits()), DL, - VT, IsTarget, IsOpaque); + return getConstant(APInt::getAllOnes(VT.getScalarSizeInBits()), DL, VT, + IsTarget, IsOpaque); } SDValue getConstant(const ConstantInt &Val, const SDLoc &DL, EVT VT, @@ -1307,6 +1307,74 @@ public: SDValue getIndexedStore(SDValue OrigStore, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM); + SDValue getLoadVP(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, + const SDLoc &dl, SDValue Chain, SDValue Ptr, SDValue Offset, + SDValue Mask, SDValue EVL, MachinePointerInfo PtrInfo, + EVT MemVT, Align Alignment, + MachineMemOperand::Flags MMOFlags, const AAMDNodes &AAInfo, + const MDNode *Ranges = nullptr, bool IsExpanding = false); + inline SDValue + getLoadVP(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, + const SDLoc &dl, SDValue Chain, SDValue Ptr, SDValue Offset, + SDValue Mask, SDValue EVL, MachinePointerInfo PtrInfo, EVT MemVT, + MaybeAlign Alignment = MaybeAlign(), + MachineMemOperand::Flags MMOFlags = MachineMemOperand::MONone, + const AAMDNodes &AAInfo = AAMDNodes(), + const MDNode *Ranges = nullptr, bool IsExpanding = false) { + // Ensures that codegen never sees a None Alignment. 
+ return getLoadVP(AM, ExtType, VT, dl, Chain, Ptr, Offset, Mask, EVL, + PtrInfo, MemVT, Alignment.getValueOr(getEVTAlign(MemVT)), + MMOFlags, AAInfo, Ranges, IsExpanding); + } + SDValue getLoadVP(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, + const SDLoc &dl, SDValue Chain, SDValue Ptr, SDValue Offset, + SDValue Mask, SDValue EVL, EVT MemVT, + MachineMemOperand *MMO, bool IsExpanding = false); + SDValue getLoadVP(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, + SDValue Mask, SDValue EVL, MachinePointerInfo PtrInfo, + MaybeAlign Alignment, MachineMemOperand::Flags MMOFlags, + const AAMDNodes &AAInfo, const MDNode *Ranges = nullptr, + bool IsExpanding = false); + SDValue getLoadVP(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, + SDValue Mask, SDValue EVL, MachineMemOperand *MMO, + bool IsExpanding = false); + SDValue getExtLoadVP(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, + SDValue Chain, SDValue Ptr, SDValue Mask, SDValue EVL, + MachinePointerInfo PtrInfo, EVT MemVT, + MaybeAlign Alignment, MachineMemOperand::Flags MMOFlags, + const AAMDNodes &AAInfo, bool IsExpanding = false); + SDValue getExtLoadVP(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, + SDValue Chain, SDValue Ptr, SDValue Mask, SDValue EVL, + EVT MemVT, MachineMemOperand *MMO, + bool IsExpanding = false); + SDValue getIndexedLoadVP(SDValue OrigLoad, const SDLoc &dl, SDValue Base, + SDValue Offset, ISD::MemIndexedMode AM); + SDValue getStoreVP(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, + SDValue Mask, SDValue EVL, MachinePointerInfo PtrInfo, + Align Alignment, MachineMemOperand::Flags MMOFlags, + const AAMDNodes &AAInfo = AAMDNodes(), + bool IsCompressing = false); + SDValue getStoreVP(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, + SDValue Mask, SDValue EVL, MachineMemOperand *MMO, + bool IsCompressing = false); + SDValue getTruncStoreVP(SDValue Chain, const SDLoc &dl, SDValue Val, + SDValue Ptr, SDValue Mask, SDValue EVL, + 
MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, + MachineMemOperand::Flags MMOFlags, + const AAMDNodes &AAInfo, bool IsCompressing = false); + SDValue getTruncStoreVP(SDValue Chain, const SDLoc &dl, SDValue Val, + SDValue Ptr, SDValue Mask, SDValue EVL, EVT SVT, + MachineMemOperand *MMO, bool IsCompressing = false); + SDValue getIndexedStoreVP(SDValue OrigStore, const SDLoc &dl, SDValue Base, + SDValue Offset, ISD::MemIndexedMode AM); + + SDValue getGatherVP(SDVTList VTs, EVT VT, const SDLoc &dl, + ArrayRef<SDValue> Ops, MachineMemOperand *MMO, + ISD::MemIndexType IndexType); + SDValue getScatterVP(SDVTList VTs, EVT VT, const SDLoc &dl, + ArrayRef<SDValue> Ops, MachineMemOperand *MMO, + ISD::MemIndexType IndexType); + SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Base, SDValue Offset, SDValue Mask, SDValue Src0, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, @@ -1664,10 +1732,6 @@ public: SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef<SDValue> Ops); - SDValue FoldConstantVectorArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, - ArrayRef<SDValue> Ops, - const SDNodeFlags Flags = SDNodeFlags()); - /// Fold floating-point operations with 2 operands when both operands are /// constants and/or undefined. SDValue foldConstantFPMath(unsigned Opcode, const SDLoc &DL, EVT VT, @@ -1769,6 +1833,19 @@ public: unsigned ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, unsigned Depth = 0) const; + /// Get the minimum bit size for this Value \p Op as a signed integer. + /// i.e. x == sext(trunc(x to MinSignedBits) to bitwidth(x)). + /// Similar to the APInt::getMinSignedBits function. + /// Helper wrapper to ComputeNumSignBits. + unsigned ComputeMinSignedBits(SDValue Op, unsigned Depth = 0) const; + + /// Get the minimum bit size for this Value \p Op as a signed integer. + /// i.e. x == sext(trunc(x to MinSignedBits) to bitwidth(x)). + /// Similar to the APInt::getMinSignedBits function. 
+ /// Helper wrapper to ComputeNumSignBits. + unsigned ComputeMinSignedBits(SDValue Op, const APInt &DemandedElts, + unsigned Depth = 0) const; + /// Return true if this function can prove that \p Op is never poison /// and, if \p PoisonOnly is false, does not have undef bits. bool isGuaranteedNotToBeUndefOrPoison(SDValue Op, bool PoisonOnly = false, diff --git a/llvm/include/llvm/CodeGen/SelectionDAGAddressAnalysis.h b/llvm/include/llvm/CodeGen/SelectionDAGAddressAnalysis.h index 4ee58333495b..6a3d76be0ed6 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAGAddressAnalysis.h +++ b/llvm/include/llvm/CodeGen/SelectionDAGAddressAnalysis.h @@ -50,6 +50,7 @@ public: SDValue getIndex() { return Index; } SDValue getIndex() const { return Index; } bool hasValidOffset() const { return Offset.hasValue(); } + int64_t getOffset() const { return *Offset; } // Returns true if `Other` and `*this` are both some offset from the same base // pointer. In that case, `Off` is set to the offset between `*this` and diff --git a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h index deeca98af3f3..2855e1f1e587 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h +++ b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h @@ -58,7 +58,6 @@ namespace llvm { class APInt; class Constant; -template <typename T> struct DenseMapInfo; class GlobalValue; class MachineBasicBlock; class MachineConstantPoolValue; @@ -509,15 +508,19 @@ BEGIN_TWO_BYTE_PACK() class LSBaseSDNodeBitfields { friend class LSBaseSDNode; + friend class VPLoadStoreSDNode; friend class MaskedLoadStoreSDNode; friend class MaskedGatherScatterSDNode; + friend class VPGatherScatterSDNode; uint16_t : NumMemSDNodeBits; // This storage is shared between disparate class hierarchies to hold an // enumeration specific to the class hierarchy in use. 
// LSBaseSDNode => enum ISD::MemIndexedMode + // VPLoadStoreBaseSDNode => enum ISD::MemIndexedMode // MaskedLoadStoreBaseSDNode => enum ISD::MemIndexedMode + // VPGatherScatterSDNode => enum ISD::MemIndexType // MaskedGatherScatterSDNode => enum ISD::MemIndexType uint16_t AddressingMode : 3; }; @@ -525,8 +528,10 @@ BEGIN_TWO_BYTE_PACK() class LoadSDNodeBitfields { friend class LoadSDNode; + friend class VPLoadSDNode; friend class MaskedLoadSDNode; friend class MaskedGatherSDNode; + friend class VPGatherSDNode; uint16_t : NumLSBaseSDNodeBits; @@ -536,8 +541,10 @@ BEGIN_TWO_BYTE_PACK() class StoreSDNodeBitfields { friend class StoreSDNode; + friend class VPStoreSDNode; friend class MaskedStoreSDNode; friend class MaskedScatterSDNode; + friend class VPScatterSDNode; uint16_t : NumLSBaseSDNodeBits; @@ -1353,7 +1360,9 @@ public: const SDValue &getBasePtr() const { switch (getOpcode()) { case ISD::STORE: + case ISD::VP_STORE: case ISD::MSTORE: + case ISD::VP_SCATTER: return getOperand(2); case ISD::MGATHER: case ISD::MSCATTER: @@ -1393,6 +1402,10 @@ public: case ISD::MSTORE: case ISD::MGATHER: case ISD::MSCATTER: + case ISD::VP_LOAD: + case ISD::VP_STORE: + case ISD::VP_GATHER: + case ISD::VP_SCATTER: return true; default: return N->isMemIntrinsic() || N->isTargetMemoryOpcode(); @@ -1563,8 +1576,12 @@ public: Align getAlignValue() const { return Value->getAlignValue(); } bool isOne() const { return Value->isOne(); } - bool isNullValue() const { return Value->isZero(); } - bool isAllOnesValue() const { return Value->isMinusOne(); } + bool isZero() const { return Value->isZero(); } + // NOTE: This is soft-deprecated. Please use `isZero()` instead. + bool isNullValue() const { return isZero(); } + bool isAllOnes() const { return Value->isMinusOne(); } + // NOTE: This is soft-deprecated. Please use `isAllOnes()` instead. 
+ bool isAllOnesValue() const { return isAllOnes(); } bool isMaxSignedValue() const { return Value->isMaxValue(true); } bool isMinSignedValue() const { return Value->isMinValue(true); } @@ -2031,8 +2048,25 @@ public: int32_t getConstantFPSplatPow2ToLog2Int(BitVector *UndefElements, uint32_t BitWidth) const; + /// Extract the raw bit data from a build vector of Undef, Constant or + /// ConstantFP node elements. Each raw bit element will be \p + /// DstEltSizeInBits wide, undef elements are treated as zero, and entirely + /// undefined elements are flagged in \p UndefElements. + bool getConstantRawBits(bool IsLittleEndian, unsigned DstEltSizeInBits, + SmallVectorImpl<APInt> &RawBitElements, + BitVector &UndefElements) const; + bool isConstant() const; + /// Recast bit data \p SrcBitElements to \p DstEltSizeInBits wide elements. + /// Undef elements are treated as zero, and entirely undefined elements are + /// flagged in \p DstUndefElements. + static void recastRawBits(bool IsLittleEndian, unsigned DstEltSizeInBits, + SmallVectorImpl<APInt> &DstBitElements, + ArrayRef<APInt> SrcBitElements, + BitVector &DstUndefElements, + const BitVector &SrcUndefElements); + static bool classof(const SDNode *N) { return N->getOpcode() == ISD::BUILD_VECTOR; } @@ -2318,6 +2352,116 @@ public: } }; +/// This base class is used to represent VP_LOAD and VP_STORE nodes +class VPLoadStoreSDNode : public MemSDNode { +public: + friend class SelectionDAG; + + VPLoadStoreSDNode(ISD::NodeType NodeTy, unsigned Order, const DebugLoc &dl, + SDVTList VTs, ISD::MemIndexedMode AM, EVT MemVT, + MachineMemOperand *MMO) + : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) { + LSBaseSDNodeBits.AddressingMode = AM; + assert(getAddressingMode() == AM && "Value truncated"); + } + + // VPLoadSDNode (Chain, Ptr, Offset, Mask, EVL) + // VPStoreSDNode (Chain, Data, Ptr, Offset, Mask, EVL) + // Mask is a vector of i1 elements; + // the type of EVL is TLI.getVPExplicitVectorLengthTy(). 
+ const SDValue &getOffset() const { + return getOperand(getOpcode() == ISD::VP_LOAD ? 2 : 3); + } + const SDValue &getBasePtr() const { + return getOperand(getOpcode() == ISD::VP_LOAD ? 1 : 2); + } + const SDValue &getMask() const { + return getOperand(getOpcode() == ISD::VP_LOAD ? 3 : 4); + } + const SDValue &getVectorLength() const { + return getOperand(getOpcode() == ISD::VP_LOAD ? 4 : 5); + } + + /// Return the addressing mode for this load or store: + /// unindexed, pre-inc, pre-dec, post-inc, or post-dec. + ISD::MemIndexedMode getAddressingMode() const { + return static_cast<ISD::MemIndexedMode>(LSBaseSDNodeBits.AddressingMode); + } + + /// Return true if this is a pre/post inc/dec load/store. + bool isIndexed() const { return getAddressingMode() != ISD::UNINDEXED; } + + /// Return true if this is NOT a pre/post inc/dec load/store. + bool isUnindexed() const { return getAddressingMode() == ISD::UNINDEXED; } + + static bool classof(const SDNode *N) { + return N->getOpcode() == ISD::VP_LOAD || N->getOpcode() == ISD::VP_STORE; + } +}; + +/// This class is used to represent a VP_LOAD node +class VPLoadSDNode : public VPLoadStoreSDNode { +public: + friend class SelectionDAG; + + VPLoadSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs, + ISD::MemIndexedMode AM, ISD::LoadExtType ETy, bool isExpanding, + EVT MemVT, MachineMemOperand *MMO) + : VPLoadStoreSDNode(ISD::VP_LOAD, Order, dl, VTs, AM, MemVT, MMO) { + LoadSDNodeBits.ExtTy = ETy; + LoadSDNodeBits.IsExpanding = isExpanding; + } + + ISD::LoadExtType getExtensionType() const { + return static_cast<ISD::LoadExtType>(LoadSDNodeBits.ExtTy); + } + + const SDValue &getBasePtr() const { return getOperand(1); } + const SDValue &getOffset() const { return getOperand(2); } + const SDValue &getMask() const { return getOperand(3); } + const SDValue &getVectorLength() const { return getOperand(4); } + + static bool classof(const SDNode *N) { + return N->getOpcode() == ISD::VP_LOAD; + } + bool isExpandingLoad() const { 
return LoadSDNodeBits.IsExpanding; } +}; + +/// This class is used to represent a VP_STORE node +class VPStoreSDNode : public VPLoadStoreSDNode { +public: + friend class SelectionDAG; + + VPStoreSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs, + ISD::MemIndexedMode AM, bool isTrunc, bool isCompressing, + EVT MemVT, MachineMemOperand *MMO) + : VPLoadStoreSDNode(ISD::VP_STORE, Order, dl, VTs, AM, MemVT, MMO) { + StoreSDNodeBits.IsTruncating = isTrunc; + StoreSDNodeBits.IsCompressing = isCompressing; + } + + /// Return true if this is a truncating store. + /// For integers this is the same as doing a TRUNCATE and storing the result. + /// For floats, it is the same as doing an FP_ROUND and storing the result. + bool isTruncatingStore() const { return StoreSDNodeBits.IsTruncating; } + + /// Returns true if the op does a compression to the vector before storing. + /// The node contiguously stores the active elements (integers or floats) + /// in src (those with their respective bit set in writemask k) to unaligned + /// memory at base_addr. 
+ bool isCompressingStore() const { return StoreSDNodeBits.IsCompressing; } + + const SDValue &getValue() const { return getOperand(1); } + const SDValue &getBasePtr() const { return getOperand(2); } + const SDValue &getOffset() const { return getOperand(3); } + const SDValue &getMask() const { return getOperand(4); } + const SDValue &getVectorLength() const { return getOperand(5); } + + static bool classof(const SDNode *N) { + return N->getOpcode() == ISD::VP_STORE; + } +}; + /// This base class is used to represent MLOAD and MSTORE nodes class MaskedLoadStoreSDNode : public MemSDNode { public: @@ -2424,6 +2568,94 @@ public: }; /// This is a base class used to represent +/// VP_GATHER and VP_SCATTER nodes +/// +class VPGatherScatterSDNode : public MemSDNode { +public: + friend class SelectionDAG; + + VPGatherScatterSDNode(ISD::NodeType NodeTy, unsigned Order, + const DebugLoc &dl, SDVTList VTs, EVT MemVT, + MachineMemOperand *MMO, ISD::MemIndexType IndexType) + : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) { + LSBaseSDNodeBits.AddressingMode = IndexType; + assert(getIndexType() == IndexType && "Value truncated"); + } + + /// How is Index applied to BasePtr when computing addresses. + ISD::MemIndexType getIndexType() const { + return static_cast<ISD::MemIndexType>(LSBaseSDNodeBits.AddressingMode); + } + bool isIndexScaled() const { + return (getIndexType() == ISD::SIGNED_SCALED) || + (getIndexType() == ISD::UNSIGNED_SCALED); + } + bool isIndexSigned() const { + return (getIndexType() == ISD::SIGNED_SCALED) || + (getIndexType() == ISD::SIGNED_UNSCALED); + } + + // In the both nodes address is Op1, mask is Op2: + // VPGatherSDNode (Chain, base, index, scale, mask, vlen) + // VPScatterSDNode (Chain, value, base, index, scale, mask, vlen) + // Mask is a vector of i1 elements + const SDValue &getBasePtr() const { + return getOperand((getOpcode() == ISD::VP_GATHER) ? 1 : 2); + } + const SDValue &getIndex() const { + return getOperand((getOpcode() == ISD::VP_GATHER) ? 
2 : 3); + } + const SDValue &getScale() const { + return getOperand((getOpcode() == ISD::VP_GATHER) ? 3 : 4); + } + const SDValue &getMask() const { + return getOperand((getOpcode() == ISD::VP_GATHER) ? 4 : 5); + } + const SDValue &getVectorLength() const { + return getOperand((getOpcode() == ISD::VP_GATHER) ? 5 : 6); + } + + static bool classof(const SDNode *N) { + return N->getOpcode() == ISD::VP_GATHER || + N->getOpcode() == ISD::VP_SCATTER; + } +}; + +/// This class is used to represent an VP_GATHER node +/// +class VPGatherSDNode : public VPGatherScatterSDNode { +public: + friend class SelectionDAG; + + VPGatherSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs, EVT MemVT, + MachineMemOperand *MMO, ISD::MemIndexType IndexType) + : VPGatherScatterSDNode(ISD::VP_GATHER, Order, dl, VTs, MemVT, MMO, + IndexType) {} + + static bool classof(const SDNode *N) { + return N->getOpcode() == ISD::VP_GATHER; + } +}; + +/// This class is used to represent an VP_SCATTER node +/// +class VPScatterSDNode : public VPGatherScatterSDNode { +public: + friend class SelectionDAG; + + VPScatterSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs, EVT MemVT, + MachineMemOperand *MMO, ISD::MemIndexType IndexType) + : VPGatherScatterSDNode(ISD::VP_SCATTER, Order, dl, VTs, MemVT, MMO, + IndexType) {} + + const SDValue &getValue() const { return getOperand(1); } + + static bool classof(const SDNode *N) { + return N->getOpcode() == ISD::VP_SCATTER; + } +}; + +/// This is a base class used to represent /// MGATHER and MSCATTER nodes /// class MaskedGatherScatterSDNode : public MemSDNode { diff --git a/llvm/include/llvm/CodeGen/SwitchLoweringUtils.h b/llvm/include/llvm/CodeGen/SwitchLoweringUtils.h index 51f1d7d6fd21..bc22d7789856 100644 --- a/llvm/include/llvm/CodeGen/SwitchLoweringUtils.h +++ b/llvm/include/llvm/CodeGen/SwitchLoweringUtils.h @@ -183,12 +183,12 @@ struct JumpTableHeader { const Value *SValue; MachineBasicBlock *HeaderBB; bool Emitted; - bool OmitRangeCheck; + bool 
FallthroughUnreachable; JumpTableHeader(APInt F, APInt L, const Value *SV, MachineBasicBlock *H, bool E = false) : First(std::move(F)), Last(std::move(L)), SValue(SV), HeaderBB(H), - Emitted(E), OmitRangeCheck(false) {} + Emitted(E), FallthroughUnreachable(false) {} }; using JumpTableBlock = std::pair<JumpTableHeader, JumpTable>; @@ -218,14 +218,14 @@ struct BitTestBlock { BitTestInfo Cases; BranchProbability Prob; BranchProbability DefaultProb; - bool OmitRangeCheck; + bool FallthroughUnreachable; BitTestBlock(APInt F, APInt R, const Value *SV, unsigned Rg, MVT RgVT, bool E, bool CR, MachineBasicBlock *P, MachineBasicBlock *D, BitTestInfo C, BranchProbability Pr) : First(std::move(F)), Range(std::move(R)), SValue(SV), Reg(Rg), RegVT(RgVT), Emitted(E), ContiguousRange(CR), Parent(P), Default(D), - Cases(std::move(C)), Prob(Pr), OmitRangeCheck(false) {} + Cases(std::move(C)), Prob(Pr), FallthroughUnreachable(false) {} }; /// Return the range of values within a range. diff --git a/llvm/include/llvm/CodeGen/TargetCallingConv.h b/llvm/include/llvm/CodeGen/TargetCallingConv.h index 29e644898f6b..7713dd0800c0 100644 --- a/llvm/include/llvm/CodeGen/TargetCallingConv.h +++ b/llvm/include/llvm/CodeGen/TargetCallingConv.h @@ -247,11 +247,11 @@ namespace ISD { unsigned PartOffset; OutputArg() = default; - OutputArg(ArgFlagsTy flags, EVT vt, EVT argvt, bool isfixed, + OutputArg(ArgFlagsTy flags, MVT vt, EVT argvt, bool isfixed, unsigned origIdx, unsigned partOffs) - : Flags(flags), IsFixed(isfixed), OrigArgIndex(origIdx), - PartOffset(partOffs) { - VT = vt.getSimpleVT(); + : Flags(flags), IsFixed(isfixed), OrigArgIndex(origIdx), + PartOffset(partOffs) { + VT = vt; ArgVT = argvt; } }; diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h index 05d0591f1e5d..8bc730a3eda5 100644 --- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h +++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h @@ -411,9 +411,12 @@ public: /// This method returns a 
null pointer if the transformation cannot be /// performed, otherwise it returns the last new instruction. /// - virtual MachineInstr *convertToThreeAddress(MachineFunction::iterator &MFI, - MachineInstr &MI, - LiveVariables *LV) const { + /// If \p LIS is not nullptr, the LiveIntervals info should be updated for + /// replacing \p MI with new instructions, even though this function does not + /// remove MI. + virtual MachineInstr *convertToThreeAddress(MachineInstr &MI, + LiveVariables *LV, + LiveIntervals *LIS) const { return nullptr; } @@ -583,15 +586,14 @@ public: } /// Insert an unconditional indirect branch at the end of \p MBB to \p - /// NewDestBB. \p BrOffset indicates the offset of \p NewDestBB relative to + /// NewDestBB. Optionally, insert the clobbered register restoring in \p + /// RestoreBB. \p BrOffset indicates the offset of \p NewDestBB relative to /// the offset of the position to insert the new branch. - /// - /// \returns The number of bytes added to the block. - virtual unsigned insertIndirectBranch(MachineBasicBlock &MBB, - MachineBasicBlock &NewDestBB, - const DebugLoc &DL, - int64_t BrOffset = 0, - RegScavenger *RS = nullptr) const { + virtual void insertIndirectBranch(MachineBasicBlock &MBB, + MachineBasicBlock &NewDestBB, + MachineBasicBlock &RestoreBB, + const DebugLoc &DL, int64_t BrOffset = 0, + RegScavenger *RS = nullptr) const { llvm_unreachable("target did not implement"); } @@ -1537,7 +1539,8 @@ public: /// compares against in CmpValue. Return true if the comparison instruction /// can be analyzed. virtual bool analyzeCompare(const MachineInstr &MI, Register &SrcReg, - Register &SrcReg2, int &Mask, int &Value) const { + Register &SrcReg2, int64_t &Mask, + int64_t &Value) const { return false; } @@ -1545,7 +1548,8 @@ public: /// into something more efficient. E.g., on ARM most instructions can set the /// flags register, obviating the need for a separate CMP. 
virtual bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, - Register SrcReg2, int Mask, int Value, + Register SrcReg2, int64_t Mask, + int64_t Value, const MachineRegisterInfo *MRI) const { return false; } @@ -1624,9 +1628,6 @@ public: unsigned defaultDefLatency(const MCSchedModel &SchedModel, const MachineInstr &DefMI) const; - int computeDefOperandLatency(const InstrItineraryData *ItinData, - const MachineInstr &DefMI) const; - /// Return true if this opcode has high latency to its result. virtual bool isHighLatencyDef(int opc) const { return false; } diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h index 692dc4d7d4cf..87f5168ec48f 100644 --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -30,6 +30,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/DAGCombine.h" #include "llvm/CodeGen/ISDOpcodes.h" +#include "llvm/CodeGen/LowLevelType.h" #include "llvm/CodeGen/RuntimeLibcalls.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGNodes.h" @@ -371,10 +372,18 @@ public: return getPointerTy(DL); } - /// EVT is not used in-tree, but is used by out-of-tree target. - /// A documentation for this function would be nice... + /// Return the type to use for a scalar shift opcode, given the shifted amount + /// type. Targets should return a legal type if the input type is legal. + /// Targets can return a type that is too small if the input type is illegal. virtual MVT getScalarShiftAmountTy(const DataLayout &, EVT) const; + /// Returns the type for the shift amount of a shift opcode. For vectors, + /// returns the input type. For scalars, behavior depends on \p LegalTypes. If + /// \p LegalTypes is true, calls getScalarShiftAmountTy, otherwise uses + /// pointer type. If getScalarShiftAmountTy or pointer type cannot represent + /// all possible shift amounts, returns MVT::i32. 
In general, \p LegalTypes + /// should be set to true for calls during type legalization and after type + /// legalization has been completed. EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL, bool LegalTypes = true) const; @@ -591,7 +600,7 @@ public: /// Returns if it's reasonable to merge stores to MemVT size. virtual bool canMergeStoresTo(unsigned AS, EVT MemVT, - const SelectionDAG &DAG) const { + const MachineFunction &MF) const { return true; } @@ -1396,6 +1405,11 @@ public: return NVT; } + virtual EVT getAsmOperandValueType(const DataLayout &DL, Type *Ty, + bool AllowUnknown = false) const { + return getValueType(DL, Ty, AllowUnknown); + } + /// Return the EVT corresponding to this LLVM type. This is fixed by the LLVM /// operations except for the pointer size. If AllowUnknown is true, this /// will return MVT::Other for types with no EVT counterpart (e.g. structs), @@ -1448,7 +1462,7 @@ public: /// Return the desired alignment for ByVal or InAlloca aggregate function /// arguments in the caller parameter area. This is the actual alignment, not /// its logarithm. - virtual unsigned getByValTypeAlignment(Type *Ty, const DataLayout &DL) const; + virtual uint64_t getByValTypeAlignment(Type *Ty, const DataLayout &DL) const; /// Return the type of registers that this ValueType will eventually require. MVT getRegisterType(MVT VT) const { @@ -1763,9 +1777,7 @@ public: Align getPrefFunctionAlignment() const { return PrefFunctionAlignment; } /// Return the preferred loop alignment. - virtual Align getPrefLoopAlignment(MachineLoop *ML = nullptr) const { - return PrefLoopAlignment; - } + virtual Align getPrefLoopAlignment(MachineLoop *ML = nullptr) const; /// Should loops be aligned even when the function is marked OptSize (but not /// MinSize). @@ -2077,6 +2089,20 @@ public: return false; } + /// Return true if it may be profitable to transform + /// (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2). 
+ /// This may not be true if c1 and c2 can be represented as immediates but + /// c1*c2 cannot, for example. + /// The target should check if c1, c2 and c1*c2 can be represented as + /// immediates, or have to be materialized into registers. If it is not sure + /// about some cases, a default true can be returned to let the DAGCombiner + /// decide. + /// AddNode is (add x, c1), and ConstNode is c2. + virtual bool isMulAddWithConstProfitable(const SDValue &AddNode, + const SDValue &ConstNode) const { + return true; + } + /// Return true if it is more correct/profitable to use strict FP_TO_INT /// conversion operations - canonicalizing the FP source value instead of /// converting all cases and then selecting based on value. @@ -2177,8 +2203,7 @@ protected: /// Indicate that the specified operation does not work with the specified /// type and indicate what to do about it. Note that VT may refer to either /// the type of a result or that of an operand of Op. - void setOperationAction(unsigned Op, MVT VT, - LegalizeAction Action) { + void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action) { assert(Op < array_lengthof(OpActions[0]) && "Table isn't big enough!"); OpActions[(unsigned)VT.SimpleTy][Op] = Action; } @@ -2197,8 +2222,7 @@ protected: /// Indicate that the specified truncating store does not work with the /// specified type and indicate what to do about it. 
- void setTruncStoreAction(MVT ValVT, MVT MemVT, - LegalizeAction Action) { + void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action) { assert(ValVT.isValid() && MemVT.isValid() && "Table isn't big enough!"); TruncStoreActions[(unsigned)ValVT.SimpleTy][MemVT.SimpleTy] = Action; } @@ -2506,8 +2530,11 @@ public: return false; } - virtual bool isTruncateFree(EVT FromVT, EVT ToVT) const { - return false; + virtual bool isTruncateFree(EVT FromVT, EVT ToVT) const { return false; } + virtual bool isTruncateFree(LLT FromTy, LLT ToTy, const DataLayout &DL, + LLVMContext &Ctx) const { + return isTruncateFree(getApproximateEVTForLLT(FromTy, DL, Ctx), + getApproximateEVTForLLT(ToTy, DL, Ctx)); } virtual bool isProfitableToHoist(Instruction *I) const { return true; } @@ -2583,8 +2610,11 @@ public: return false; } - virtual bool isZExtFree(EVT FromTy, EVT ToTy) const { - return false; + virtual bool isZExtFree(EVT FromTy, EVT ToTy) const { return false; } + virtual bool isZExtFree(LLT FromTy, LLT ToTy, const DataLayout &DL, + LLVMContext &Ctx) const { + return isZExtFree(getApproximateEVTForLLT(FromTy, DL, Ctx), + getApproximateEVTForLLT(ToTy, DL, Ctx)); } /// Return true if sign-extension from FromTy to ToTy is cheaper than @@ -3807,7 +3837,7 @@ public: RetSExt = Call.hasRetAttr(Attribute::SExt); RetZExt = Call.hasRetAttr(Attribute::ZExt); NoMerge = Call.hasFnAttr(Attribute::NoMerge); - + Callee = Target; CallConv = Call.getCallingConv(); @@ -4424,33 +4454,29 @@ public: /// Expand CTPOP nodes. Expands vector/scalar CTPOP nodes, /// vector nodes can only succeed if all operations are legal/custom. /// \param N Node to expand - /// \param Result output after conversion - /// \returns True, if the expansion was successful, false otherwise - bool expandCTPOP(SDNode *N, SDValue &Result, SelectionDAG &DAG) const; + /// \returns The expansion result or SDValue() if it fails. + SDValue expandCTPOP(SDNode *N, SelectionDAG &DAG) const; /// Expand CTLZ/CTLZ_ZERO_UNDEF nodes. 
Expands vector/scalar CTLZ nodes, /// vector nodes can only succeed if all operations are legal/custom. /// \param N Node to expand - /// \param Result output after conversion - /// \returns True, if the expansion was successful, false otherwise - bool expandCTLZ(SDNode *N, SDValue &Result, SelectionDAG &DAG) const; + /// \returns The expansion result or SDValue() if it fails. + SDValue expandCTLZ(SDNode *N, SelectionDAG &DAG) const; /// Expand CTTZ/CTTZ_ZERO_UNDEF nodes. Expands vector/scalar CTTZ nodes, /// vector nodes can only succeed if all operations are legal/custom. /// \param N Node to expand - /// \param Result output after conversion - /// \returns True, if the expansion was successful, false otherwise - bool expandCTTZ(SDNode *N, SDValue &Result, SelectionDAG &DAG) const; + /// \returns The expansion result or SDValue() if it fails. + SDValue expandCTTZ(SDNode *N, SelectionDAG &DAG) const; /// Expand ABS nodes. Expands vector/scalar ABS nodes, /// vector nodes can only succeed if all operations are legal/custom. /// (ABS x) -> (XOR (ADD x, (SRA x, type_size)), (SRA x, type_size)) /// \param N Node to expand - /// \param Result output after conversion /// \param IsNegative indicate negated abs - /// \returns True, if the expansion was successful, false otherwise - bool expandABS(SDNode *N, SDValue &Result, SelectionDAG &DAG, - bool IsNegative = false) const; + /// \returns The expansion result or SDValue() if it fails. + SDValue expandABS(SDNode *N, SelectionDAG &DAG, + bool IsNegative = false) const; /// Expand BSWAP nodes. Expands scalar/vector BSWAP nodes with i16/i32/i64 /// scalar types. Returns SDValue() if expand fails. 
diff --git a/llvm/include/llvm/CodeGen/TargetPassConfig.h b/llvm/include/llvm/CodeGen/TargetPassConfig.h index 11138039a3c5..9b13b61fc9de 100644 --- a/llvm/include/llvm/CodeGen/TargetPassConfig.h +++ b/llvm/include/llvm/CodeGen/TargetPassConfig.h @@ -187,8 +187,7 @@ public: void substitutePass(AnalysisID StandardID, IdentifyingPassPtr TargetID); /// Insert InsertedPassID pass after TargetPassID pass. - void insertPass(AnalysisID TargetPassID, IdentifyingPassPtr InsertedPassID, - bool VerifyAfter = true); + void insertPass(AnalysisID TargetPassID, IdentifyingPassPtr InsertedPassID); /// Allow the target to enable a specific standard pass by default. void enablePass(AnalysisID PassID) { substitutePass(PassID, PassID); } @@ -323,8 +322,7 @@ public: /// Add standard passes after a pass that has just been added. For example, /// the MachineVerifier if it is enabled. - void addMachinePostPasses(const std::string &Banner, bool AllowVerify = true, - bool AllowStrip = true); + void addMachinePostPasses(const std::string &Banner); /// Check whether or not GlobalISel should abort on error. /// When this is disabled, GlobalISel will fall back on SDISel instead of @@ -449,16 +447,12 @@ protected: /// Add a CodeGen pass at this point in the pipeline after checking overrides. /// Return the pass that was added, or zero if no pass was added. - /// @p verifyAfter if true and adding a machine function pass add an extra - /// machine verification pass afterwards. - AnalysisID addPass(AnalysisID PassID, bool verifyAfter = true); + AnalysisID addPass(AnalysisID PassID); /// Add a pass to the PassManager if that pass is supposed to be run, as /// determined by the StartAfter and StopAfter options. Takes ownership of the /// pass. - /// @p verifyAfter if true and adding a machine function pass add an extra - /// machine verification pass afterwards. 
- void addPass(Pass *P, bool verifyAfter = true); + void addPass(Pass *P); /// addMachinePasses helper to create the target-selected or overriden /// regalloc pass. diff --git a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h index 92ce5b737090..8483d078ca74 100644 --- a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h +++ b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h @@ -57,6 +57,8 @@ public: /// Classes with a higher priority value are assigned first by register /// allocators using a greedy heuristic. The value is in the range [0,63]. const uint8_t AllocationPriority; + /// Configurable target specific flags. + const uint8_t TSFlags; /// Whether the class supports two (or more) disjunct subregister indices. const bool HasDisjunctSubRegs; /// Whether a combination of subregisters can cover every register in the @@ -871,10 +873,6 @@ public: /// (3) Bottom-up allocation is no longer guaranteed to optimally color. virtual bool reverseLocalAssignment() const { return false; } - /// Add the allocation priority to global and split ranges as well as the - /// local ranges when registers are added to the queue. - virtual bool addAllocPriorityToGlobalRanges() const { return false; } - /// Allow the target to override the cost of using a callee-saved register for /// the first time. Default value of 0 means we will use a callee-saved /// register if it is available. 
diff --git a/llvm/include/llvm/CodeGen/TargetSchedule.h b/llvm/include/llvm/CodeGen/TargetSchedule.h index aa6b82e14aa6..049ede89ab46 100644 --- a/llvm/include/llvm/CodeGen/TargetSchedule.h +++ b/llvm/include/llvm/CodeGen/TargetSchedule.h @@ -15,7 +15,6 @@ #ifndef LLVM_CODEGEN_TARGETSCHEDULE_H #define LLVM_CODEGEN_TARGETSCHEDULE_H -#include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/Config/llvm-config.h" diff --git a/llvm/include/llvm/CodeGen/ValueTypes.td b/llvm/include/llvm/CodeGen/ValueTypes.td index 0e88e705e16b..7f989e08e9bf 100644 --- a/llvm/include/llvm/CodeGen/ValueTypes.td +++ b/llvm/include/llvm/CodeGen/ValueTypes.td @@ -216,6 +216,7 @@ def untyped : ValueType<8, 174>; // Produces an untyped value def funcref : ValueType<0, 175>; // WebAssembly's funcref type def externref : ValueType<0, 176>; // WebAssembly's externref type def x86amx : ValueType<8192, 177>; // X86 AMX value +def i64x8 : ValueType<512, 178>; // 8 Consecutive GPRs (AArch64) def token : ValueType<0, 248>; // TokenTy @@ -243,7 +244,7 @@ def Any : ValueType<0, 255>; /// This class is for targets that want to use pointer types in patterns /// with the GlobalISelEmitter. Targets must define their own pointer /// derived from this class. The scalar argument should be an -/// integer type with the same bit size as the ponter. +/// integer type with the same bit size as the pointer. /// e.g. def p0 : PtrValueType <i64, 0>; class PtrValueType <ValueType scalar, int addrspace> : diff --git a/llvm/include/llvm/DWARFLinker/DWARFLinker.h b/llvm/include/llvm/DWARFLinker/DWARFLinker.h index 7b89c9f66f86..1c6d0b1ead86 100644 --- a/llvm/include/llvm/DWARFLinker/DWARFLinker.h +++ b/llvm/include/llvm/DWARFLinker/DWARFLinker.h @@ -80,7 +80,7 @@ public: CompileUnit::DIEInfo &Info) = 0; /// Apply the valid relocations to the buffer \p Data, taking into - /// account that Data is at \p BaseOffset in the debug_info section. 
+ /// account that Data is at \p BaseOffset in the .debug_info section. /// /// \returns true whether any reloc has been applied. virtual bool applyValidRelocs(MutableArrayRef<char> Data, uint64_t BaseOffset, @@ -109,7 +109,7 @@ public: /// Emit section named SecName with data SecData. virtual void emitSectionContents(StringRef SecData, StringRef SecName) = 0; - /// Emit the abbreviation table \p Abbrevs to the debug_abbrev section. + /// Emit the abbreviation table \p Abbrevs to the .debug_abbrev section. virtual void emitAbbrevs(const std::vector<std::unique_ptr<DIEAbbrev>> &Abbrevs, unsigned DwarfVersion) = 0; @@ -137,7 +137,7 @@ public: virtual void emitAppleTypes(AccelTable<AppleAccelTableStaticTypeData> &Table) = 0; - /// Emit debug_ranges for \p FuncRange by translating the + /// Emit .debug_ranges for \p FuncRange by translating the /// original \p Entries. virtual void emitRangesEntries( int64_t UnitPcOffset, uint64_t OrigLowPc, @@ -145,17 +145,17 @@ public: const std::vector<DWARFDebugRangeList::RangeListEntry> &Entries, unsigned AddressSize) = 0; - /// Emit debug_aranges entries for \p Unit and if \p DoRangesSection is true, - /// also emit the debug_ranges entries for the DW_TAG_compile_unit's + /// Emit .debug_aranges entries for \p Unit and if \p DoRangesSection is true, + /// also emit the .debug_ranges entries for the DW_TAG_compile_unit's /// DW_AT_ranges attribute. virtual void emitUnitRangesEntries(CompileUnit &Unit, bool DoRangesSection) = 0; - /// Copy the debug_line over to the updated binary while unobfuscating the + /// Copy the .debug_line over to the updated binary while unobfuscating the /// file names and directories. virtual void translateLineTable(DataExtractor LineData, uint64_t Offset) = 0; - /// Emit the line table described in \p Rows into the debug_line section. + /// Emit the line table described in \p Rows into the .debug_line section. 
virtual void emitLineTableForUnit(MCDwarfLineTableParams Params, StringRef PrologueBytes, unsigned MinInstLength, @@ -175,7 +175,7 @@ public: virtual void emitFDE(uint32_t CIEOffset, uint32_t AddreSize, uint32_t Address, StringRef Bytes) = 0; - /// Emit the debug_loc contribution for \p Unit by copying the entries from + /// Emit the .debug_loc contribution for \p Unit by copying the entries from /// \p Dwarf and offsetting them. Update the location attributes to point to /// the new entries. virtual void emitLocationsForUnit( @@ -184,7 +184,7 @@ public: ProcessExpr) = 0; /// Emit the compilation unit header for \p Unit in the - /// debug_info section. + /// .debug_info section. /// /// As a side effect, this also switches the current Dwarf version /// of the MC layer to the one of U.getOrigUnit(). @@ -695,7 +695,7 @@ private: /// Assign an abbreviation number to \p Abbrev void assignAbbrev(DIEAbbrev &Abbrev); - /// Compute and emit debug_ranges section for \p Unit, and + /// Compute and emit .debug_ranges section for \p Unit, and /// patch the attributes referencing it. void patchRangesForUnit(const CompileUnit &Unit, DWARFContext &Dwarf, const DWARFFile &File) const; @@ -706,7 +706,7 @@ private: /// Extract the line tables from the original dwarf, extract the relevant /// parts according to the linked function ranges and emit the result in the - /// debug_line section. + /// .debug_line section. void patchLineTableForUnit(CompileUnit &Unit, DWARFContext &OrigDwarf, const DWARFFile &File); @@ -753,7 +753,7 @@ private: StringMap<uint32_t> EmittedCIEs; /// Offset of the last CIE that has been emitted in the output - /// debug_frame section. + /// .debug_frame section. uint32_t LastCIEOffset = 0; /// Apple accelerator tables. 
diff --git a/llvm/include/llvm/DebugInfo/CodeView/CVRecord.h b/llvm/include/llvm/DebugInfo/CodeView/CVRecord.h index 18392e3608e7..99de8ebef812 100644 --- a/llvm/include/llvm/DebugInfo/CodeView/CVRecord.h +++ b/llvm/include/llvm/DebugInfo/CodeView/CVRecord.h @@ -10,7 +10,6 @@ #define LLVM_DEBUGINFO_CODEVIEW_CVRECORD_H #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/Optional.h" #include "llvm/DebugInfo/CodeView/CodeView.h" #include "llvm/DebugInfo/CodeView/CodeViewError.h" #include "llvm/DebugInfo/CodeView/RecordSerialization.h" diff --git a/llvm/include/llvm/DebugInfo/CodeView/CodeViewRegisters.def b/llvm/include/llvm/DebugInfo/CodeView/CodeViewRegisters.def index 48ea7e52c172..4cee3abdde87 100644 --- a/llvm/include/llvm/DebugInfo/CodeView/CodeViewRegisters.def +++ b/llvm/include/llvm/DebugInfo/CodeView/CodeViewRegisters.def @@ -373,7 +373,7 @@ CV_REGISTER(AMD64_K7, 765) CV_REGISTER(ARM_NOREG, 0) -// General purpose 32-bit integer regisers +// General purpose 32-bit integer registers CV_REGISTER(ARM_R0, 10) CV_REGISTER(ARM_R1, 11) diff --git a/llvm/include/llvm/DebugInfo/CodeView/TypeIndex.h b/llvm/include/llvm/DebugInfo/CodeView/TypeIndex.h index bdc6cf46509b..226a436c0930 100644 --- a/llvm/include/llvm/DebugInfo/CodeView/TypeIndex.h +++ b/llvm/include/llvm/DebugInfo/CodeView/TypeIndex.h @@ -18,6 +18,7 @@ namespace llvm { class ScopedPrinter; +class StringRef; namespace codeview { diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h index dcb26f12b13e..cdf3f60f88be 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h @@ -144,6 +144,27 @@ public: const dwarf::Attribute Attr, const DWARFUnit &U) const; + /// Compute an offset from a DIE specified by DIE offset and attribute index. + /// + /// \param AttrIndex an index of DWARF attribute. 
+ /// \param DIEOffset the DIE offset that points to the ULEB128 abbreviation + /// code in the .debug_info data. + /// \param U the DWARFUnit the contains the DIE. + /// \returns an offset of the attribute. + uint64_t getAttributeOffsetFromIndex(uint32_t AttrIndex, uint64_t DIEOffset, + const DWARFUnit &U) const; + + /// Extract a DWARF form value from a DIE speccified by attribute index and + /// its offset. + /// + /// \param AttrIndex an index of DWARF attribute. + /// \param Offset offset of the attribute. + /// \param U the DWARFUnit the contains the DIE. + /// \returns Optional DWARF form value if the attribute was extracted. + Optional<DWARFFormValue> + getAttributeValueFromOffset(uint32_t AttrIndex, uint64_t Offset, + const DWARFUnit &U) const; + bool extract(DataExtractor Data, uint64_t* OffsetPtr); void dump(raw_ostream &OS) const; diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFAddressRange.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFAddressRange.h index 154f7893aa17..537a03ec11fc 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFAddressRange.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFAddressRange.h @@ -39,6 +39,8 @@ struct DWARFAddressRange { /// Returns true if [LowPC, HighPC) intersects with [RHS.LowPC, RHS.HighPC). bool intersects(const DWARFAddressRange &RHS) const { assert(valid() && RHS.valid()); + if (SectionIndex != RHS.SectionIndex) + return false; // Empty ranges can't intersect. 
if (LowPC == HighPC || RHS.LowPC == RHS.HighPC) return false; @@ -69,12 +71,12 @@ struct DWARFAddressRange { inline bool operator<(const DWARFAddressRange &LHS, const DWARFAddressRange &RHS) { - return std::tie(LHS.LowPC, LHS.HighPC) < std::tie(RHS.LowPC, RHS.HighPC); + return std::tie(LHS.SectionIndex, LHS.LowPC, LHS.HighPC) < std::tie(RHS.SectionIndex, RHS.LowPC, RHS.HighPC); } inline bool operator==(const DWARFAddressRange &LHS, const DWARFAddressRange &RHS) { - return std::tie(LHS.LowPC, LHS.HighPC) == std::tie(RHS.LowPC, RHS.HighPC); + return std::tie(LHS.SectionIndex, LHS.LowPC, LHS.HighPC) == std::tie(RHS.SectionIndex, RHS.LowPC, RHS.HighPC); } raw_ostream &operator<<(raw_ostream &OS, const DWARFAddressRange &R); diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h index 75b2280658f1..902973ff5722 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h @@ -243,6 +243,7 @@ public: } DWARFCompileUnit *getDWOCompileUnitForHash(uint64_t Hash); + DWARFTypeUnit *getTypeUnitForHash(uint16_t Version, uint64_t Hash, bool IsDWO); /// Return the compile unit that includes an offset (relative to .debug_info). DWARFCompileUnit *getCompileUnitForOffset(uint64_t Offset); @@ -373,8 +374,24 @@ public: return {2, 4, 8}; } static bool isAddressSizeSupported(unsigned AddressSize) { - return llvm::any_of(getSupportedAddressSizes(), - [=](auto Elem) { return Elem == AddressSize; }); + return llvm::is_contained(getSupportedAddressSizes(), AddressSize); + } + template <typename... Ts> + static Error checkAddressSizeSupported(unsigned AddressSize, + std::error_code EC, char const *Fmt, + const Ts &...Vals) { + if (isAddressSizeSupported(AddressSize)) + return Error::success(); + std::string Buffer; + raw_string_ostream Stream(Buffer); + Stream << format(Fmt, Vals...) 
+ << " has unsupported address size: " << AddressSize + << " (supported are "; + ListSeparator LS; + for (unsigned Size : DWARFContext::getSupportedAddressSizes()) + Stream << LS << Size; + Stream << ')'; + return make_error<StringError>(Stream.str(), EC); } std::shared_ptr<DWARFContext> getDWOContext(StringRef AbsolutePath); @@ -387,9 +404,12 @@ public: function_ref<void(Error)> getWarningHandler() { return WarningHandler; } + enum class ProcessDebugRelocations { Process, Ignore }; + static std::unique_ptr<DWARFContext> - create(const object::ObjectFile &Obj, const LoadedObjectInfo *L = nullptr, - std::string DWPName = "", + create(const object::ObjectFile &Obj, + ProcessDebugRelocations RelocAction = ProcessDebugRelocations::Process, + const LoadedObjectInfo *L = nullptr, std::string DWPName = "", std::function<void(Error)> RecoverableErrorHandler = WithColor::defaultErrorHandler, std::function<void(Error)> WarningHandler = diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h index 0bfe9f376f46..c4370cb54113 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h @@ -24,9 +24,11 @@ class DWARFDebugInfoEntry { /// Offset within the .debug_info of the start of this entry. uint64_t Offset = 0; - /// The integer depth of this DIE within the compile unit DIEs where the - /// compile/type unit DIE has a depth of zero. - uint32_t Depth = 0; + /// Index of the parent die. UINT32_MAX if there is no parent. + uint32_t ParentIdx = UINT32_MAX; + + /// Index of the sibling die. Zero if there is no sibling. + uint32_t SiblingIdx = 0; const DWARFAbbreviationDeclaration *AbbrevDecl = nullptr; @@ -36,15 +38,31 @@ public: /// Extracts a debug info entry, which is a child of a given unit, /// starting at a given offset. If DIE can't be extracted, returns false and /// doesn't change OffsetPtr. 
- bool extractFast(const DWARFUnit &U, uint64_t *OffsetPtr); - /// High performance extraction should use this call. bool extractFast(const DWARFUnit &U, uint64_t *OffsetPtr, const DWARFDataExtractor &DebugInfoData, uint64_t UEndOffset, - uint32_t Depth); + uint32_t ParentIdx); uint64_t getOffset() const { return Offset; } - uint32_t getDepth() const { return Depth; } + + /// Returns index of the parent die. + Optional<uint32_t> getParentIdx() const { + if (ParentIdx == UINT32_MAX) + return None; + + return ParentIdx; + } + + /// Returns index of the sibling die. + Optional<uint32_t> getSiblingIdx() const { + if (SiblingIdx == 0) + return None; + + return SiblingIdx; + } + + /// Set index of sibling. + void setSiblingIdx(uint32_t Idx) { SiblingIdx = Idx; } dwarf::Tag getTag() const { return AbbrevDecl ? AbbrevDecl->getTag() : dwarf::DW_TAG_null; diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h index d1d65372740b..ee15b6d4112d 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h @@ -110,10 +110,6 @@ public: /// Length of the prologue in bytes. uint64_t getLength() const; - int32_t getMaxLineIncrementForSpecialOpcode() const { - return LineBase + (int8_t)LineRange - 1; - } - /// Get DWARF-version aware access to the file name entry at the provided /// index. const llvm::DWARFDebugLine::FileNameEntry & diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h index 2f72c642a2d5..0d9f37c5610b 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h @@ -49,12 +49,7 @@ public: /// 2. An address, which defines the appropriate base address for /// use in interpreting the beginning and ending address offsets of /// subsequent entries of the location list. 
- bool isBaseAddressSelectionEntry(uint8_t AddressSize) const { - assert(AddressSize == 4 || AddressSize == 8); - if (AddressSize == 4) - return StartAddress == -1U; - return StartAddress == -1ULL; - } + bool isBaseAddressSelectionEntry(uint8_t AddressSize) const; }; private: diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDie.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDie.h index 1903bab5e73f..8f93ebc4ebc0 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDie.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDie.h @@ -182,6 +182,8 @@ public: DWARFDie getAttributeValueAsReferencedDie(dwarf::Attribute Attr) const; DWARFDie getAttributeValueAsReferencedDie(const DWARFFormValue &V) const; + DWARFDie resolveTypeUnitReference() const; + /// Extract the range base attribute from this DIE as absolute section offset. /// /// This is a utility function that checks for either the DW_AT_rnglists_base @@ -220,16 +222,6 @@ public: /// information is available. Expected<DWARFAddressRangesVector> getAddressRanges() const; - /// Get all address ranges for any DW_TAG_subprogram DIEs in this DIE or any - /// of its children. - /// - /// Get the hi/low PC range if both attributes are available or exrtracts the - /// non-contiguous address ranges from the DW_AT_ranges attribute for this DIE - /// and all children. - /// - /// \param Ranges the addres range vector to fill in. - void collectChildrenAddressRanges(DWARFAddressRangesVector &Ranges) const; - bool addressRangeContainsAddress(const uint64_t Address) const; Expected<DWARFLocationExpressionsVector> @@ -246,6 +238,8 @@ public: /// for ShortName if LinkageName is not found. /// Returns null if no name is found. const char *getName(DINameKind Kind) const; + void getFullName(raw_string_ostream &, + std::string *OriginalFullName = nullptr) const; /// Return the DIE short name resolving DW_AT_specification or /// DW_AT_abstract_origin references if necessary. 
Returns null if no name diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFExpression.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFExpression.h index 794e859bfe72..b694eeacfd9d 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFExpression.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFExpression.h @@ -86,24 +86,30 @@ public: uint64_t OperandEndOffsets[2]; public: - Description &getDescription() { return Desc; } - uint8_t getCode() { return Opcode; } - uint64_t getRawOperand(unsigned Idx) { return Operands[Idx]; } - uint64_t getOperandEndOffset(unsigned Idx) { return OperandEndOffsets[Idx]; } - uint64_t getEndOffset() { return EndOffset; } - bool extract(DataExtractor Data, uint8_t AddressSize, uint64_t Offset, - Optional<dwarf::DwarfFormat> Format); - bool isError() { return Error; } + const Description &getDescription() const { return Desc; } + uint8_t getCode() const { return Opcode; } + uint64_t getRawOperand(unsigned Idx) const { return Operands[Idx]; } + uint64_t getOperandEndOffset(unsigned Idx) const { + return OperandEndOffsets[Idx]; + } + uint64_t getEndOffset() const { return EndOffset; } + bool isError() const { return Error; } bool print(raw_ostream &OS, DIDumpOptions DumpOpts, const DWARFExpression *Expr, const MCRegisterInfo *RegInfo, - DWARFUnit *U, bool isEH); - bool verify(DWARFUnit *U); + DWARFUnit *U, bool isEH) const; + + /// Verify \p Op. Does not affect the return of \a isError(). + static bool verify(const Operation &Op, DWARFUnit *U); + + private: + bool extract(DataExtractor Data, uint8_t AddressSize, uint64_t Offset, + Optional<dwarf::DwarfFormat> Format); }; /// An iterator to go through the expression operations. class iterator : public iterator_facade_base<iterator, std::forward_iterator_tag, - Operation> { + const Operation> { friend class DWARFExpression; const DWARFExpression *Expr; uint64_t Offset; @@ -116,19 +122,17 @@ public: } public: - class Operation &operator++() { + iterator &operator++() { Offset = Op.isError() ? 
Expr->Data.getData().size() : Op.EndOffset; Op.Error = Offset >= Expr->Data.getData().size() || !Op.extract(Expr->Data, Expr->AddressSize, Offset, Expr->Format); - return Op; + return *this; } - class Operation &operator*() { - return Op; - } + const Operation &operator*() const { return Op; } - iterator skipBytes(uint64_t Add) { + iterator skipBytes(uint64_t Add) const { return iterator(Expr, Op.EndOffset + Add); } @@ -159,6 +163,8 @@ public: bool operator==(const DWARFExpression &RHS) const; + StringRef getData() const { return Data.getData(); } + private: DataExtractor Data; uint8_t AddressSize; diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFFormValue.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFFormValue.h index 43be024f0d37..3c051c3ea018 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFFormValue.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFFormValue.h @@ -102,10 +102,6 @@ public: return extractValue(Data, OffsetPtr, FormParams, nullptr, U); } - bool isInlinedCStr() const { - return Value.data != nullptr && Value.data == (const uint8_t *)Value.cstr; - } - /// getAsFoo functions below return the extracted value as Foo if only /// DWARFFormValue has form class is suitable for representing Foo. Optional<uint64_t> getAsReference() const; @@ -123,6 +119,19 @@ public: Optional<ArrayRef<uint8_t>> getAsBlock() const; Optional<uint64_t> getAsCStringOffset() const; Optional<uint64_t> getAsReferenceUVal() const; + /// Correctly extract any file paths from a form value. + /// + /// These attributes can be in the from DW_AT_decl_file or DW_AT_call_file + /// attributes. We need to use the file index in the correct DWARFUnit's line + /// table prologue, and each DWARFFormValue has the DWARFUnit the form value + /// was extracted from. + /// + /// \param Kind The kind of path to extract. + /// + /// \returns A valid string value on success, or llvm::None if the form class + /// is not FC_Constant, or if the file index is not valid. 
+ Optional<std::string> + getAsFile(DILineInfoSpecifier::FileLineInfoKind Kind) const; /// Skip a form's value in \p DebugInfoData at the offset specified by /// \p OffsetPtr. diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFVerifier.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFVerifier.h index 93d7e2b563fd..d471b80c7fe1 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFVerifier.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFVerifier.h @@ -49,8 +49,6 @@ public: DieRangeInfo(std::vector<DWARFAddressRange> Ranges) : Ranges(std::move(Ranges)) {} - typedef std::vector<DWARFAddressRange>::const_iterator - address_range_iterator; typedef std::set<DieRangeInfo>::const_iterator die_range_info_iterator; /// Inserts the address range. If the range overlaps with an existing @@ -62,16 +60,6 @@ public: /// children address ranges must all be contained in. Optional<DWARFAddressRange> insert(const DWARFAddressRange &R); - /// Finds an address range in the sorted vector of ranges. - address_range_iterator findRange(const DWARFAddressRange &R) const { - auto Begin = Ranges.begin(); - auto End = Ranges.end(); - auto Iter = std::upper_bound(Begin, End, R); - if (Iter != Begin) - --Iter; - return Iter; - } - /// Inserts the address range info. If any of its ranges overlaps with a /// range in an existing range info, the range info is *not* added and an /// iterator to the overlapping range info. @@ -91,14 +79,11 @@ private: raw_ostream &OS; DWARFContext &DCtx; DIDumpOptions DumpOpts; - /// A map that tracks all references (converted absolute references) so we - /// can verify each reference points to a valid DIE and not an offset that - /// lies between to valid DIEs. 
- std::map<uint64_t, std::set<uint64_t>> ReferenceToDIEOffsets; uint32_t NumDebugLineErrors = 0; // Used to relax some checks that do not currently work portably bool IsObjectFile; bool IsMachOObject; + using ReferenceMap = std::map<uint64_t, std::set<uint64_t>>; raw_ostream &error() const; raw_ostream &warn() const; @@ -140,6 +125,7 @@ private: bool verifyUnitHeader(const DWARFDataExtractor DebugInfoData, uint64_t *Offset, unsigned UnitIndex, uint8_t &UnitType, bool &isUnitDWARF64); + bool verifyName(const DWARFDie &Die); /// Verifies the header of a unit in a .debug_info or .debug_types section. /// @@ -156,7 +142,9 @@ private: /// \param Unit The DWARF Unit to verify. /// /// \returns The number of errors that occurred during verification. - unsigned verifyUnitContents(DWARFUnit &Unit); + unsigned verifyUnitContents(DWARFUnit &Unit, + ReferenceMap &UnitLocalReferences, + ReferenceMap &CrossUnitReferences); /// Verifies the unit headers and contents in a .debug_info or .debug_types /// section. @@ -208,7 +196,9 @@ private: /// /// \returns NumErrors The number of errors occurred during verification of /// attributes' forms in a unit - unsigned verifyDebugInfoForm(const DWARFDie &Die, DWARFAttribute &AttrValue); + unsigned verifyDebugInfoForm(const DWARFDie &Die, DWARFAttribute &AttrValue, + ReferenceMap &UnitLocalReferences, + ReferenceMap &CrossUnitReferences); /// Verifies the all valid references that were found when iterating through /// all of the DIE attributes. @@ -220,7 +210,9 @@ private: /// /// \returns NumErrors The number of errors occurred during verification of /// references for the .debug_info and .debug_types sections - unsigned verifyDebugInfoReferences(); + unsigned verifyDebugInfoReferences( + const ReferenceMap &, + llvm::function_ref<DWARFUnit *(uint64_t)> GetUnitForDieOffset); /// Verify the DW_AT_stmt_list encoding and value and ensure that no /// compile units that have the same DW_AT_stmt_list value. 
diff --git a/llvm/include/llvm/DebugInfo/GSYM/StringTable.h b/llvm/include/llvm/DebugInfo/GSYM/StringTable.h index f7f800d01647..045c9e3f3ebd 100644 --- a/llvm/include/llvm/DebugInfo/GSYM/StringTable.h +++ b/llvm/include/llvm/DebugInfo/GSYM/StringTable.h @@ -9,7 +9,6 @@ #ifndef LLVM_DEBUGINFO_GSYM_STRINGTABLE_H #define LLVM_DEBUGINFO_GSYM_STRINGTABLE_H -#include "llvm/ADT/Optional.h" #include "llvm/ADT/StringRef.h" #include "llvm/DebugInfo/GSYM/Range.h" #include <stdint.h> diff --git a/llvm/include/llvm/DebugInfo/MSF/MSFCommon.h b/llvm/include/llvm/DebugInfo/MSF/MSFCommon.h index 83331b14b8af..a922839a999d 100644 --- a/llvm/include/llvm/DebugInfo/MSF/MSFCommon.h +++ b/llvm/include/llvm/DebugInfo/MSF/MSFCommon.h @@ -93,6 +93,9 @@ inline bool isValidBlockSize(uint32_t Size) { case 1024: case 2048: case 4096: + case 8192: + case 16384: + case 32768: return true; } return false; diff --git a/llvm/include/llvm/DebugInfo/MSF/MappedBlockStream.h b/llvm/include/llvm/DebugInfo/MSF/MappedBlockStream.h index 473c89e8106f..296a4840b779 100644 --- a/llvm/include/llvm/DebugInfo/MSF/MappedBlockStream.h +++ b/llvm/include/llvm/DebugInfo/MSF/MappedBlockStream.h @@ -58,12 +58,12 @@ public: return support::little; } - Error readBytes(uint32_t Offset, uint32_t Size, + Error readBytes(uint64_t Offset, uint64_t Size, ArrayRef<uint8_t> &Buffer) override; - Error readLongestContiguousChunk(uint32_t Offset, + Error readLongestContiguousChunk(uint64_t Offset, ArrayRef<uint8_t> &Buffer) override; - uint32_t getLength() override; + uint64_t getLength() override; BumpPtrAllocator &getAllocator() { return Allocator; } @@ -79,10 +79,10 @@ protected: private: const MSFStreamLayout &getStreamLayout() const { return StreamLayout; } - void fixCacheAfterWrite(uint32_t Offset, ArrayRef<uint8_t> Data) const; + void fixCacheAfterWrite(uint64_t Offset, ArrayRef<uint8_t> Data) const; - Error readBytes(uint32_t Offset, MutableArrayRef<uint8_t> Buffer); - bool tryReadContiguously(uint32_t Offset, uint32_t 
Size, + Error readBytes(uint64_t Offset, MutableArrayRef<uint8_t> Buffer); + bool tryReadContiguously(uint64_t Offset, uint64_t Size, ArrayRef<uint8_t> &Buffer); const uint32_t BlockSize; @@ -125,13 +125,13 @@ public: return support::little; } - Error readBytes(uint32_t Offset, uint32_t Size, + Error readBytes(uint64_t Offset, uint64_t Size, ArrayRef<uint8_t> &Buffer) override; - Error readLongestContiguousChunk(uint32_t Offset, + Error readLongestContiguousChunk(uint64_t Offset, ArrayRef<uint8_t> &Buffer) override; - uint32_t getLength() override; + uint64_t getLength() override; - Error writeBytes(uint32_t Offset, ArrayRef<uint8_t> Buffer) override; + Error writeBytes(uint64_t Offset, ArrayRef<uint8_t> Buffer) override; Error commit() override; diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/DbiModuleList.h b/llvm/include/llvm/DebugInfo/PDB/Native/DbiModuleList.h index 5fb13ad30e91..de5b46f21672 100644 --- a/llvm/include/llvm/DebugInfo/PDB/Native/DbiModuleList.h +++ b/llvm/include/llvm/DebugInfo/PDB/Native/DbiModuleList.h @@ -31,9 +31,7 @@ struct FileInfoSubstreamHeader; class DbiModuleSourceFilesIterator : public iterator_facade_base<DbiModuleSourceFilesIterator, std::random_access_iterator_tag, StringRef> { - using BaseType = - iterator_facade_base<DbiModuleSourceFilesIterator, - std::random_access_iterator_tag, StringRef>; + using BaseType = typename DbiModuleSourceFilesIterator::iterator_facade_base; public: DbiModuleSourceFilesIterator(const DbiModuleList &Modules, uint32_t Modi, diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/HashTable.h b/llvm/include/llvm/DebugInfo/PDB/Native/HashTable.h index 95c0a89551ed..474bd796b2b3 100644 --- a/llvm/include/llvm/DebugInfo/PDB/Native/HashTable.h +++ b/llvm/include/llvm/DebugInfo/PDB/Native/HashTable.h @@ -38,6 +38,7 @@ class HashTableIterator : public iterator_facade_base<HashTableIterator<ValueT>, std::forward_iterator_tag, const std::pair<uint32_t, ValueT>> { + using BaseT = typename 
HashTableIterator::iterator_facade_base; friend HashTable<ValueT>; HashTableIterator(const HashTable<ValueT> &Map, uint32_t Index, @@ -76,9 +77,7 @@ public: // Implement postfix op++ in terms of prefix op++ by using the superclass // implementation. - using iterator_facade_base<HashTableIterator<ValueT>, - std::forward_iterator_tag, - const std::pair<uint32_t, ValueT>>::operator++; + using BaseT::operator++; HashTableIterator &operator++() { while (Index < Map->Buckets.size()) { ++Index; diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/NamedStreamMap.h b/llvm/include/llvm/DebugInfo/PDB/Native/NamedStreamMap.h index 1df059ffa9fd..f110e90b3f90 100644 --- a/llvm/include/llvm/DebugInfo/PDB/Native/NamedStreamMap.h +++ b/llvm/include/llvm/DebugInfo/PDB/Native/NamedStreamMap.h @@ -9,7 +9,6 @@ #ifndef LLVM_DEBUGINFO_PDB_NATIVE_NAMEDSTREAMMAP_H #define LLVM_DEBUGINFO_PDB_NATIVE_NAMEDSTREAMMAP_H -#include "llvm/ADT/Optional.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/iterator_range.h" diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/NativeLineNumber.h b/llvm/include/llvm/DebugInfo/PDB/Native/NativeLineNumber.h index 5dedc70f11ba..be0ddf0a063a 100644 --- a/llvm/include/llvm/DebugInfo/PDB/Native/NativeLineNumber.h +++ b/llvm/include/llvm/DebugInfo/PDB/Native/NativeLineNumber.h @@ -9,7 +9,6 @@ #ifndef LLVM_DEBUGINFO_PDB_NATIVE_NATIVELINENUMBER_H #define LLVM_DEBUGINFO_PDB_NATIVE_NATIVELINENUMBER_H -#include "llvm/ADT/Optional.h" #include "llvm/DebugInfo/CodeView/Line.h" #include "llvm/DebugInfo/PDB/IPDBLineNumber.h" #include "llvm/DebugInfo/PDB/Native/NativeSession.h" diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeFunctionSig.h b/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeFunctionSig.h index 8f1834d0a2c2..90b5d8068959 100644 --- a/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeFunctionSig.h +++ b/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeFunctionSig.h @@ -9,7 +9,6 @@ #ifndef 
LLVM_DEBUGINFO_PDB_NATIVE_NATIVETYPEFUNCTIONSIG_H #define LLVM_DEBUGINFO_PDB_NATIVE_NATIVETYPEFUNCTIONSIG_H -#include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallVector.h" #include "llvm/DebugInfo/CodeView/CodeView.h" #include "llvm/DebugInfo/CodeView/TypeRecord.h" diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeVTShape.h b/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeVTShape.h index 4ae8f1471781..21995ca665c1 100644 --- a/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeVTShape.h +++ b/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeVTShape.h @@ -9,7 +9,6 @@ #ifndef LLVM_DEBUGINFO_PDB_NATIVE_NATIVETYPEVTSHAPE_H #define LLVM_DEBUGINFO_PDB_NATIVE_NATIVETYPEVTSHAPE_H -#include "llvm/ADT/Optional.h" #include "llvm/DebugInfo/CodeView/CodeView.h" #include "llvm/DebugInfo/CodeView/TypeRecord.h" #include "llvm/DebugInfo/PDB/Native/NativeRawSymbol.h" diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/PDBFileBuilder.h b/llvm/include/llvm/DebugInfo/PDB/Native/PDBFileBuilder.h index 3c414e7a9005..004d005280d4 100644 --- a/llvm/include/llvm/DebugInfo/PDB/Native/PDBFileBuilder.h +++ b/llvm/include/llvm/DebugInfo/PDB/Native/PDBFileBuilder.h @@ -10,7 +10,6 @@ #define LLVM_DEBUGINFO_PDB_NATIVE_PDBFILEBUILDER_H #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallVector.h" #include "llvm/DebugInfo/PDB/Native/NamedStreamMap.h" #include "llvm/DebugInfo/PDB/Native/PDBFile.h" diff --git a/llvm/include/llvm/Demangle/Demangle.h b/llvm/include/llvm/Demangle/Demangle.h index c396a1dc5dd3..3150e049320b 100644 --- a/llvm/include/llvm/Demangle/Demangle.h +++ b/llvm/include/llvm/Demangle/Demangle.h @@ -31,7 +31,6 @@ enum : int { char *itaniumDemangle(const char *mangled_name, char *buf, size_t *n, int *status); - enum MSDemangleFlags { MSDF_None = 0, MSDF_DumpBackrefs = 1 << 0, @@ -39,6 +38,7 @@ enum MSDemangleFlags { MSDF_NoCallingConvention = 1 << 2, MSDF_NoReturnType = 1 << 3, MSDF_NoMemberType = 1 << 4, + MSDF_NoVariableType = 1 << 5, 
}; /// Demangles the Microsoft symbol pointed at by mangled_name and returns it. @@ -53,13 +53,16 @@ enum MSDemangleFlags { /// receives the size of the demangled string on output if n_buf is not nullptr. /// status receives one of the demangle_ enum entries above if it's not nullptr. /// Flags controls various details of the demangled representation. -char *microsoftDemangle(const char *mangled_name, size_t *n_read, - char *buf, size_t *n_buf, - int *status, MSDemangleFlags Flags = MSDF_None); +char *microsoftDemangle(const char *mangled_name, size_t *n_read, char *buf, + size_t *n_buf, int *status, + MSDemangleFlags Flags = MSDF_None); // Demangles a Rust v0 mangled symbol. The API follows that of __cxa_demangle. char *rustDemangle(const char *MangledName, char *Buf, size_t *N, int *Status); +// Demangles a D mangled symbol. +char *dlangDemangle(const char *MangledName); + /// Attempt to demangle a string using different demangling schemes. /// The function uses heuristics to determine which demangling scheme to use. /// \param MangledName - reference to string to demangle. @@ -67,6 +70,8 @@ char *rustDemangle(const char *MangledName, char *Buf, size_t *N, int *Status); /// demangling occurred. std::string demangle(const std::string &MangledName); +bool nonMicrosoftDemangle(const char *MangledName, std::string &Result); + /// "Partial" demangler. This supports demangling a string into an AST /// (typically an intermediate stage in itaniumDemangle) and querying certain /// properties or partially printing the demangled name. 
@@ -118,6 +123,7 @@ struct ItaniumPartialDemangler { bool isSpecialName() const; ~ItaniumPartialDemangler(); + private: void *RootNode; void *Context; diff --git a/llvm/include/llvm/Demangle/ItaniumDemangle.h b/llvm/include/llvm/Demangle/ItaniumDemangle.h index 9163b713d118..86f5c992b63d 100644 --- a/llvm/include/llvm/Demangle/ItaniumDemangle.h +++ b/llvm/include/llvm/Demangle/ItaniumDemangle.h @@ -57,6 +57,7 @@ X(LocalName) \ X(VectorType) \ X(PixelVectorType) \ + X(BinaryFPType) \ X(SyntheticTemplateParamName) \ X(TypeTemplateParamDecl) \ X(NonTypeTemplateParamDecl) \ @@ -109,6 +110,126 @@ DEMANGLE_NAMESPACE_BEGIN +template <class T, size_t N> class PODSmallVector { + static_assert(std::is_pod<T>::value, + "T is required to be a plain old data type"); + + T *First = nullptr; + T *Last = nullptr; + T *Cap = nullptr; + T Inline[N] = {0}; + + bool isInline() const { return First == Inline; } + + void clearInline() { + First = Inline; + Last = Inline; + Cap = Inline + N; + } + + void reserve(size_t NewCap) { + size_t S = size(); + if (isInline()) { + auto *Tmp = static_cast<T *>(std::malloc(NewCap * sizeof(T))); + if (Tmp == nullptr) + std::terminate(); + std::copy(First, Last, Tmp); + First = Tmp; + } else { + First = static_cast<T *>(std::realloc(First, NewCap * sizeof(T))); + if (First == nullptr) + std::terminate(); + } + Last = First + S; + Cap = First + NewCap; + } + +public: + PODSmallVector() : First(Inline), Last(First), Cap(Inline + N) {} + + PODSmallVector(const PODSmallVector &) = delete; + PODSmallVector &operator=(const PODSmallVector &) = delete; + + PODSmallVector(PODSmallVector &&Other) : PODSmallVector() { + if (Other.isInline()) { + std::copy(Other.begin(), Other.end(), First); + Last = First + Other.size(); + Other.clear(); + return; + } + + First = Other.First; + Last = Other.Last; + Cap = Other.Cap; + Other.clearInline(); + } + + PODSmallVector &operator=(PODSmallVector &&Other) { + if (Other.isInline()) { + if (!isInline()) { + 
std::free(First); + clearInline(); + } + std::copy(Other.begin(), Other.end(), First); + Last = First + Other.size(); + Other.clear(); + return *this; + } + + if (isInline()) { + First = Other.First; + Last = Other.Last; + Cap = Other.Cap; + Other.clearInline(); + return *this; + } + + std::swap(First, Other.First); + std::swap(Last, Other.Last); + std::swap(Cap, Other.Cap); + Other.clear(); + return *this; + } + + // NOLINTNEXTLINE(readability-identifier-naming) + void push_back(const T &Elem) { + if (Last == Cap) + reserve(size() * 2); + *Last++ = Elem; + } + + // NOLINTNEXTLINE(readability-identifier-naming) + void pop_back() { + assert(Last != First && "Popping empty vector!"); + --Last; + } + + void dropBack(size_t Index) { + assert(Index <= size() && "dropBack() can't expand!"); + Last = First + Index; + } + + T *begin() { return First; } + T *end() { return Last; } + + bool empty() const { return First == Last; } + size_t size() const { return static_cast<size_t>(Last - First); } + T &back() { + assert(Last != First && "Calling back() on empty vector!"); + return *(Last - 1); + } + T &operator[](size_t Index) { + assert(Index < size() && "Invalid access!"); + return *(begin() + Index); + } + void clear() { Last = First; } + + ~PODSmallVector() { + if (!isInline()) + std::free(First); + } +}; + // Base class of all AST nodes. The AST is built by the parser, then is // traversed by the printLeft/Right functions to produce a demangled string. class Node { @@ -155,50 +276,48 @@ public: // would construct an equivalent node. 
//template<typename Fn> void match(Fn F) const; - bool hasRHSComponent(OutputStream &S) const { + bool hasRHSComponent(OutputBuffer &OB) const { if (RHSComponentCache != Cache::Unknown) return RHSComponentCache == Cache::Yes; - return hasRHSComponentSlow(S); + return hasRHSComponentSlow(OB); } - bool hasArray(OutputStream &S) const { + bool hasArray(OutputBuffer &OB) const { if (ArrayCache != Cache::Unknown) return ArrayCache == Cache::Yes; - return hasArraySlow(S); + return hasArraySlow(OB); } - bool hasFunction(OutputStream &S) const { + bool hasFunction(OutputBuffer &OB) const { if (FunctionCache != Cache::Unknown) return FunctionCache == Cache::Yes; - return hasFunctionSlow(S); + return hasFunctionSlow(OB); } Kind getKind() const { return K; } - virtual bool hasRHSComponentSlow(OutputStream &) const { return false; } - virtual bool hasArraySlow(OutputStream &) const { return false; } - virtual bool hasFunctionSlow(OutputStream &) const { return false; } + virtual bool hasRHSComponentSlow(OutputBuffer &) const { return false; } + virtual bool hasArraySlow(OutputBuffer &) const { return false; } + virtual bool hasFunctionSlow(OutputBuffer &) const { return false; } // Dig through "glue" nodes like ParameterPack and ForwardTemplateReference to // get at a node that actually represents some concrete syntax. - virtual const Node *getSyntaxNode(OutputStream &) const { - return this; - } + virtual const Node *getSyntaxNode(OutputBuffer &) const { return this; } - void print(OutputStream &S) const { - printLeft(S); + void print(OutputBuffer &OB) const { + printLeft(OB); if (RHSComponentCache != Cache::No) - printRight(S); + printRight(OB); } - // Print the "left" side of this Node into OutputStream. - virtual void printLeft(OutputStream &) const = 0; + // Print the "left" side of this Node into OutputBuffer. + virtual void printLeft(OutputBuffer &) const = 0; // Print the "right". 
This distinction is necessary to represent C++ types // that appear on the RHS of their subtype, such as arrays or functions. // Since most types don't have such a component, provide a default // implementation. - virtual void printRight(OutputStream &) const {} + virtual void printRight(OutputBuffer &) const {} virtual StringView getBaseName() const { return StringView(); } @@ -227,19 +346,19 @@ public: Node *operator[](size_t Idx) const { return Elements[Idx]; } - void printWithComma(OutputStream &S) const { + void printWithComma(OutputBuffer &OB) const { bool FirstElement = true; for (size_t Idx = 0; Idx != NumElements; ++Idx) { - size_t BeforeComma = S.getCurrentPosition(); + size_t BeforeComma = OB.getCurrentPosition(); if (!FirstElement) - S += ", "; - size_t AfterComma = S.getCurrentPosition(); - Elements[Idx]->print(S); + OB += ", "; + size_t AfterComma = OB.getCurrentPosition(); + Elements[Idx]->print(OB); // Elements[Idx] is an empty parameter pack expansion, we should erase the // comma we just printed. 
- if (AfterComma == S.getCurrentPosition()) { - S.setCurrentPosition(BeforeComma); + if (AfterComma == OB.getCurrentPosition()) { + OB.setCurrentPosition(BeforeComma); continue; } @@ -254,9 +373,7 @@ struct NodeArrayNode : Node { template<typename Fn> void match(Fn F) const { F(Array); } - void printLeft(OutputStream &S) const override { - Array.printWithComma(S); - } + void printLeft(OutputBuffer &OB) const override { Array.printWithComma(OB); } }; class DotSuffix final : public Node { @@ -269,11 +386,11 @@ public: template<typename Fn> void match(Fn F) const { F(Prefix, Suffix); } - void printLeft(OutputStream &s) const override { - Prefix->print(s); - s += " ("; - s += Suffix; - s += ")"; + void printLeft(OutputBuffer &OB) const override { + Prefix->print(OB); + OB += " ("; + OB += Suffix; + OB += ")"; } }; @@ -288,12 +405,12 @@ public: template <typename Fn> void match(Fn F) const { F(Ty, Ext, TA); } - void printLeft(OutputStream &S) const override { - Ty->print(S); - S += " "; - S += Ext; + void printLeft(OutputBuffer &OB) const override { + Ty->print(OB); + OB += " "; + OB += Ext; if (TA != nullptr) - TA->print(S); + TA->print(OB); } }; @@ -319,13 +436,13 @@ protected: const Qualifiers Quals; const Node *Child; - void printQuals(OutputStream &S) const { + void printQuals(OutputBuffer &OB) const { if (Quals & QualConst) - S += " const"; + OB += " const"; if (Quals & QualVolatile) - S += " volatile"; + OB += " volatile"; if (Quals & QualRestrict) - S += " restrict"; + OB += " restrict"; } public: @@ -336,22 +453,22 @@ public: template<typename Fn> void match(Fn F) const { F(Child, Quals); } - bool hasRHSComponentSlow(OutputStream &S) const override { - return Child->hasRHSComponent(S); + bool hasRHSComponentSlow(OutputBuffer &OB) const override { + return Child->hasRHSComponent(OB); } - bool hasArraySlow(OutputStream &S) const override { - return Child->hasArray(S); + bool hasArraySlow(OutputBuffer &OB) const override { + return Child->hasArray(OB); } - bool 
hasFunctionSlow(OutputStream &S) const override { - return Child->hasFunction(S); + bool hasFunctionSlow(OutputBuffer &OB) const override { + return Child->hasFunction(OB); } - void printLeft(OutputStream &S) const override { - Child->printLeft(S); - printQuals(S); + void printLeft(OutputBuffer &OB) const override { + Child->printLeft(OB); + printQuals(OB); } - void printRight(OutputStream &S) const override { Child->printRight(S); } + void printRight(OutputBuffer &OB) const override { Child->printRight(OB); } }; class ConversionOperatorType final : public Node { @@ -363,9 +480,9 @@ public: template<typename Fn> void match(Fn F) const { F(Ty); } - void printLeft(OutputStream &S) const override { - S += "operator "; - Ty->print(S); + void printLeft(OutputBuffer &OB) const override { + OB += "operator "; + Ty->print(OB); } }; @@ -379,9 +496,9 @@ public: template<typename Fn> void match(Fn F) const { F(Ty, Postfix); } - void printLeft(OutputStream &s) const override { - Ty->printLeft(s); - s += Postfix; + void printLeft(OutputBuffer &OB) const override { + Ty->printLeft(OB); + OB += Postfix; } }; @@ -396,7 +513,7 @@ public: StringView getName() const { return Name; } StringView getBaseName() const override { return Name; } - void printLeft(OutputStream &s) const override { s += Name; } + void printLeft(OutputBuffer &OB) const override { OB += Name; } }; class ElaboratedTypeSpefType : public Node { @@ -408,10 +525,10 @@ public: template<typename Fn> void match(Fn F) const { F(Kind, Child); } - void printLeft(OutputStream &S) const override { - S += Kind; - S += ' '; - Child->print(S); + void printLeft(OutputBuffer &OB) const override { + OB += Kind; + OB += ' '; + Child->print(OB); } }; @@ -426,11 +543,11 @@ struct AbiTagAttr : Node { template<typename Fn> void match(Fn F) const { F(Base, Tag); } - void printLeft(OutputStream &S) const override { - Base->printLeft(S); - S += "[abi:"; - S += Tag; - S += "]"; + void printLeft(OutputBuffer &OB) const override { + 
Base->printLeft(OB); + OB += "[abi:"; + OB += Tag; + OB += "]"; } }; @@ -442,10 +559,10 @@ public: template<typename Fn> void match(Fn F) const { F(Conditions); } - void printLeft(OutputStream &S) const override { - S += " [enable_if:"; - Conditions.printWithComma(S); - S += ']'; + void printLeft(OutputBuffer &OB) const override { + OB += " [enable_if:"; + Conditions.printWithComma(OB); + OB += ']'; } }; @@ -466,11 +583,11 @@ public: static_cast<const NameType *>(Ty)->getName() == "objc_object"; } - void printLeft(OutputStream &S) const override { - Ty->print(S); - S += "<"; - S += Protocol; - S += ">"; + void printLeft(OutputBuffer &OB) const override { + Ty->print(OB); + OB += "<"; + OB += Protocol; + OB += ">"; } }; @@ -484,34 +601,34 @@ public: template<typename Fn> void match(Fn F) const { F(Pointee); } - bool hasRHSComponentSlow(OutputStream &S) const override { - return Pointee->hasRHSComponent(S); + bool hasRHSComponentSlow(OutputBuffer &OB) const override { + return Pointee->hasRHSComponent(OB); } - void printLeft(OutputStream &s) const override { + void printLeft(OutputBuffer &OB) const override { // We rewrite objc_object<SomeProtocol>* into id<SomeProtocol>. 
if (Pointee->getKind() != KObjCProtoName || !static_cast<const ObjCProtoName *>(Pointee)->isObjCObject()) { - Pointee->printLeft(s); - if (Pointee->hasArray(s)) - s += " "; - if (Pointee->hasArray(s) || Pointee->hasFunction(s)) - s += "("; - s += "*"; + Pointee->printLeft(OB); + if (Pointee->hasArray(OB)) + OB += " "; + if (Pointee->hasArray(OB) || Pointee->hasFunction(OB)) + OB += "("; + OB += "*"; } else { const auto *objcProto = static_cast<const ObjCProtoName *>(Pointee); - s += "id<"; - s += objcProto->Protocol; - s += ">"; + OB += "id<"; + OB += objcProto->Protocol; + OB += ">"; } } - void printRight(OutputStream &s) const override { + void printRight(OutputBuffer &OB) const override { if (Pointee->getKind() != KObjCProtoName || !static_cast<const ObjCProtoName *>(Pointee)->isObjCObject()) { - if (Pointee->hasArray(s) || Pointee->hasFunction(s)) - s += ")"; - Pointee->printRight(s); + if (Pointee->hasArray(OB) || Pointee->hasFunction(OB)) + OB += ")"; + Pointee->printRight(OB); } } }; @@ -531,15 +648,30 @@ class ReferenceType : public Node { // Dig through any refs to refs, collapsing the ReferenceTypes as we go. The // rule here is rvalue ref to rvalue ref collapses to a rvalue ref, and any // other combination collapses to a lvalue ref. - std::pair<ReferenceKind, const Node *> collapse(OutputStream &S) const { + // + // A combination of a TemplateForwardReference and a back-ref Substitution + // from an ill-formed string may have created a cycle; use cycle detection to + // avoid looping forever. 
+ std::pair<ReferenceKind, const Node *> collapse(OutputBuffer &OB) const { auto SoFar = std::make_pair(RK, Pointee); + // Track the chain of nodes for the Floyd's 'tortoise and hare' + // cycle-detection algorithm, since getSyntaxNode(S) is impure + PODSmallVector<const Node *, 8> Prev; for (;;) { - const Node *SN = SoFar.second->getSyntaxNode(S); + const Node *SN = SoFar.second->getSyntaxNode(OB); if (SN->getKind() != KReferenceType) break; auto *RT = static_cast<const ReferenceType *>(SN); SoFar.second = RT->Pointee; SoFar.first = std::min(SoFar.first, RT->RK); + + // The middle of Prev is the 'slow' pointer moving at half speed + Prev.push_back(SoFar.second); + if (Prev.size() > 1 && SoFar.second == Prev[(Prev.size() - 1) / 2]) { + // Cycle detected + SoFar.second = nullptr; + break; + } } return SoFar; } @@ -551,31 +683,35 @@ public: template<typename Fn> void match(Fn F) const { F(Pointee, RK); } - bool hasRHSComponentSlow(OutputStream &S) const override { - return Pointee->hasRHSComponent(S); + bool hasRHSComponentSlow(OutputBuffer &OB) const override { + return Pointee->hasRHSComponent(OB); } - void printLeft(OutputStream &s) const override { + void printLeft(OutputBuffer &OB) const override { if (Printing) return; SwapAndRestore<bool> SavePrinting(Printing, true); - std::pair<ReferenceKind, const Node *> Collapsed = collapse(s); - Collapsed.second->printLeft(s); - if (Collapsed.second->hasArray(s)) - s += " "; - if (Collapsed.second->hasArray(s) || Collapsed.second->hasFunction(s)) - s += "("; + std::pair<ReferenceKind, const Node *> Collapsed = collapse(OB); + if (!Collapsed.second) + return; + Collapsed.second->printLeft(OB); + if (Collapsed.second->hasArray(OB)) + OB += " "; + if (Collapsed.second->hasArray(OB) || Collapsed.second->hasFunction(OB)) + OB += "("; - s += (Collapsed.first == ReferenceKind::LValue ? "&" : "&&"); + OB += (Collapsed.first == ReferenceKind::LValue ? 
"&" : "&&"); } - void printRight(OutputStream &s) const override { + void printRight(OutputBuffer &OB) const override { if (Printing) return; SwapAndRestore<bool> SavePrinting(Printing, true); - std::pair<ReferenceKind, const Node *> Collapsed = collapse(s); - if (Collapsed.second->hasArray(s) || Collapsed.second->hasFunction(s)) - s += ")"; - Collapsed.second->printRight(s); + std::pair<ReferenceKind, const Node *> Collapsed = collapse(OB); + if (!Collapsed.second) + return; + if (Collapsed.second->hasArray(OB) || Collapsed.second->hasFunction(OB)) + OB += ")"; + Collapsed.second->printRight(OB); } }; @@ -590,24 +726,24 @@ public: template<typename Fn> void match(Fn F) const { F(ClassType, MemberType); } - bool hasRHSComponentSlow(OutputStream &S) const override { - return MemberType->hasRHSComponent(S); + bool hasRHSComponentSlow(OutputBuffer &OB) const override { + return MemberType->hasRHSComponent(OB); } - void printLeft(OutputStream &s) const override { - MemberType->printLeft(s); - if (MemberType->hasArray(s) || MemberType->hasFunction(s)) - s += "("; + void printLeft(OutputBuffer &OB) const override { + MemberType->printLeft(OB); + if (MemberType->hasArray(OB) || MemberType->hasFunction(OB)) + OB += "("; else - s += " "; - ClassType->print(s); - s += "::*"; + OB += " "; + ClassType->print(OB); + OB += "::*"; } - void printRight(OutputStream &s) const override { - if (MemberType->hasArray(s) || MemberType->hasFunction(s)) - s += ")"; - MemberType->printRight(s); + void printRight(OutputBuffer &OB) const override { + if (MemberType->hasArray(OB) || MemberType->hasFunction(OB)) + OB += ")"; + MemberType->printRight(OB); } }; @@ -624,19 +760,19 @@ public: template<typename Fn> void match(Fn F) const { F(Base, Dimension); } - bool hasRHSComponentSlow(OutputStream &) const override { return true; } - bool hasArraySlow(OutputStream &) const override { return true; } + bool hasRHSComponentSlow(OutputBuffer &) const override { return true; } + bool 
hasArraySlow(OutputBuffer &) const override { return true; } - void printLeft(OutputStream &S) const override { Base->printLeft(S); } + void printLeft(OutputBuffer &OB) const override { Base->printLeft(OB); } - void printRight(OutputStream &S) const override { - if (S.back() != ']') - S += " "; - S += "["; + void printRight(OutputBuffer &OB) const override { + if (OB.back() != ']') + OB += " "; + OB += "["; if (Dimension) - Dimension->print(S); - S += "]"; - Base->printRight(S); + Dimension->print(OB); + OB += "]"; + Base->printRight(OB); } }; @@ -660,8 +796,8 @@ public: F(Ret, Params, CVQuals, RefQual, ExceptionSpec); } - bool hasRHSComponentSlow(OutputStream &) const override { return true; } - bool hasFunctionSlow(OutputStream &) const override { return true; } + bool hasRHSComponentSlow(OutputBuffer &) const override { return true; } + bool hasFunctionSlow(OutputBuffer &) const override { return true; } // Handle C++'s ... quirky decl grammar by using the left & right // distinction. Consider: @@ -670,32 +806,32 @@ public: // that takes a char and returns an int. If we're trying to print f, start // by printing out the return types's left, then print our parameters, then // finally print right of the return type. 
- void printLeft(OutputStream &S) const override { - Ret->printLeft(S); - S += " "; + void printLeft(OutputBuffer &OB) const override { + Ret->printLeft(OB); + OB += " "; } - void printRight(OutputStream &S) const override { - S += "("; - Params.printWithComma(S); - S += ")"; - Ret->printRight(S); + void printRight(OutputBuffer &OB) const override { + OB += "("; + Params.printWithComma(OB); + OB += ")"; + Ret->printRight(OB); if (CVQuals & QualConst) - S += " const"; + OB += " const"; if (CVQuals & QualVolatile) - S += " volatile"; + OB += " volatile"; if (CVQuals & QualRestrict) - S += " restrict"; + OB += " restrict"; if (RefQual == FrefQualLValue) - S += " &"; + OB += " &"; else if (RefQual == FrefQualRValue) - S += " &&"; + OB += " &&"; if (ExceptionSpec != nullptr) { - S += ' '; - ExceptionSpec->print(S); + OB += ' '; + ExceptionSpec->print(OB); } } }; @@ -707,10 +843,10 @@ public: template<typename Fn> void match(Fn F) const { F(E); } - void printLeft(OutputStream &S) const override { - S += "noexcept("; - E->print(S); - S += ")"; + void printLeft(OutputBuffer &OB) const override { + OB += "noexcept("; + E->print(OB); + OB += ")"; } }; @@ -722,10 +858,10 @@ public: template<typename Fn> void match(Fn F) const { F(Types); } - void printLeft(OutputStream &S) const override { - S += "throw("; - Types.printWithComma(S); - S += ')'; + void printLeft(OutputBuffer &OB) const override { + OB += "throw("; + Types.printWithComma(OB); + OB += ')'; } }; @@ -756,41 +892,41 @@ public: NodeArray getParams() const { return Params; } const Node *getReturnType() const { return Ret; } - bool hasRHSComponentSlow(OutputStream &) const override { return true; } - bool hasFunctionSlow(OutputStream &) const override { return true; } + bool hasRHSComponentSlow(OutputBuffer &) const override { return true; } + bool hasFunctionSlow(OutputBuffer &) const override { return true; } const Node *getName() const { return Name; } - void printLeft(OutputStream &S) const override { + void 
printLeft(OutputBuffer &OB) const override { if (Ret) { - Ret->printLeft(S); - if (!Ret->hasRHSComponent(S)) - S += " "; + Ret->printLeft(OB); + if (!Ret->hasRHSComponent(OB)) + OB += " "; } - Name->print(S); + Name->print(OB); } - void printRight(OutputStream &S) const override { - S += "("; - Params.printWithComma(S); - S += ")"; + void printRight(OutputBuffer &OB) const override { + OB += "("; + Params.printWithComma(OB); + OB += ")"; if (Ret) - Ret->printRight(S); + Ret->printRight(OB); if (CVQuals & QualConst) - S += " const"; + OB += " const"; if (CVQuals & QualVolatile) - S += " volatile"; + OB += " volatile"; if (CVQuals & QualRestrict) - S += " restrict"; + OB += " restrict"; if (RefQual == FrefQualLValue) - S += " &"; + OB += " &"; else if (RefQual == FrefQualRValue) - S += " &&"; + OB += " &&"; if (Attrs != nullptr) - Attrs->print(S); + Attrs->print(OB); } }; @@ -803,9 +939,9 @@ public: template<typename Fn> void match(Fn F) const { F(OpName); } - void printLeft(OutputStream &S) const override { - S += "operator\"\" "; - OpName->print(S); + void printLeft(OutputBuffer &OB) const override { + OB += "operator\"\" "; + OpName->print(OB); } }; @@ -819,9 +955,9 @@ public: template<typename Fn> void match(Fn F) const { F(Special, Child); } - void printLeft(OutputStream &S) const override { - S += Special; - Child->print(S); + void printLeft(OutputBuffer &OB) const override { + OB += Special; + Child->print(OB); } }; @@ -836,11 +972,11 @@ public: template<typename Fn> void match(Fn F) const { F(FirstType, SecondType); } - void printLeft(OutputStream &S) const override { - S += "construction vtable for "; - FirstType->print(S); - S += "-in-"; - SecondType->print(S); + void printLeft(OutputBuffer &OB) const override { + OB += "construction vtable for "; + FirstType->print(OB); + OB += "-in-"; + SecondType->print(OB); } }; @@ -855,10 +991,10 @@ struct NestedName : Node { StringView getBaseName() const override { return Name->getBaseName(); } - void 
printLeft(OutputStream &S) const override { - Qual->print(S); - S += "::"; - Name->print(S); + void printLeft(OutputBuffer &OB) const override { + Qual->print(OB); + OB += "::"; + Name->print(OB); } }; @@ -871,10 +1007,10 @@ struct LocalName : Node { template<typename Fn> void match(Fn F) const { F(Encoding, Entity); } - void printLeft(OutputStream &S) const override { - Encoding->print(S); - S += "::"; - Entity->print(S); + void printLeft(OutputBuffer &OB) const override { + Encoding->print(OB); + OB += "::"; + Entity->print(OB); } }; @@ -891,10 +1027,10 @@ public: StringView getBaseName() const override { return Name->getBaseName(); } - void printLeft(OutputStream &S) const override { - Qualifier->print(S); - S += "::"; - Name->print(S); + void printLeft(OutputBuffer &OB) const override { + Qualifier->print(OB); + OB += "::"; + Name->print(OB); } }; @@ -909,12 +1045,12 @@ public: template<typename Fn> void match(Fn F) const { F(BaseType, Dimension); } - void printLeft(OutputStream &S) const override { - BaseType->print(S); - S += " vector["; + void printLeft(OutputBuffer &OB) const override { + BaseType->print(OB); + OB += " vector["; if (Dimension) - Dimension->print(S); - S += "]"; + Dimension->print(OB); + OB += "]"; } }; @@ -927,11 +1063,26 @@ public: template<typename Fn> void match(Fn F) const { F(Dimension); } - void printLeft(OutputStream &S) const override { + void printLeft(OutputBuffer &OB) const override { // FIXME: This should demangle as "vector pixel". 
- S += "pixel vector["; - Dimension->print(S); - S += "]"; + OB += "pixel vector["; + Dimension->print(OB); + OB += "]"; + } +}; + +class BinaryFPType final : public Node { + const Node *Dimension; + +public: + BinaryFPType(const Node *Dimension_) + : Node(KBinaryFPType), Dimension(Dimension_) {} + + template<typename Fn> void match(Fn F) const { F(Dimension); } + + void printLeft(OutputBuffer &OB) const override { + OB += "_Float"; + Dimension->print(OB); } }; @@ -953,20 +1104,20 @@ public: template<typename Fn> void match(Fn F) const { F(Kind, Index); } - void printLeft(OutputStream &S) const override { + void printLeft(OutputBuffer &OB) const override { switch (Kind) { case TemplateParamKind::Type: - S += "$T"; + OB += "$T"; break; case TemplateParamKind::NonType: - S += "$N"; + OB += "$N"; break; case TemplateParamKind::Template: - S += "$TT"; + OB += "$TT"; break; } if (Index > 0) - S << Index - 1; + OB << Index - 1; } }; @@ -980,13 +1131,9 @@ public: template<typename Fn> void match(Fn F) const { F(Name); } - void printLeft(OutputStream &S) const override { - S += "typename "; - } + void printLeft(OutputBuffer &OB) const override { OB += "typename "; } - void printRight(OutputStream &S) const override { - Name->print(S); - } + void printRight(OutputBuffer &OB) const override { Name->print(OB); } }; /// A non-type template parameter declaration, 'int N'. 
@@ -1000,15 +1147,15 @@ public: template<typename Fn> void match(Fn F) const { F(Name, Type); } - void printLeft(OutputStream &S) const override { - Type->printLeft(S); - if (!Type->hasRHSComponent(S)) - S += " "; + void printLeft(OutputBuffer &OB) const override { + Type->printLeft(OB); + if (!Type->hasRHSComponent(OB)) + OB += " "; } - void printRight(OutputStream &S) const override { - Name->print(S); - Type->printRight(S); + void printRight(OutputBuffer &OB) const override { + Name->print(OB); + Type->printRight(OB); } }; @@ -1025,15 +1172,13 @@ public: template<typename Fn> void match(Fn F) const { F(Name, Params); } - void printLeft(OutputStream &S) const override { - S += "template<"; - Params.printWithComma(S); - S += "> typename "; + void printLeft(OutputBuffer &OB) const override { + OB += "template<"; + Params.printWithComma(OB); + OB += "> typename "; } - void printRight(OutputStream &S) const override { - Name->print(S); - } + void printRight(OutputBuffer &OB) const override { Name->print(OB); } }; /// A template parameter pack declaration, 'typename ...T'. @@ -1046,14 +1191,12 @@ public: template<typename Fn> void match(Fn F) const { F(Param); } - void printLeft(OutputStream &S) const override { - Param->printLeft(S); - S += "..."; + void printLeft(OutputBuffer &OB) const override { + Param->printLeft(OB); + OB += "..."; } - void printRight(OutputStream &S) const override { - Param->printRight(S); - } + void printRight(OutputBuffer &OB) const override { Param->printRight(OB); } }; /// An unexpanded parameter pack (either in the expression or type context). If @@ -1067,11 +1210,11 @@ public: class ParameterPack final : public Node { NodeArray Data; - // Setup OutputStream for a pack expansion unless we're already expanding one. 
- void initializePackExpansion(OutputStream &S) const { - if (S.CurrentPackMax == std::numeric_limits<unsigned>::max()) { - S.CurrentPackMax = static_cast<unsigned>(Data.size()); - S.CurrentPackIndex = 0; + // Setup OutputBuffer for a pack expansion unless we're already expanding one. + void initializePackExpansion(OutputBuffer &OB) const { + if (OB.CurrentPackMax == std::numeric_limits<unsigned>::max()) { + OB.CurrentPackMax = static_cast<unsigned>(Data.size()); + OB.CurrentPackIndex = 0; } } @@ -1094,38 +1237,38 @@ public: template<typename Fn> void match(Fn F) const { F(Data); } - bool hasRHSComponentSlow(OutputStream &S) const override { - initializePackExpansion(S); - size_t Idx = S.CurrentPackIndex; - return Idx < Data.size() && Data[Idx]->hasRHSComponent(S); + bool hasRHSComponentSlow(OutputBuffer &OB) const override { + initializePackExpansion(OB); + size_t Idx = OB.CurrentPackIndex; + return Idx < Data.size() && Data[Idx]->hasRHSComponent(OB); } - bool hasArraySlow(OutputStream &S) const override { - initializePackExpansion(S); - size_t Idx = S.CurrentPackIndex; - return Idx < Data.size() && Data[Idx]->hasArray(S); + bool hasArraySlow(OutputBuffer &OB) const override { + initializePackExpansion(OB); + size_t Idx = OB.CurrentPackIndex; + return Idx < Data.size() && Data[Idx]->hasArray(OB); } - bool hasFunctionSlow(OutputStream &S) const override { - initializePackExpansion(S); - size_t Idx = S.CurrentPackIndex; - return Idx < Data.size() && Data[Idx]->hasFunction(S); + bool hasFunctionSlow(OutputBuffer &OB) const override { + initializePackExpansion(OB); + size_t Idx = OB.CurrentPackIndex; + return Idx < Data.size() && Data[Idx]->hasFunction(OB); } - const Node *getSyntaxNode(OutputStream &S) const override { - initializePackExpansion(S); - size_t Idx = S.CurrentPackIndex; - return Idx < Data.size() ? 
Data[Idx]->getSyntaxNode(S) : this; + const Node *getSyntaxNode(OutputBuffer &OB) const override { + initializePackExpansion(OB); + size_t Idx = OB.CurrentPackIndex; + return Idx < Data.size() ? Data[Idx]->getSyntaxNode(OB) : this; } - void printLeft(OutputStream &S) const override { - initializePackExpansion(S); - size_t Idx = S.CurrentPackIndex; + void printLeft(OutputBuffer &OB) const override { + initializePackExpansion(OB); + size_t Idx = OB.CurrentPackIndex; if (Idx < Data.size()) - Data[Idx]->printLeft(S); + Data[Idx]->printLeft(OB); } - void printRight(OutputStream &S) const override { - initializePackExpansion(S); - size_t Idx = S.CurrentPackIndex; + void printRight(OutputBuffer &OB) const override { + initializePackExpansion(OB); + size_t Idx = OB.CurrentPackIndex; if (Idx < Data.size()) - Data[Idx]->printRight(S); + Data[Idx]->printRight(OB); } }; @@ -1144,8 +1287,8 @@ public: NodeArray getElements() const { return Elements; } - void printLeft(OutputStream &S) const override { - Elements.printWithComma(S); + void printLeft(OutputBuffer &OB) const override { + Elements.printWithComma(OB); } }; @@ -1162,35 +1305,35 @@ public: const Node *getChild() const { return Child; } - void printLeft(OutputStream &S) const override { + void printLeft(OutputBuffer &OB) const override { constexpr unsigned Max = std::numeric_limits<unsigned>::max(); - SwapAndRestore<unsigned> SavePackIdx(S.CurrentPackIndex, Max); - SwapAndRestore<unsigned> SavePackMax(S.CurrentPackMax, Max); - size_t StreamPos = S.getCurrentPosition(); + SwapAndRestore<unsigned> SavePackIdx(OB.CurrentPackIndex, Max); + SwapAndRestore<unsigned> SavePackMax(OB.CurrentPackMax, Max); + size_t StreamPos = OB.getCurrentPosition(); // Print the first element in the pack. If Child contains a ParameterPack, // it will set up S.CurrentPackMax and print the first element. - Child->print(S); + Child->print(OB); // No ParameterPack was found in Child. 
This can occur if we've found a pack // expansion on a <function-param>. - if (S.CurrentPackMax == Max) { - S += "..."; + if (OB.CurrentPackMax == Max) { + OB += "..."; return; } // We found a ParameterPack, but it has no elements. Erase whatever we may // of printed. - if (S.CurrentPackMax == 0) { - S.setCurrentPosition(StreamPos); + if (OB.CurrentPackMax == 0) { + OB.setCurrentPosition(StreamPos); return; } // Else, iterate through the rest of the elements in the pack. - for (unsigned I = 1, E = S.CurrentPackMax; I < E; ++I) { - S += ", "; - S.CurrentPackIndex = I; - Child->print(S); + for (unsigned I = 1, E = OB.CurrentPackMax; I < E; ++I) { + OB += ", "; + OB.CurrentPackIndex = I; + Child->print(OB); } } }; @@ -1205,12 +1348,12 @@ public: NodeArray getParams() { return Params; } - void printLeft(OutputStream &S) const override { - S += "<"; - Params.printWithComma(S); - if (S.back() == '>') - S += " "; - S += ">"; + void printLeft(OutputBuffer &OB) const override { + OB += "<"; + Params.printWithComma(OB); + if (OB.back() == '>') + OB += " "; + OB += ">"; } }; @@ -1252,42 +1395,42 @@ struct ForwardTemplateReference : Node { // special handling. 
template<typename Fn> void match(Fn F) const = delete; - bool hasRHSComponentSlow(OutputStream &S) const override { + bool hasRHSComponentSlow(OutputBuffer &OB) const override { if (Printing) return false; SwapAndRestore<bool> SavePrinting(Printing, true); - return Ref->hasRHSComponent(S); + return Ref->hasRHSComponent(OB); } - bool hasArraySlow(OutputStream &S) const override { + bool hasArraySlow(OutputBuffer &OB) const override { if (Printing) return false; SwapAndRestore<bool> SavePrinting(Printing, true); - return Ref->hasArray(S); + return Ref->hasArray(OB); } - bool hasFunctionSlow(OutputStream &S) const override { + bool hasFunctionSlow(OutputBuffer &OB) const override { if (Printing) return false; SwapAndRestore<bool> SavePrinting(Printing, true); - return Ref->hasFunction(S); + return Ref->hasFunction(OB); } - const Node *getSyntaxNode(OutputStream &S) const override { + const Node *getSyntaxNode(OutputBuffer &OB) const override { if (Printing) return this; SwapAndRestore<bool> SavePrinting(Printing, true); - return Ref->getSyntaxNode(S); + return Ref->getSyntaxNode(OB); } - void printLeft(OutputStream &S) const override { + void printLeft(OutputBuffer &OB) const override { if (Printing) return; SwapAndRestore<bool> SavePrinting(Printing, true); - Ref->printLeft(S); + Ref->printLeft(OB); } - void printRight(OutputStream &S) const override { + void printRight(OutputBuffer &OB) const override { if (Printing) return; SwapAndRestore<bool> SavePrinting(Printing, true); - Ref->printRight(S); + Ref->printRight(OB); } }; @@ -1303,9 +1446,9 @@ struct NameWithTemplateArgs : Node { StringView getBaseName() const override { return Name->getBaseName(); } - void printLeft(OutputStream &S) const override { - Name->print(S); - TemplateArgs->print(S); + void printLeft(OutputBuffer &OB) const override { + Name->print(OB); + TemplateArgs->print(OB); } }; @@ -1320,9 +1463,9 @@ public: StringView getBaseName() const override { return Child->getBaseName(); } - void 
printLeft(OutputStream &S) const override { - S += "::"; - Child->print(S); + void printLeft(OutputBuffer &OB) const override { + OB += "::"; + Child->print(OB); } }; @@ -1335,9 +1478,9 @@ struct StdQualifiedName : Node { StringView getBaseName() const override { return Child->getBaseName(); } - void printLeft(OutputStream &S) const override { - S += "std::"; - Child->print(S); + void printLeft(OutputBuffer &OB) const override { + OB += "std::"; + Child->print(OB); } }; @@ -1377,26 +1520,26 @@ public: DEMANGLE_UNREACHABLE; } - void printLeft(OutputStream &S) const override { + void printLeft(OutputBuffer &OB) const override { switch (SSK) { case SpecialSubKind::allocator: - S += "std::allocator"; + OB += "std::allocator"; break; case SpecialSubKind::basic_string: - S += "std::basic_string"; + OB += "std::basic_string"; break; case SpecialSubKind::string: - S += "std::basic_string<char, std::char_traits<char>, " - "std::allocator<char> >"; + OB += "std::basic_string<char, std::char_traits<char>, " + "std::allocator<char> >"; break; case SpecialSubKind::istream: - S += "std::basic_istream<char, std::char_traits<char> >"; + OB += "std::basic_istream<char, std::char_traits<char> >"; break; case SpecialSubKind::ostream: - S += "std::basic_ostream<char, std::char_traits<char> >"; + OB += "std::basic_ostream<char, std::char_traits<char> >"; break; case SpecialSubKind::iostream: - S += "std::basic_iostream<char, std::char_traits<char> >"; + OB += "std::basic_iostream<char, std::char_traits<char> >"; break; } } @@ -1429,25 +1572,25 @@ public: DEMANGLE_UNREACHABLE; } - void printLeft(OutputStream &S) const override { + void printLeft(OutputBuffer &OB) const override { switch (SSK) { case SpecialSubKind::allocator: - S += "std::allocator"; + OB += "std::allocator"; break; case SpecialSubKind::basic_string: - S += "std::basic_string"; + OB += "std::basic_string"; break; case SpecialSubKind::string: - S += "std::string"; + OB += "std::string"; break; case 
SpecialSubKind::istream: - S += "std::istream"; + OB += "std::istream"; break; case SpecialSubKind::ostream: - S += "std::ostream"; + OB += "std::ostream"; break; case SpecialSubKind::iostream: - S += "std::iostream"; + OB += "std::iostream"; break; } } @@ -1465,10 +1608,10 @@ public: template<typename Fn> void match(Fn F) const { F(Basename, IsDtor, Variant); } - void printLeft(OutputStream &S) const override { + void printLeft(OutputBuffer &OB) const override { if (IsDtor) - S += "~"; - S += Basename->getBaseName(); + OB += "~"; + OB += Basename->getBaseName(); } }; @@ -1480,9 +1623,9 @@ public: template<typename Fn> void match(Fn F) const { F(Base); } - void printLeft(OutputStream &S) const override { - S += "~"; - Base->printLeft(S); + void printLeft(OutputBuffer &OB) const override { + OB += "~"; + Base->printLeft(OB); } }; @@ -1494,10 +1637,10 @@ public: template<typename Fn> void match(Fn F) const { F(Count); } - void printLeft(OutputStream &S) const override { - S += "'unnamed"; - S += Count; - S += "\'"; + void printLeft(OutputBuffer &OB) const override { + OB += "'unnamed"; + OB += Count; + OB += "\'"; } }; @@ -1516,22 +1659,22 @@ public: F(TemplateParams, Params, Count); } - void printDeclarator(OutputStream &S) const { + void printDeclarator(OutputBuffer &OB) const { if (!TemplateParams.empty()) { - S += "<"; - TemplateParams.printWithComma(S); - S += ">"; + OB += "<"; + TemplateParams.printWithComma(OB); + OB += ">"; } - S += "("; - Params.printWithComma(S); - S += ")"; + OB += "("; + Params.printWithComma(OB); + OB += ")"; } - void printLeft(OutputStream &S) const override { - S += "\'lambda"; - S += Count; - S += "\'"; - printDeclarator(S); + void printLeft(OutputBuffer &OB) const override { + OB += "\'lambda"; + OB += Count; + OB += "\'"; + printDeclarator(OB); } }; @@ -1543,10 +1686,10 @@ public: template<typename Fn> void match(Fn F) const { F(Bindings); } - void printLeft(OutputStream &S) const override { - S += '['; - Bindings.printWithComma(S); 
- S += ']'; + void printLeft(OutputBuffer &OB) const override { + OB += '['; + Bindings.printWithComma(OB); + OB += ']'; } }; @@ -1564,22 +1707,22 @@ public: template<typename Fn> void match(Fn F) const { F(LHS, InfixOperator, RHS); } - void printLeft(OutputStream &S) const override { + void printLeft(OutputBuffer &OB) const override { // might be a template argument expression, then we need to disambiguate // with parens. if (InfixOperator == ">") - S += "("; + OB += "("; - S += "("; - LHS->print(S); - S += ") "; - S += InfixOperator; - S += " ("; - RHS->print(S); - S += ")"; + OB += "("; + LHS->print(OB); + OB += ") "; + OB += InfixOperator; + OB += " ("; + RHS->print(OB); + OB += ")"; if (InfixOperator == ">") - S += ")"; + OB += ")"; } }; @@ -1593,12 +1736,12 @@ public: template<typename Fn> void match(Fn F) const { F(Op1, Op2); } - void printLeft(OutputStream &S) const override { - S += "("; - Op1->print(S); - S += ")["; - Op2->print(S); - S += "]"; + void printLeft(OutputBuffer &OB) const override { + OB += "("; + Op1->print(OB); + OB += ")["; + Op2->print(OB); + OB += "]"; } }; @@ -1612,11 +1755,11 @@ public: template<typename Fn> void match(Fn F) const { F(Child, Operator); } - void printLeft(OutputStream &S) const override { - S += "("; - Child->print(S); - S += ")"; - S += Operator; + void printLeft(OutputBuffer &OB) const override { + OB += "("; + Child->print(OB); + OB += ")"; + OB += Operator; } }; @@ -1631,14 +1774,14 @@ public: template<typename Fn> void match(Fn F) const { F(Cond, Then, Else); } - void printLeft(OutputStream &S) const override { - S += "("; - Cond->print(S); - S += ") ? ("; - Then->print(S); - S += ") : ("; - Else->print(S); - S += ")"; + void printLeft(OutputBuffer &OB) const override { + OB += "("; + Cond->print(OB); + OB += ") ? 
("; + Then->print(OB); + OB += ") : ("; + Else->print(OB); + OB += ")"; } }; @@ -1653,10 +1796,10 @@ public: template<typename Fn> void match(Fn F) const { F(LHS, Kind, RHS); } - void printLeft(OutputStream &S) const override { - LHS->print(S); - S += Kind; - RHS->print(S); + void printLeft(OutputBuffer &OB) const override { + LHS->print(OB); + OB += Kind; + RHS->print(OB); } }; @@ -1677,20 +1820,20 @@ public: F(Type, SubExpr, Offset, UnionSelectors, OnePastTheEnd); } - void printLeft(OutputStream &S) const override { - SubExpr->print(S); - S += ".<"; - Type->print(S); - S += " at offset "; + void printLeft(OutputBuffer &OB) const override { + SubExpr->print(OB); + OB += ".<"; + Type->print(OB); + OB += " at offset "; if (Offset.empty()) { - S += "0"; + OB += "0"; } else if (Offset[0] == 'n') { - S += "-"; - S += Offset.dropFront(); + OB += "-"; + OB += Offset.dropFront(); } else { - S += Offset; + OB += Offset; } - S += ">"; + OB += ">"; } }; @@ -1706,10 +1849,10 @@ public: template<typename Fn> void match(Fn F) const { F(Prefix, Infix, Postfix); } - void printLeft(OutputStream &S) const override { - S += Prefix; - Infix->print(S); - S += Postfix; + void printLeft(OutputBuffer &OB) const override { + OB += Prefix; + Infix->print(OB); + OB += Postfix; } }; @@ -1725,13 +1868,13 @@ public: template<typename Fn> void match(Fn F) const { F(CastKind, To, From); } - void printLeft(OutputStream &S) const override { - S += CastKind; - S += "<"; - To->printLeft(S); - S += ">("; - From->printLeft(S); - S += ")"; + void printLeft(OutputBuffer &OB) const override { + OB += CastKind; + OB += "<"; + To->printLeft(OB); + OB += ">("; + From->printLeft(OB); + OB += ")"; } }; @@ -1744,11 +1887,11 @@ public: template<typename Fn> void match(Fn F) const { F(Pack); } - void printLeft(OutputStream &S) const override { - S += "sizeof...("; + void printLeft(OutputBuffer &OB) const override { + OB += "sizeof...("; ParameterPackExpansion PPE(Pack); - PPE.printLeft(S); - S += ")"; + 
PPE.printLeft(OB); + OB += ")"; } }; @@ -1762,11 +1905,11 @@ public: template<typename Fn> void match(Fn F) const { F(Callee, Args); } - void printLeft(OutputStream &S) const override { - Callee->print(S); - S += "("; - Args.printWithComma(S); - S += ")"; + void printLeft(OutputBuffer &OB) const override { + Callee->print(OB); + OB += "("; + Args.printWithComma(OB); + OB += ")"; } }; @@ -1787,25 +1930,24 @@ public: F(ExprList, Type, InitList, IsGlobal, IsArray); } - void printLeft(OutputStream &S) const override { + void printLeft(OutputBuffer &OB) const override { if (IsGlobal) - S += "::operator "; - S += "new"; + OB += "::operator "; + OB += "new"; if (IsArray) - S += "[]"; - S += ' '; + OB += "[]"; + OB += ' '; if (!ExprList.empty()) { - S += "("; - ExprList.printWithComma(S); - S += ")"; + OB += "("; + ExprList.printWithComma(OB); + OB += ")"; } - Type->print(S); + Type->print(OB); if (!InitList.empty()) { - S += "("; - InitList.printWithComma(S); - S += ")"; + OB += "("; + InitList.printWithComma(OB); + OB += ")"; } - } }; @@ -1820,13 +1962,13 @@ public: template<typename Fn> void match(Fn F) const { F(Op, IsGlobal, IsArray); } - void printLeft(OutputStream &S) const override { + void printLeft(OutputBuffer &OB) const override { if (IsGlobal) - S += "::"; - S += "delete"; + OB += "::"; + OB += "delete"; if (IsArray) - S += "[] "; - Op->print(S); + OB += "[] "; + Op->print(OB); } }; @@ -1840,11 +1982,11 @@ public: template<typename Fn> void match(Fn F) const { F(Prefix, Child); } - void printLeft(OutputStream &S) const override { - S += Prefix; - S += "("; - Child->print(S); - S += ")"; + void printLeft(OutputBuffer &OB) const override { + OB += Prefix; + OB += "("; + Child->print(OB); + OB += ")"; } }; @@ -1856,9 +1998,9 @@ public: template<typename Fn> void match(Fn F) const { F(Number); } - void printLeft(OutputStream &S) const override { - S += "fp"; - S += Number; + void printLeft(OutputBuffer &OB) const override { + OB += "fp"; + OB += Number; } }; @@ 
-1872,12 +2014,12 @@ public: template<typename Fn> void match(Fn F) const { F(Type, Expressions); } - void printLeft(OutputStream &S) const override { - S += "("; - Type->print(S); - S += ")("; - Expressions.printWithComma(S); - S += ")"; + void printLeft(OutputBuffer &OB) const override { + OB += "("; + Type->print(OB); + OB += ")("; + Expressions.printWithComma(OB); + OB += ")"; } }; @@ -1894,12 +2036,12 @@ public: template<typename Fn> void match(Fn F) const { F(Type, SubExpr, Offset); } - void printLeft(OutputStream &S) const override { - S += "("; - Type->print(S); - S += ")("; - SubExpr->print(S); - S += ")"; + void printLeft(OutputBuffer &OB) const override { + OB += "("; + Type->print(OB); + OB += ")("; + SubExpr->print(OB); + OB += ")"; } }; @@ -1912,12 +2054,12 @@ public: template<typename Fn> void match(Fn F) const { F(Ty, Inits); } - void printLeft(OutputStream &S) const override { + void printLeft(OutputBuffer &OB) const override { if (Ty) - Ty->print(S); - S += '{'; - Inits.printWithComma(S); - S += '}'; + Ty->print(OB); + OB += '{'; + Inits.printWithComma(OB); + OB += '}'; } }; @@ -1931,18 +2073,18 @@ public: template<typename Fn> void match(Fn F) const { F(Elem, Init, IsArray); } - void printLeft(OutputStream &S) const override { + void printLeft(OutputBuffer &OB) const override { if (IsArray) { - S += '['; - Elem->print(S); - S += ']'; + OB += '['; + Elem->print(OB); + OB += ']'; } else { - S += '.'; - Elem->print(S); + OB += '.'; + Elem->print(OB); } if (Init->getKind() != KBracedExpr && Init->getKind() != KBracedRangeExpr) - S += " = "; - Init->print(S); + OB += " = "; + Init->print(OB); } }; @@ -1956,15 +2098,15 @@ public: template<typename Fn> void match(Fn F) const { F(First, Last, Init); } - void printLeft(OutputStream &S) const override { - S += '['; - First->print(S); - S += " ... "; - Last->print(S); - S += ']'; + void printLeft(OutputBuffer &OB) const override { + OB += '['; + First->print(OB); + OB += " ... 
"; + Last->print(OB); + OB += ']'; if (Init->getKind() != KBracedExpr && Init->getKind() != KBracedRangeExpr) - S += " = "; - Init->print(S); + OB += " = "; + Init->print(OB); } }; @@ -1983,43 +2125,43 @@ public: F(IsLeftFold, OperatorName, Pack, Init); } - void printLeft(OutputStream &S) const override { + void printLeft(OutputBuffer &OB) const override { auto PrintPack = [&] { - S += '('; - ParameterPackExpansion(Pack).print(S); - S += ')'; + OB += '('; + ParameterPackExpansion(Pack).print(OB); + OB += ')'; }; - S += '('; + OB += '('; if (IsLeftFold) { // init op ... op pack if (Init != nullptr) { - Init->print(S); - S += ' '; - S += OperatorName; - S += ' '; + Init->print(OB); + OB += ' '; + OB += OperatorName; + OB += ' '; } // ... op pack - S += "... "; - S += OperatorName; - S += ' '; + OB += "... "; + OB += OperatorName; + OB += ' '; PrintPack(); } else { // !IsLeftFold // pack op ... PrintPack(); - S += ' '; - S += OperatorName; - S += " ..."; + OB += ' '; + OB += OperatorName; + OB += " ..."; // pack op ... op init if (Init != nullptr) { - S += ' '; - S += OperatorName; - S += ' '; - Init->print(S); + OB += ' '; + OB += OperatorName; + OB += ' '; + Init->print(OB); } } - S += ')'; + OB += ')'; } }; @@ -2031,9 +2173,9 @@ public: template<typename Fn> void match(Fn F) const { F(Op); } - void printLeft(OutputStream &S) const override { - S += "throw "; - Op->print(S); + void printLeft(OutputBuffer &OB) const override { + OB += "throw "; + Op->print(OB); } }; @@ -2045,8 +2187,8 @@ public: template<typename Fn> void match(Fn F) const { F(Value); } - void printLeft(OutputStream &S) const override { - S += Value ? StringView("true") : StringView("false"); + void printLeft(OutputBuffer &OB) const override { + OB += Value ? 
StringView("true") : StringView("false"); } }; @@ -2058,10 +2200,10 @@ public: template<typename Fn> void match(Fn F) const { F(Type); } - void printLeft(OutputStream &S) const override { - S += "\"<"; - Type->print(S); - S += ">\""; + void printLeft(OutputBuffer &OB) const override { + OB += "\"<"; + Type->print(OB); + OB += ">\""; } }; @@ -2073,11 +2215,11 @@ public: template<typename Fn> void match(Fn F) const { F(Type); } - void printLeft(OutputStream &S) const override { - S += "[]"; + void printLeft(OutputBuffer &OB) const override { + OB += "[]"; if (Type->getKind() == KClosureTypeName) - static_cast<const ClosureTypeName *>(Type)->printDeclarator(S); - S += "{...}"; + static_cast<const ClosureTypeName *>(Type)->printDeclarator(OB); + OB += "{...}"; } }; @@ -2092,15 +2234,15 @@ public: template<typename Fn> void match(Fn F) const { F(Ty, Integer); } - void printLeft(OutputStream &S) const override { - S << "("; - Ty->print(S); - S << ")"; + void printLeft(OutputBuffer &OB) const override { + OB << "("; + Ty->print(OB); + OB << ")"; if (Integer[0] == 'n') - S << "-" << Integer.dropFront(1); + OB << "-" << Integer.dropFront(1); else - S << Integer; + OB << Integer; } }; @@ -2114,21 +2256,21 @@ public: template<typename Fn> void match(Fn F) const { F(Type, Value); } - void printLeft(OutputStream &S) const override { + void printLeft(OutputBuffer &OB) const override { if (Type.size() > 3) { - S += "("; - S += Type; - S += ")"; + OB += "("; + OB += Type; + OB += ")"; } if (Value[0] == 'n') { - S += "-"; - S += Value.dropFront(1); + OB += "-"; + OB += Value.dropFront(1); } else - S += Value; + OB += Value; if (Type.size() <= 3) - S += Type; + OB += Type; } }; @@ -2158,7 +2300,7 @@ public: template<typename Fn> void match(Fn F) const { F(Contents); } - void printLeft(OutputStream &s) const override { + void printLeft(OutputBuffer &OB) const override { const char *first = Contents.begin(); const char *last = Contents.end() + 1; @@ -2184,7 +2326,7 @@ public: #endif 
char num[FloatData<Float>::max_demangled_size] = {0}; int n = snprintf(num, sizeof(num), FloatData<Float>::spec, value); - s += StringView(num, num + n); + OB += StringView(num, num + n); } } }; @@ -2217,125 +2359,6 @@ FOR_EACH_NODE_KIND(SPECIALIZATION) #undef FOR_EACH_NODE_KIND -template <class T, size_t N> -class PODSmallVector { - static_assert(std::is_pod<T>::value, - "T is required to be a plain old data type"); - - T* First = nullptr; - T* Last = nullptr; - T* Cap = nullptr; - T Inline[N] = {0}; - - bool isInline() const { return First == Inline; } - - void clearInline() { - First = Inline; - Last = Inline; - Cap = Inline + N; - } - - void reserve(size_t NewCap) { - size_t S = size(); - if (isInline()) { - auto* Tmp = static_cast<T*>(std::malloc(NewCap * sizeof(T))); - if (Tmp == nullptr) - std::terminate(); - std::copy(First, Last, Tmp); - First = Tmp; - } else { - First = static_cast<T*>(std::realloc(First, NewCap * sizeof(T))); - if (First == nullptr) - std::terminate(); - } - Last = First + S; - Cap = First + NewCap; - } - -public: - PODSmallVector() : First(Inline), Last(First), Cap(Inline + N) {} - - PODSmallVector(const PODSmallVector&) = delete; - PODSmallVector& operator=(const PODSmallVector&) = delete; - - PODSmallVector(PODSmallVector&& Other) : PODSmallVector() { - if (Other.isInline()) { - std::copy(Other.begin(), Other.end(), First); - Last = First + Other.size(); - Other.clear(); - return; - } - - First = Other.First; - Last = Other.Last; - Cap = Other.Cap; - Other.clearInline(); - } - - PODSmallVector& operator=(PODSmallVector&& Other) { - if (Other.isInline()) { - if (!isInline()) { - std::free(First); - clearInline(); - } - std::copy(Other.begin(), Other.end(), First); - Last = First + Other.size(); - Other.clear(); - return *this; - } - - if (isInline()) { - First = Other.First; - Last = Other.Last; - Cap = Other.Cap; - Other.clearInline(); - return *this; - } - - std::swap(First, Other.First); - std::swap(Last, Other.Last); - 
std::swap(Cap, Other.Cap); - Other.clear(); - return *this; - } - - void push_back(const T& Elem) { - if (Last == Cap) - reserve(size() * 2); - *Last++ = Elem; - } - - void pop_back() { - assert(Last != First && "Popping empty vector!"); - --Last; - } - - void dropBack(size_t Index) { - assert(Index <= size() && "dropBack() can't expand!"); - Last = First + Index; - } - - T* begin() { return First; } - T* end() { return Last; } - - bool empty() const { return First == Last; } - size_t size() const { return static_cast<size_t>(Last - First); } - T& back() { - assert(Last != First && "Calling back() on empty vector!"); - return *(Last - 1); - } - T& operator[](size_t Index) { - assert(Index < size() && "Invalid access!"); - return *(begin() + Index); - } - void clear() { Last = First; } - - ~PODSmallVector() { - if (!isInline()) - std::free(First); - } -}; - template <typename Derived, typename Alloc> struct AbstractManglingParser { const char *First; const char *Last; @@ -3884,6 +3907,16 @@ Node *AbstractManglingParser<Derived, Alloc>::parseType() { case 'h': First += 2; return make<NameType>("half"); + // ::= DF <number> _ # ISO/IEC TS 18661 binary floating point (N bits) + case 'F': { + First += 2; + Node *DimensionNumber = make<NameType>(parseNumber()); + if (!DimensionNumber) + return nullptr; + if (!consumeIf('_')) + return nullptr; + return make<BinaryFPType>(DimensionNumber); + } // ::= Di # char32_t case 'i': First += 2; diff --git a/llvm/include/llvm/Demangle/MicrosoftDemangleNodes.h b/llvm/include/llvm/Demangle/MicrosoftDemangleNodes.h index 77446e9b0f07..46daa3885a06 100644 --- a/llvm/include/llvm/Demangle/MicrosoftDemangleNodes.h +++ b/llvm/include/llvm/Demangle/MicrosoftDemangleNodes.h @@ -21,11 +21,11 @@ namespace llvm { namespace itanium_demangle { -class OutputStream; +class OutputBuffer; } } -using llvm::itanium_demangle::OutputStream; +using llvm::itanium_demangle::OutputBuffer; using llvm::itanium_demangle::StringView; namespace llvm { @@ -80,6 
+80,7 @@ enum OutputFlags { OF_NoAccessSpecifier = 4, OF_NoMemberType = 8, OF_NoReturnType = 16, + OF_NoVariableType = 32, }; // Types @@ -261,7 +262,7 @@ struct Node { NodeKind kind() const { return Kind; } - virtual void output(OutputStream &OS, OutputFlags Flags) const = 0; + virtual void output(OutputBuffer &OB, OutputFlags Flags) const = 0; std::string toString(OutputFlags Flags = OF_Default) const; @@ -300,12 +301,12 @@ struct SpecialTableSymbolNode; struct TypeNode : public Node { explicit TypeNode(NodeKind K) : Node(K) {} - virtual void outputPre(OutputStream &OS, OutputFlags Flags) const = 0; - virtual void outputPost(OutputStream &OS, OutputFlags Flags) const = 0; + virtual void outputPre(OutputBuffer &OB, OutputFlags Flags) const = 0; + virtual void outputPost(OutputBuffer &OB, OutputFlags Flags) const = 0; - void output(OutputStream &OS, OutputFlags Flags) const override { - outputPre(OS, Flags); - outputPost(OS, Flags); + void output(OutputBuffer &OB, OutputFlags Flags) const override { + outputPre(OB, Flags); + outputPost(OB, Flags); } Qualifiers Quals = Q_None; @@ -315,8 +316,8 @@ struct PrimitiveTypeNode : public TypeNode { explicit PrimitiveTypeNode(PrimitiveKind K) : TypeNode(NodeKind::PrimitiveType), PrimKind(K) {} - void outputPre(OutputStream &OS, OutputFlags Flags) const override; - void outputPost(OutputStream &OS, OutputFlags Flags) const override {} + void outputPre(OutputBuffer &OB, OutputFlags Flags) const override; + void outputPost(OutputBuffer &OB, OutputFlags Flags) const override {} PrimitiveKind PrimKind; }; @@ -325,8 +326,8 @@ struct FunctionSignatureNode : public TypeNode { explicit FunctionSignatureNode(NodeKind K) : TypeNode(K) {} FunctionSignatureNode() : TypeNode(NodeKind::FunctionSignature) {} - void outputPre(OutputStream &OS, OutputFlags Flags) const override; - void outputPost(OutputStream &OS, OutputFlags Flags) const override; + void outputPre(OutputBuffer &OB, OutputFlags Flags) const override; + void 
outputPost(OutputBuffer &OB, OutputFlags Flags) const override; // Valid if this FunctionTypeNode is the Pointee of a PointerType or // MemberPointerType. @@ -359,13 +360,13 @@ struct IdentifierNode : public Node { NodeArrayNode *TemplateParams = nullptr; protected: - void outputTemplateParameters(OutputStream &OS, OutputFlags Flags) const; + void outputTemplateParameters(OutputBuffer &OB, OutputFlags Flags) const; }; struct VcallThunkIdentifierNode : public IdentifierNode { VcallThunkIdentifierNode() : IdentifierNode(NodeKind::VcallThunkIdentifier) {} - void output(OutputStream &OS, OutputFlags Flags) const override; + void output(OutputBuffer &OB, OutputFlags Flags) const override; uint64_t OffsetInVTable = 0; }; @@ -374,7 +375,7 @@ struct DynamicStructorIdentifierNode : public IdentifierNode { DynamicStructorIdentifierNode() : IdentifierNode(NodeKind::DynamicStructorIdentifier) {} - void output(OutputStream &OS, OutputFlags Flags) const override; + void output(OutputBuffer &OB, OutputFlags Flags) const override; VariableSymbolNode *Variable = nullptr; QualifiedNameNode *Name = nullptr; @@ -384,7 +385,7 @@ struct DynamicStructorIdentifierNode : public IdentifierNode { struct NamedIdentifierNode : public IdentifierNode { NamedIdentifierNode() : IdentifierNode(NodeKind::NamedIdentifier) {} - void output(OutputStream &OS, OutputFlags Flags) const override; + void output(OutputBuffer &OB, OutputFlags Flags) const override; StringView Name; }; @@ -394,7 +395,7 @@ struct IntrinsicFunctionIdentifierNode : public IdentifierNode { : IdentifierNode(NodeKind::IntrinsicFunctionIdentifier), Operator(Operator) {} - void output(OutputStream &OS, OutputFlags Flags) const override; + void output(OutputBuffer &OB, OutputFlags Flags) const override; IntrinsicFunctionKind Operator; }; @@ -403,7 +404,7 @@ struct LiteralOperatorIdentifierNode : public IdentifierNode { LiteralOperatorIdentifierNode() : IdentifierNode(NodeKind::LiteralOperatorIdentifier) {} - void output(OutputStream 
&OS, OutputFlags Flags) const override; + void output(OutputBuffer &OB, OutputFlags Flags) const override; StringView Name; }; @@ -412,7 +413,7 @@ struct LocalStaticGuardIdentifierNode : public IdentifierNode { LocalStaticGuardIdentifierNode() : IdentifierNode(NodeKind::LocalStaticGuardIdentifier) {} - void output(OutputStream &OS, OutputFlags Flags) const override; + void output(OutputBuffer &OB, OutputFlags Flags) const override; bool IsThread = false; uint32_t ScopeIndex = 0; @@ -422,7 +423,7 @@ struct ConversionOperatorIdentifierNode : public IdentifierNode { ConversionOperatorIdentifierNode() : IdentifierNode(NodeKind::ConversionOperatorIdentifier) {} - void output(OutputStream &OS, OutputFlags Flags) const override; + void output(OutputBuffer &OB, OutputFlags Flags) const override; // The type that this operator converts too. TypeNode *TargetType = nullptr; @@ -434,7 +435,7 @@ struct StructorIdentifierNode : public IdentifierNode { : IdentifierNode(NodeKind::StructorIdentifier), IsDestructor(IsDestructor) {} - void output(OutputStream &OS, OutputFlags Flags) const override; + void output(OutputBuffer &OB, OutputFlags Flags) const override; // The name of the class that this is a structor of. 
IdentifierNode *Class = nullptr; @@ -444,8 +445,8 @@ struct StructorIdentifierNode : public IdentifierNode { struct ThunkSignatureNode : public FunctionSignatureNode { ThunkSignatureNode() : FunctionSignatureNode(NodeKind::ThunkSignature) {} - void outputPre(OutputStream &OS, OutputFlags Flags) const override; - void outputPost(OutputStream &OS, OutputFlags Flags) const override; + void outputPre(OutputBuffer &OB, OutputFlags Flags) const override; + void outputPost(OutputBuffer &OB, OutputFlags Flags) const override; struct ThisAdjustor { uint32_t StaticOffset = 0; @@ -459,8 +460,8 @@ struct ThunkSignatureNode : public FunctionSignatureNode { struct PointerTypeNode : public TypeNode { PointerTypeNode() : TypeNode(NodeKind::PointerType) {} - void outputPre(OutputStream &OS, OutputFlags Flags) const override; - void outputPost(OutputStream &OS, OutputFlags Flags) const override; + void outputPre(OutputBuffer &OB, OutputFlags Flags) const override; + void outputPost(OutputBuffer &OB, OutputFlags Flags) const override; // Is this a pointer, reference, or rvalue-reference? 
PointerAffinity Affinity = PointerAffinity::None; @@ -476,8 +477,8 @@ struct PointerTypeNode : public TypeNode { struct TagTypeNode : public TypeNode { explicit TagTypeNode(TagKind Tag) : TypeNode(NodeKind::TagType), Tag(Tag) {} - void outputPre(OutputStream &OS, OutputFlags Flags) const override; - void outputPost(OutputStream &OS, OutputFlags Flags) const override; + void outputPre(OutputBuffer &OB, OutputFlags Flags) const override; + void outputPost(OutputBuffer &OB, OutputFlags Flags) const override; QualifiedNameNode *QualifiedName = nullptr; TagKind Tag; @@ -486,11 +487,11 @@ struct TagTypeNode : public TypeNode { struct ArrayTypeNode : public TypeNode { ArrayTypeNode() : TypeNode(NodeKind::ArrayType) {} - void outputPre(OutputStream &OS, OutputFlags Flags) const override; - void outputPost(OutputStream &OS, OutputFlags Flags) const override; + void outputPre(OutputBuffer &OB, OutputFlags Flags) const override; + void outputPost(OutputBuffer &OB, OutputFlags Flags) const override; - void outputDimensionsImpl(OutputStream &OS, OutputFlags Flags) const; - void outputOneDimension(OutputStream &OS, OutputFlags Flags, Node *N) const; + void outputDimensionsImpl(OutputBuffer &OB, OutputFlags Flags) const; + void outputOneDimension(OutputBuffer &OB, OutputFlags Flags, Node *N) const; // A list of array dimensions. e.g. 
[3,4,5] in `int Foo[3][4][5]` NodeArrayNode *Dimensions = nullptr; @@ -501,14 +502,14 @@ struct ArrayTypeNode : public TypeNode { struct IntrinsicNode : public TypeNode { IntrinsicNode() : TypeNode(NodeKind::IntrinsicType) {} - void output(OutputStream &OS, OutputFlags Flags) const override {} + void output(OutputBuffer &OB, OutputFlags Flags) const override {} }; struct CustomTypeNode : public TypeNode { CustomTypeNode() : TypeNode(NodeKind::Custom) {} - void outputPre(OutputStream &OS, OutputFlags Flags) const override; - void outputPost(OutputStream &OS, OutputFlags Flags) const override; + void outputPre(OutputBuffer &OB, OutputFlags Flags) const override; + void outputPost(OutputBuffer &OB, OutputFlags Flags) const override; IdentifierNode *Identifier = nullptr; }; @@ -516,9 +517,9 @@ struct CustomTypeNode : public TypeNode { struct NodeArrayNode : public Node { NodeArrayNode() : Node(NodeKind::NodeArray) {} - void output(OutputStream &OS, OutputFlags Flags) const override; + void output(OutputBuffer &OB, OutputFlags Flags) const override; - void output(OutputStream &OS, OutputFlags Flags, StringView Separator) const; + void output(OutputBuffer &OB, OutputFlags Flags, StringView Separator) const; Node **Nodes = nullptr; size_t Count = 0; @@ -527,7 +528,7 @@ struct NodeArrayNode : public Node { struct QualifiedNameNode : public Node { QualifiedNameNode() : Node(NodeKind::QualifiedName) {} - void output(OutputStream &OS, OutputFlags Flags) const override; + void output(OutputBuffer &OB, OutputFlags Flags) const override; NodeArrayNode *Components = nullptr; @@ -541,7 +542,7 @@ struct TemplateParameterReferenceNode : public Node { TemplateParameterReferenceNode() : Node(NodeKind::TemplateParameterReference) {} - void output(OutputStream &OS, OutputFlags Flags) const override; + void output(OutputBuffer &OB, OutputFlags Flags) const override; SymbolNode *Symbol = nullptr; @@ -556,7 +557,7 @@ struct IntegerLiteralNode : public Node { IntegerLiteralNode(uint64_t 
Value, bool IsNegative) : Node(NodeKind::IntegerLiteral), Value(Value), IsNegative(IsNegative) {} - void output(OutputStream &OS, OutputFlags Flags) const override; + void output(OutputBuffer &OB, OutputFlags Flags) const override; uint64_t Value = 0; bool IsNegative = false; @@ -566,7 +567,7 @@ struct RttiBaseClassDescriptorNode : public IdentifierNode { RttiBaseClassDescriptorNode() : IdentifierNode(NodeKind::RttiBaseClassDescriptor) {} - void output(OutputStream &OS, OutputFlags Flags) const override; + void output(OutputBuffer &OB, OutputFlags Flags) const override; uint32_t NVOffset = 0; int32_t VBPtrOffset = 0; @@ -576,7 +577,7 @@ struct RttiBaseClassDescriptorNode : public IdentifierNode { struct SymbolNode : public Node { explicit SymbolNode(NodeKind K) : Node(K) {} - void output(OutputStream &OS, OutputFlags Flags) const override; + void output(OutputBuffer &OB, OutputFlags Flags) const override; QualifiedNameNode *Name = nullptr; }; @@ -584,7 +585,7 @@ struct SpecialTableSymbolNode : public SymbolNode { explicit SpecialTableSymbolNode() : SymbolNode(NodeKind::SpecialTableSymbol) {} - void output(OutputStream &OS, OutputFlags Flags) const override; + void output(OutputBuffer &OB, OutputFlags Flags) const override; QualifiedNameNode *TargetName = nullptr; Qualifiers Quals = Qualifiers::Q_None; }; @@ -593,7 +594,7 @@ struct LocalStaticGuardVariableNode : public SymbolNode { LocalStaticGuardVariableNode() : SymbolNode(NodeKind::LocalStaticGuardVariable) {} - void output(OutputStream &OS, OutputFlags Flags) const override; + void output(OutputBuffer &OB, OutputFlags Flags) const override; bool IsVisible = false; }; @@ -601,7 +602,7 @@ struct LocalStaticGuardVariableNode : public SymbolNode { struct EncodedStringLiteralNode : public SymbolNode { EncodedStringLiteralNode() : SymbolNode(NodeKind::EncodedStringLiteral) {} - void output(OutputStream &OS, OutputFlags Flags) const override; + void output(OutputBuffer &OB, OutputFlags Flags) const override; StringView 
DecodedString; bool IsTruncated = false; @@ -611,7 +612,7 @@ struct EncodedStringLiteralNode : public SymbolNode { struct VariableSymbolNode : public SymbolNode { VariableSymbolNode() : SymbolNode(NodeKind::VariableSymbol) {} - void output(OutputStream &OS, OutputFlags Flags) const override; + void output(OutputBuffer &OB, OutputFlags Flags) const override; StorageClass SC = StorageClass::None; TypeNode *Type = nullptr; @@ -620,7 +621,7 @@ struct VariableSymbolNode : public SymbolNode { struct FunctionSymbolNode : public SymbolNode { FunctionSymbolNode() : SymbolNode(NodeKind::FunctionSymbol) {} - void output(OutputStream &OS, OutputFlags Flags) const override; + void output(OutputBuffer &OB, OutputFlags Flags) const override; FunctionSignatureNode *Signature = nullptr; }; diff --git a/llvm/include/llvm/Demangle/Utility.h b/llvm/include/llvm/Demangle/Utility.h index 04ff65a35aed..4fea9351a4bf 100644 --- a/llvm/include/llvm/Demangle/Utility.h +++ b/llvm/include/llvm/Demangle/Utility.h @@ -24,7 +24,7 @@ DEMANGLE_NAMESPACE_BEGIN // Stream that AST nodes write their string representation into after the AST // has been parsed. 
-class OutputStream { +class OutputBuffer { char *Buffer = nullptr; size_t CurrentPosition = 0; size_t BufferCapacity = 0; @@ -63,9 +63,9 @@ class OutputStream { } public: - OutputStream(char *StartBuf, size_t Size) + OutputBuffer(char *StartBuf, size_t Size) : Buffer(StartBuf), CurrentPosition(0), BufferCapacity(Size) {} - OutputStream() = default; + OutputBuffer() = default; void reset(char *Buffer_, size_t BufferCapacity_) { CurrentPosition = 0; Buffer = Buffer_; @@ -77,7 +77,7 @@ public: unsigned CurrentPackIndex = std::numeric_limits<unsigned>::max(); unsigned CurrentPackMax = std::numeric_limits<unsigned>::max(); - OutputStream &operator+=(StringView R) { + OutputBuffer &operator+=(StringView R) { size_t Size = R.size(); if (Size == 0) return *this; @@ -87,17 +87,28 @@ public: return *this; } - OutputStream &operator+=(char C) { + OutputBuffer &operator+=(char C) { grow(1); Buffer[CurrentPosition++] = C; return *this; } - OutputStream &operator<<(StringView R) { return (*this += R); } + OutputBuffer &operator<<(StringView R) { return (*this += R); } - OutputStream &operator<<(char C) { return (*this += C); } + OutputBuffer prepend(StringView R) { + size_t Size = R.size(); + + grow(Size); + std::memmove(Buffer + Size, Buffer, CurrentPosition); + std::memcpy(Buffer, R.begin(), Size); + CurrentPosition += Size; - OutputStream &operator<<(long long N) { + return *this; + } + + OutputBuffer &operator<<(char C) { return (*this += C); } + + OutputBuffer &operator<<(long long N) { if (N < 0) writeUnsigned(static_cast<unsigned long long>(-N), true); else @@ -105,27 +116,37 @@ public: return *this; } - OutputStream &operator<<(unsigned long long N) { + OutputBuffer &operator<<(unsigned long long N) { writeUnsigned(N, false); return *this; } - OutputStream &operator<<(long N) { + OutputBuffer &operator<<(long N) { return this->operator<<(static_cast<long long>(N)); } - OutputStream &operator<<(unsigned long N) { + OutputBuffer &operator<<(unsigned long N) { return 
this->operator<<(static_cast<unsigned long long>(N)); } - OutputStream &operator<<(int N) { + OutputBuffer &operator<<(int N) { return this->operator<<(static_cast<long long>(N)); } - OutputStream &operator<<(unsigned int N) { + OutputBuffer &operator<<(unsigned int N) { return this->operator<<(static_cast<unsigned long long>(N)); } + void insert(size_t Pos, const char *S, size_t N) { + assert(Pos <= CurrentPosition); + if (N == 0) + return; + grow(N); + std::memmove(Buffer + Pos + N, Buffer + Pos, CurrentPosition - Pos); + std::memcpy(Buffer + Pos, S, N); + CurrentPosition += N; + } + size_t getCurrentPosition() const { return CurrentPosition; } void setCurrentPosition(size_t NewPos) { CurrentPosition = NewPos; } @@ -171,7 +192,7 @@ public: SwapAndRestore &operator=(const SwapAndRestore &) = delete; }; -inline bool initializeOutputStream(char *Buf, size_t *N, OutputStream &S, +inline bool initializeOutputBuffer(char *Buf, size_t *N, OutputBuffer &OB, size_t InitSize) { size_t BufferSize; if (Buf == nullptr) { @@ -182,7 +203,7 @@ inline bool initializeOutputStream(char *Buf, size_t *N, OutputStream &S, } else BufferSize = *N; - S.reset(Buf, BufferSize); + OB.reset(Buf, BufferSize); return true; } diff --git a/llvm/include/llvm/ExecutionEngine/ExecutionEngine.h b/llvm/include/llvm/ExecutionEngine/ExecutionEngine.h index 2e386518f0bf..43c91fb5f988 100644 --- a/llvm/include/llvm/ExecutionEngine/ExecutionEngine.h +++ b/llvm/include/llvm/ExecutionEngine/ExecutionEngine.h @@ -21,7 +21,6 @@ #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/ExecutionEngine/JITSymbol.h" -#include "llvm/ExecutionEngine/OrcV1Deprecation.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Module.h" #include "llvm/Object/Binary.h" diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/ELF_aarch64.h b/llvm/include/llvm/ExecutionEngine/JITLink/ELF_aarch64.h new file mode 100644 index 000000000000..50eb598139ea --- /dev/null +++ 
b/llvm/include/llvm/ExecutionEngine/JITLink/ELF_aarch64.h @@ -0,0 +1,39 @@ +//===--- ELF_aarch64.h - JIT link functions for ELF/aarch64 --*- C++ -*----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +//===----------------------------------------------------------------------===// +// +// jit-link functions for ELF/aarch64. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EXECUTIONENGINE_JITLINK_ELF_AARCH64_H +#define LLVM_EXECUTIONENGINE_JITLINK_ELF_AARCH64_H + +#include "llvm/ExecutionEngine/JITLink/JITLink.h" + +namespace llvm { +namespace jitlink { + +/// Create a LinkGraph from an ELF/aarch64 relocatable object +/// +/// Note: The graph does not take ownership of the underlying buffer, nor copy +/// its contents. The caller is responsible for ensuring that the object buffer +/// outlives the graph. +Expected<std::unique_ptr<LinkGraph>> +createLinkGraphFromELFObject_aarch64(MemoryBufferRef ObjectBuffer); + +/// jit-link the given object buffer, which must be a ELF aarch64 relocatable +/// object file. 
+void link_ELF_aarch64(std::unique_ptr<LinkGraph> G, + std::unique_ptr<JITLinkContext> Ctx); + +} // end namespace jitlink +} // end namespace llvm + +#endif // LLVM_EXECUTIONENGINE_JITLINK_ELF_AARCH64_H diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/ELF_riscv.h b/llvm/include/llvm/ExecutionEngine/JITLink/ELF_riscv.h index 1339ab51cbb9..5a8b186a2c3e 100644 --- a/llvm/include/llvm/ExecutionEngine/JITLink/ELF_riscv.h +++ b/llvm/include/llvm/ExecutionEngine/JITLink/ELF_riscv.h @@ -35,4 +35,4 @@ void link_ELF_riscv(std::unique_ptr<LinkGraph> G, } // end namespace jitlink } // end namespace llvm -#endif // LLVM_EXECUTIONENGINE_JITLINK_ELF_RISCV64_H +#endif // LLVM_EXECUTIONENGINE_JITLINK_ELF_RISCV_H diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/ELF_x86_64.h b/llvm/include/llvm/ExecutionEngine/JITLink/ELF_x86_64.h index d8ed953363e6..f5fa9e96c594 100644 --- a/llvm/include/llvm/ExecutionEngine/JITLink/ELF_x86_64.h +++ b/llvm/include/llvm/ExecutionEngine/JITLink/ELF_x86_64.h @@ -21,29 +21,17 @@ namespace jitlink { namespace ELF_x86_64_Edges { enum ELFX86RelocationKind : Edge::Kind { Branch32 = Edge::FirstRelocation, - Branch32ToStub, - Pointer32, + Pointer32Signed, Pointer64, - Pointer64Anon, PCRel32, - PCRel64, - PCRel32Minus1, - PCRel32Minus2, - PCRel32Minus4, - PCRel32Anon, - PCRel32Minus1Anon, - PCRel32Minus2Anon, - PCRel32Minus4Anon, PCRel32GOTLoad, - PCRel32GOT, + PCRel32GOTLoadRelaxable, + PCRel32REXGOTLoadRelaxable, + PCRel32TLV, PCRel64GOT, GOTOFF64, GOT64, - PCRel32TLV, - Delta32, Delta64, - NegDelta32, - NegDelta64, }; } // end namespace ELF_x86_64_Edges diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h b/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h index 6162a675ec12..83d85953fce6 100644 --- a/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h +++ b/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h @@ -13,19 +13,19 @@ #ifndef LLVM_EXECUTIONENGINE_JITLINK_JITLINK_H #define LLVM_EXECUTIONENGINE_JITLINK_JITLINK_H -#include 
"JITLinkMemoryManager.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Triple.h" +#include "llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h" +#include "llvm/ExecutionEngine/JITLink/MemoryFlags.h" #include "llvm/ExecutionEngine/JITSymbol.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Endian.h" #include "llvm/Support/Error.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Support/Memory.h" #include "llvm/Support/MemoryBuffer.h" #include <map> @@ -225,7 +225,7 @@ public: /// Get the content for this block. Block must not be a zero-fill block. ArrayRef<char> getContent() const { - assert(Data && "Section does not contain content"); + assert(Data && "Block does not contain content"); return ArrayRef<char>(Data, Size); } @@ -233,6 +233,7 @@ public: /// Caller is responsible for ensuring the underlying bytes are not /// deallocated while pointed to by this block. void setContent(ArrayRef<char> Content) { + assert(Content.data() && "Setting null content"); Data = Content.data(); Size = Content.size(); ContentMutable = false; @@ -251,6 +252,7 @@ public: /// to call this on a block with immutable content -- consider using /// getMutableContent instead. MutableArrayRef<char> getAlreadyMutableContent() { + assert(Data && "Block does not contain content"); assert(ContentMutable && "Content is not mutable"); return MutableArrayRef<char>(const_cast<char *>(Data), Size); } @@ -260,6 +262,7 @@ public: /// The caller is responsible for ensuring that the memory pointed to by /// MutableContent is not deallocated while pointed to by this block. void setMutableContent(MutableArrayRef<char> MutableContent) { + assert(MutableContent.data() && "Setting null content"); Data = MutableContent.data(); Size = MutableContent.size(); ContentMutable = true; @@ -295,6 +298,7 @@ public: /// Add an edge to this block. 
void addEdge(Edge::Kind K, Edge::OffsetT Offset, Symbol &Target, Edge::AddendT Addend) { + assert(!isZeroFill() && "Adding edge to zero-fill block?"); Edges.push_back(Edge(K, Offset, Target, Addend)); } @@ -339,6 +343,12 @@ private: std::vector<Edge> Edges; }; +// Align a JITTargetAddress to conform with block alignment requirements. +inline JITTargetAddress alignToBlock(JITTargetAddress Addr, Block &B) { + uint64_t Delta = (B.getAlignmentOffset() - Addr) % B.getAlignment(); + return Addr + Delta; +} + /// Describes symbol linkage. This can be used to make resolve definition /// clashes. enum class Linkage : uint8_t { @@ -640,8 +650,7 @@ class Section { friend class LinkGraph; private: - Section(StringRef Name, sys::Memory::ProtectionFlags Prot, - SectionOrdinal SecOrdinal) + Section(StringRef Name, MemProt Prot, SectionOrdinal SecOrdinal) : Name(Name), Prot(Prot), SecOrdinal(SecOrdinal) {} using SymbolSet = DenseSet<Symbol *>; @@ -666,12 +675,16 @@ public: StringRef getName() const { return Name; } /// Returns the protection flags for this section. - sys::Memory::ProtectionFlags getProtectionFlags() const { return Prot; } + MemProt getMemProt() const { return Prot; } /// Set the protection flags for this section. - void setProtectionFlags(sys::Memory::ProtectionFlags Prot) { - this->Prot = Prot; - } + void setMemProt(MemProt Prot) { this->Prot = Prot; } + + /// Get the deallocation policy for this section. + MemDeallocPolicy getMemDeallocPolicy() const { return MDP; } + + /// Set the deallocation policy for this section. + void setMemDeallocPolicy(MemDeallocPolicy MDP) { this->MDP = MDP; } /// Returns the ordinal for this section. SectionOrdinal getOrdinal() const { return SecOrdinal; } @@ -686,6 +699,7 @@ public: return make_range(Blocks.begin(), Blocks.end()); } + /// Returns the number of blocks in this section. BlockSet::size_type blocks_size() const { return Blocks.size(); } /// Returns an iterator over the symbols defined in this section. 
@@ -734,7 +748,8 @@ private: } StringRef Name; - sys::Memory::ProtectionFlags Prot; + MemProt Prot; + MemDeallocPolicy MDP = MemDeallocPolicy::Standard; SectionOrdinal SecOrdinal = 0; BlockSet Blocks; SymbolSet Symbols; @@ -916,6 +931,11 @@ public: : Name(std::move(Name)), TT(TT), PointerSize(PointerSize), Endianness(Endianness), GetEdgeKindName(std::move(GetEdgeKindName)) {} + LinkGraph(const LinkGraph &) = delete; + LinkGraph &operator=(const LinkGraph &) = delete; + LinkGraph(LinkGraph &&) = delete; + LinkGraph &operator=(LinkGraph &&) = delete; + /// Returns the name of this graph (usually the name of the original /// underlying MemoryBuffer). const std::string &getName() const { return Name; } @@ -962,7 +982,7 @@ public: } /// Create a section with the given name, protection flags, and alignment. - Section &createSection(StringRef Name, sys::Memory::ProtectionFlags Prot) { + Section &createSection(StringRef Name, MemProt Prot) { assert(llvm::find_if(Sections, [&](std::unique_ptr<Section> &Sec) { return Sec->getName() == Name; @@ -1100,10 +1120,10 @@ public: Symbol &addDefinedSymbol(Block &Content, JITTargetAddress Offset, StringRef Name, JITTargetAddress Size, Linkage L, Scope S, bool IsCallable, bool IsLive) { - assert(llvm::count_if(defined_symbols(), - [&](const Symbol *Sym) { - return Sym->getName() == Name; - }) == 0 && + assert((S == Scope::Local || llvm::count_if(defined_symbols(), + [&](const Symbol *Sym) { + return Sym->getName() == Name; + }) == 0) && "Duplicate defined symbol"); auto &Sym = Symbol::constructNamedDef(Allocator.Allocate<Symbol>(), Content, Offset, @@ -1237,6 +1257,7 @@ public: void transferDefinedSymbol(Symbol &Sym, Block &DestBlock, JITTargetAddress NewOffset, Optional<JITTargetAddress> ExplicitNewSize) { + auto &OldSection = Sym.getBlock().getSection(); Sym.setBlock(DestBlock); Sym.setOffset(NewOffset); if (ExplicitNewSize) @@ -1246,6 +1267,10 @@ public: if (Sym.getSize() > RemainingBlockSize) Sym.setSize(RemainingBlockSize); } + if 
(&DestBlock.getSection() != &OldSection) { + OldSection.removeSymbol(Sym); + DestBlock.getSection().addSymbol(Sym); + } } /// Transfers the given Block and all Symbols pointing to it to the given @@ -1280,6 +1305,8 @@ public: bool PreserveSrcSection = false) { if (&DstSection == &SrcSection) return; + for (auto *B : SrcSection.blocks()) + B->setSection(DstSection); SrcSection.transferContentTo(DstSection); if (!PreserveSrcSection) removeSection(SrcSection); @@ -1345,6 +1372,13 @@ public: Sections.erase(I); } + /// Accessor for the AllocActions object for this graph. This can be used to + /// register allocation action calls prior to finalization. + /// + /// Accessing this object after finalization will result in undefined + /// behavior. + JITLinkMemoryManager::AllocActions &allocActions() { return AAs; } + /// Dump the graph. void dump(raw_ostream &OS); @@ -1361,6 +1395,7 @@ private: SectionList Sections; ExternalSymbolSet ExternalSymbols; ExternalSymbolSet AbsoluteSymbols; + JITLinkMemoryManager::AllocActions AAs; }; inline MutableArrayRef<char> Block::getMutableContent(LinkGraph &G) { @@ -1650,8 +1685,7 @@ public: /// finalized (i.e. emitted to memory and memory permissions set). If all of /// this objects dependencies have also been finalized then the code is ready /// to run. - virtual void - notifyFinalized(std::unique_ptr<JITLinkMemoryManager::Allocation> A) = 0; + virtual void notifyFinalized(JITLinkMemoryManager::FinalizedAlloc Alloc) = 0; /// Called by JITLink prior to linking to determine whether default passes for /// the target should be added. The default implementation returns true. @@ -1683,6 +1717,36 @@ Error markAllSymbolsLive(LinkGraph &G); Error makeTargetOutOfRangeError(const LinkGraph &G, const Block &B, const Edge &E); +/// Base case for edge-visitors where the visitor-list is empty. +inline void visitEdge(LinkGraph &G, Block *B, Edge &E) {} + +/// Applies the first visitor in the list to the given edge. 
If the visitor's +/// visitEdge method returns true then we return immediately, otherwise we +/// apply the next visitor. +template <typename VisitorT, typename... VisitorTs> +void visitEdge(LinkGraph &G, Block *B, Edge &E, VisitorT &&V, + VisitorTs &&...Vs) { + if (!V.visitEdge(G, B, E)) + visitEdge(G, B, E, std::forward<VisitorTs>(Vs)...); +} + +/// For each edge in the given graph, apply a list of visitors to the edge, +/// stopping when the first visitor's visitEdge method returns true. +/// +/// Only visits edges that were in the graph at call time: if any visitor +/// adds new edges those will not be visited. Visitors are not allowed to +/// remove edges (though they can change their kind, target, and addend). +template <typename... VisitorTs> +void visitExistingEdges(LinkGraph &G, VisitorTs &&...Vs) { + // We may add new blocks during this process, but we don't want to iterate + // over them, so build a worklist. + std::vector<Block *> Worklist(G.blocks().begin(), G.blocks().end()); + + for (auto *B : Worklist) + for (auto &E : B->edges()) + visitEdge(G, B, E, std::forward<VisitorTs>(Vs)...); +} + /// Create a LinkGraph from the given object buffer. 
/// /// Note: The graph does not take ownership of the underlying buffer, nor copy diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h b/llvm/include/llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h index cee7d6b09c48..62c271dfc0b2 100644 --- a/llvm/include/llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h +++ b/llvm/include/llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h @@ -13,106 +13,416 @@ #ifndef LLVM_EXECUTIONENGINE_JITLINK_JITLINKMEMORYMANAGER_H #define LLVM_EXECUTIONENGINE_JITLINK_JITLINKMEMORYMANAGER_H -#include "llvm/ADT/DenseMap.h" #include "llvm/ExecutionEngine/JITLink/JITLinkDylib.h" +#include "llvm/ExecutionEngine/JITLink/MemoryFlags.h" #include "llvm/ExecutionEngine/JITSymbol.h" +#include "llvm/Support/Allocator.h" #include "llvm/Support/Error.h" #include "llvm/Support/MSVCErrorWorkarounds.h" #include "llvm/Support/Memory.h" +#include "llvm/Support/RecyclingAllocator.h" #include <cstdint> #include <future> +#include <mutex> namespace llvm { namespace jitlink { +class Block; +class LinkGraph; +class Section; + /// Manages allocations of JIT memory. /// /// Instances of this class may be accessed concurrently from multiple threads /// and their implemetations should include any necessary synchronization. class JITLinkMemoryManager { public: - using ProtectionFlags = sys::Memory::ProtectionFlags; + /// Represents a call to a graph-memory-management support function in the + /// executor. + /// + /// Support functions are called as: + /// + /// auto *Result = + /// ((char*(*)(const void*, size_t))FnAddr)( + /// (const void*)CtxAddr, (size_t)CtxSize) + /// + /// A null result is interpreted as success. + /// + /// A non-null result is interpreted as a heap-allocated string containing + /// an error message to report to the allocator (the allocator's + /// executor-side implementation code is responsible for freeing the error + /// string). 
+ struct AllocActionCall { + JITTargetAddress FnAddr = 0; + JITTargetAddress CtxAddr = 0; + JITTargetAddress CtxSize = 0; + }; + + /// A pair of AllocActionCalls, one to be run at finalization time, one to be + /// run at deallocation time. + /// + /// AllocActionCallPairs should be constructed for paired operations (e.g. + /// __register_ehframe and __deregister_ehframe for eh-frame registration). + /// See comments for AllocActions for execution ordering. + /// + /// For unpaired operations one or the other member can be left unused, as + /// AllocationActionCalls with an FnAddr of zero will be skipped. + struct AllocActionCallPair { + AllocActionCall Finalize; + AllocActionCall Dealloc; + }; + + /// A vector of allocation actions to be run for this allocation. + /// + /// Finalize allocations will be run in order at finalize time. Dealloc + /// actions will be run in reverse order at deallocation time. + using AllocActions = std::vector<AllocActionCallPair>; + + /// Represents a finalized allocation. + /// + /// Finalized allocations must be passed to the + /// JITLinkMemoryManager:deallocate method prior to being destroyed. + /// + /// The interpretation of the Address associated with the finalized allocation + /// is up to the memory manager implementation. Common options are using the + /// base address of the allocation, or the address of a memory management + /// object that tracks the allocation. 
+ class FinalizedAlloc { + friend class JITLinkMemoryManager; - class SegmentRequest { public: - SegmentRequest() = default; - SegmentRequest(uint64_t Alignment, size_t ContentSize, - uint64_t ZeroFillSize) - : Alignment(Alignment), ContentSize(ContentSize), - ZeroFillSize(ZeroFillSize) { - assert(isPowerOf2_32(Alignment) && "Alignment must be power of 2"); + static constexpr JITTargetAddress InvalidAddr = ~JITTargetAddress(0); + + FinalizedAlloc() = default; + explicit FinalizedAlloc(JITTargetAddress A) : A(A) { + assert(A != 0 && "Explicitly creating an invalid allocation?"); + } + FinalizedAlloc(const FinalizedAlloc &) = delete; + FinalizedAlloc(FinalizedAlloc &&Other) : A(Other.A) { + Other.A = InvalidAddr; + } + FinalizedAlloc &operator=(const FinalizedAlloc &) = delete; + FinalizedAlloc &operator=(FinalizedAlloc &&Other) { + assert(A == InvalidAddr && + "Cannot overwrite active finalized allocation"); + std::swap(A, Other.A); + return *this; + } + ~FinalizedAlloc() { + assert(A == InvalidAddr && "Finalized allocation was not deallocated"); + } + + /// FinalizedAllocs convert to false for default-constructed, and + /// true otherwise. Default-constructed allocs need not be deallocated. + explicit operator bool() const { return A != InvalidAddr; } + + /// Returns the address associated with this finalized allocation. + /// The allocation is unmodified. + JITTargetAddress getAddress() const { return A; } + + /// Returns the address associated with this finalized allocation and + /// resets this object to the default state. + /// This should only be used by allocators when deallocating memory. 
+ JITTargetAddress release() { + JITTargetAddress Tmp = A; + A = InvalidAddr; + return Tmp; } - uint64_t getAlignment() const { return Alignment; } - size_t getContentSize() const { return ContentSize; } - uint64_t getZeroFillSize() const { return ZeroFillSize; } + private: - uint64_t Alignment = 0; - size_t ContentSize = 0; - uint64_t ZeroFillSize = 0; + JITTargetAddress A = InvalidAddr; }; - using SegmentsRequestMap = DenseMap<unsigned, SegmentRequest>; - - /// Represents an allocation created by the memory manager. + /// Represents an allocation which has not been finalized yet. /// - /// An allocation object is responsible for allocating and owning jit-linker - /// working and target memory, and for transfering from working to target - /// memory. + /// InFlightAllocs manage both executor memory allocations and working + /// memory allocations. /// - class Allocation { + /// On finalization, the InFlightAlloc should transfer the content of + /// working memory into executor memory, apply memory protections, and + /// run any finalization functions. + /// + /// Working memory should be kept alive at least until one of the following + /// happens: (1) the InFlightAlloc instance is destroyed, (2) the + /// InFlightAlloc is abandoned, (3) finalized target memory is destroyed. + /// + /// If abandon is called then working memory and executor memory should both + /// be freed. + class InFlightAlloc { public: - using FinalizeContinuation = std::function<void(Error)>; - - virtual ~Allocation(); + using OnFinalizedFunction = unique_function<void(Expected<FinalizedAlloc>)>; + using OnAbandonedFunction = unique_function<void(Error)>; - /// Should return the address of linker working memory for the segment with - /// the given protection flags. - virtual MutableArrayRef<char> getWorkingMemory(ProtectionFlags Seg) = 0; + virtual ~InFlightAlloc(); - /// Should return the final address in the target process where the segment - /// will reside. 
- virtual JITTargetAddress getTargetMemory(ProtectionFlags Seg) = 0; + /// Called prior to finalization if the allocation should be abandoned. + virtual void abandon(OnAbandonedFunction OnAbandoned) = 0; - /// Should transfer from working memory to target memory, and release - /// working memory. - virtual void finalizeAsync(FinalizeContinuation OnFinalize) = 0; + /// Called to transfer working memory to the target and apply finalization. + virtual void finalize(OnFinalizedFunction OnFinalized) = 0; - /// Calls finalizeAsync and waits for completion. - Error finalize() { - std::promise<MSVCPError> FinalizeResultP; + /// Synchronous convenience version of finalize. + Expected<FinalizedAlloc> finalize() { + std::promise<MSVCPExpected<FinalizedAlloc>> FinalizeResultP; auto FinalizeResultF = FinalizeResultP.get_future(); - finalizeAsync( - [&](Error Err) { FinalizeResultP.set_value(std::move(Err)); }); + finalize([&](Expected<FinalizedAlloc> Result) { + FinalizeResultP.set_value(std::move(Result)); + }); return FinalizeResultF.get(); } - - /// Should deallocate target memory. - virtual Error deallocate() = 0; }; + /// Typedef for the argument to be passed to OnAllocatedFunction. + using AllocResult = Expected<std::unique_ptr<InFlightAlloc>>; + + /// Called when allocation has been completed. + using OnAllocatedFunction = unique_function<void(AllocResult)>; + + /// Called when deallocation has completed. + using OnDeallocatedFunction = unique_function<void(Error)>; + virtual ~JITLinkMemoryManager(); - /// Create an Allocation object. + /// Start the allocation process. /// - /// The JD argument represents the target JITLinkDylib, and can be used by - /// JITLinkMemoryManager implementers to manage per-dylib allocation pools - /// (e.g. one pre-reserved address space slab per dylib to ensure that all - /// allocations for the dylib are within a certain range). The JD argument - /// may be null (representing an allocation not associated with any - /// JITDylib. 
+ /// If the initial allocation is successful then the OnAllocated function will + /// be called with a std::unique_ptr<InFlightAlloc> value. If the assocation + /// is unsuccessful then the OnAllocated function will be called with an + /// Error. + virtual void allocate(const JITLinkDylib *JD, LinkGraph &G, + OnAllocatedFunction OnAllocated) = 0; + + /// Convenience function for blocking allocation. + AllocResult allocate(const JITLinkDylib *JD, LinkGraph &G) { + std::promise<MSVCPExpected<std::unique_ptr<InFlightAlloc>>> AllocResultP; + auto AllocResultF = AllocResultP.get_future(); + allocate(JD, G, [&](AllocResult Alloc) { + AllocResultP.set_value(std::move(Alloc)); + }); + return AllocResultF.get(); + } + + /// Deallocate a list of allocation objects. /// - /// The request argument describes the segment sizes and permisssions being - /// requested. - virtual Expected<std::unique_ptr<Allocation>> - allocate(const JITLinkDylib *JD, const SegmentsRequestMap &Request) = 0; + /// Dealloc actions will be run in reverse order (from the end of the vector + /// to the start). + virtual void deallocate(std::vector<FinalizedAlloc> Allocs, + OnDeallocatedFunction OnDeallocated) = 0; + + /// Convenience function for deallocation of a single alloc. + void deallocate(FinalizedAlloc Alloc, OnDeallocatedFunction OnDeallocated) { + std::vector<FinalizedAlloc> Allocs; + Allocs.push_back(std::move(Alloc)); + deallocate(std::move(Allocs), std::move(OnDeallocated)); + } + + /// Convenience function for blocking deallocation. + Error deallocate(std::vector<FinalizedAlloc> Allocs) { + std::promise<MSVCPError> DeallocResultP; + auto DeallocResultF = DeallocResultP.get_future(); + deallocate(std::move(Allocs), + [&](Error Err) { DeallocResultP.set_value(std::move(Err)); }); + return DeallocResultF.get(); + } + + /// Convenience function for blocking deallocation of a single alloc. 
+ Error deallocate(FinalizedAlloc Alloc) { + std::vector<FinalizedAlloc> Allocs; + Allocs.push_back(std::move(Alloc)); + return deallocate(std::move(Allocs)); + } +}; + +/// BasicLayout simplifies the implementation of JITLinkMemoryManagers. +/// +/// BasicLayout groups Sections into Segments based on their memory protection +/// and deallocation policies. JITLinkMemoryManagers can construct a BasicLayout +/// from a Graph, and then assign working memory and addresses to each of the +/// Segments. These addreses will be mapped back onto the Graph blocks in +/// the apply method. +class BasicLayout { +public: + /// The Alignment, ContentSize and ZeroFillSize of each segment will be + /// pre-filled from the Graph. Clients must set the Addr and WorkingMem fields + /// prior to calling apply. + // + // FIXME: The C++98 initializer is an attempt to work around compile failures + // due to http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1397. + // We should be able to switch this back to member initialization once that + // issue is fixed. + class Segment { + friend class BasicLayout; + + public: + Segment() + : ContentSize(0), ZeroFillSize(0), Addr(0), WorkingMem(nullptr), + NextWorkingMemOffset(0) {} + Align Alignment; + size_t ContentSize; + uint64_t ZeroFillSize; + JITTargetAddress Addr; + char *WorkingMem = nullptr; + + private: + size_t NextWorkingMemOffset; + std::vector<Block *> ContentBlocks, ZeroFillBlocks; + }; + + /// A convenience class that further groups segments based on memory + /// deallocation policy. This allows clients to make two slab allocations: + /// one for all standard segments, and one for all finalize segments. 
+ struct ContiguousPageBasedLayoutSizes { + uint64_t StandardSegs = 0; + uint64_t FinalizeSegs = 0; + + uint64_t total() const { return StandardSegs + FinalizeSegs; } + }; + +private: + using SegmentMap = AllocGroupSmallMap<Segment>; + +public: + BasicLayout(LinkGraph &G); + + /// Return a reference to the graph this allocation was created from. + LinkGraph &getGraph() { return G; } + + /// Returns the total number of required to allocate all segments (with each + /// segment padded out to page size) for all standard segments, and all + /// finalize segments. + /// + /// This is a convenience function for the common case where the segments will + /// be allocated contiguously. + /// + /// This function will return an error if any segment has an alignment that + /// is higher than a page. + Expected<ContiguousPageBasedLayoutSizes> + getContiguousPageBasedLayoutSizes(uint64_t PageSize); + + /// Returns an iterator over the segments of the layout. + iterator_range<SegmentMap::iterator> segments() { + return {Segments.begin(), Segments.end()}; + } + + /// Apply the layout to the graph. + Error apply(); + + /// Returns a reference to the AllocActions in the graph. + /// This convenience function saves callers from having to #include + /// LinkGraph.h if all they need are allocation actions. + JITLinkMemoryManager::AllocActions &graphAllocActions(); + +private: + LinkGraph &G; + SegmentMap Segments; +}; + +/// A utility class for making simple allocations using JITLinkMemoryManager. +/// +/// SimpleSegementAlloc takes a mapping of AllocGroups to Segments and uses +/// this to create a LinkGraph with one Section (containing one Block) per +/// Segment. Clients can obtain a pointer to the working memory and executor +/// address of that block using the Segment's AllocGroup. Once memory has been +/// populated, clients can call finalize to finalize the memory. +class SimpleSegmentAlloc { +public: + /// Describes a segment to be allocated. 
+ struct Segment { + Segment() = default; + Segment(size_t ContentSize, Align ContentAlign) + : ContentSize(ContentSize), ContentAlign(ContentAlign) {} + + size_t ContentSize = 0; + Align ContentAlign; + }; + + /// Describes the segment working memory and executor address. + struct SegmentInfo { + JITTargetAddress Addr = 0; + MutableArrayRef<char> WorkingMem; + }; + + using SegmentMap = AllocGroupSmallMap<Segment>; + + using OnCreatedFunction = unique_function<void(Expected<SimpleSegmentAlloc>)>; + + using OnFinalizedFunction = + JITLinkMemoryManager::InFlightAlloc::OnFinalizedFunction; + + static void Create(JITLinkMemoryManager &MemMgr, const JITLinkDylib *JD, + SegmentMap Segments, OnCreatedFunction OnCreated); + + static Expected<SimpleSegmentAlloc> Create(JITLinkMemoryManager &MemMgr, + const JITLinkDylib *JD, + SegmentMap Segments); + + SimpleSegmentAlloc(SimpleSegmentAlloc &&); + SimpleSegmentAlloc &operator=(SimpleSegmentAlloc &&); + ~SimpleSegmentAlloc(); + + /// Returns the SegmentInfo for the given group. + SegmentInfo getSegInfo(AllocGroup AG); + + /// Finalize all groups (async version). + void finalize(OnFinalizedFunction OnFinalized) { + Alloc->finalize(std::move(OnFinalized)); + } + + /// Finalize all groups. + Expected<JITLinkMemoryManager::FinalizedAlloc> finalize() { + return Alloc->finalize(); + } + +private: + SimpleSegmentAlloc( + std::unique_ptr<LinkGraph> G, AllocGroupSmallMap<Block *> ContentBlocks, + std::unique_ptr<JITLinkMemoryManager::InFlightAlloc> Alloc); + + std::unique_ptr<LinkGraph> G; + AllocGroupSmallMap<Block *> ContentBlocks; + std::unique_ptr<JITLinkMemoryManager::InFlightAlloc> Alloc; }; /// A JITLinkMemoryManager that allocates in-process memory. class InProcessMemoryManager : public JITLinkMemoryManager { public: - Expected<std::unique_ptr<Allocation>> - allocate(const JITLinkDylib *JD, const SegmentsRequestMap &Request) override; + class IPInFlightAlloc; + + /// Attempts to auto-detect the host page size. 
+ static Expected<std::unique_ptr<InProcessMemoryManager>> Create(); + + /// Create an instance using the given page size. + InProcessMemoryManager(uint64_t PageSize) : PageSize(PageSize) {} + + void allocate(const JITLinkDylib *JD, LinkGraph &G, + OnAllocatedFunction OnAllocated) override; + + // Use overloads from base class. + using JITLinkMemoryManager::allocate; + + void deallocate(std::vector<FinalizedAlloc> Alloc, + OnDeallocatedFunction OnDeallocated) override; + + // Use overloads from base class. + using JITLinkMemoryManager::deallocate; + +private: + // FIXME: Use an in-place array instead of a vector for DeallocActions. + // There shouldn't need to be a heap alloc for this. + struct FinalizedAllocInfo { + sys::MemoryBlock StandardSegments; + std::vector<AllocActionCall> DeallocActions; + }; + + FinalizedAlloc + createFinalizedAlloc(sys::MemoryBlock StandardSegments, + std::vector<AllocActionCall> DeallocActions); + + uint64_t PageSize; + std::mutex FinalizedAllocsMutex; + RecyclingAllocator<BumpPtrAllocator, FinalizedAllocInfo> FinalizedAllocInfos; }; } // end namespace jitlink diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/MachO_arm64.h b/llvm/include/llvm/ExecutionEngine/JITLink/MachO_arm64.h index ecbc93e1467d..aee14c0d1fe5 100644 --- a/llvm/include/llvm/ExecutionEngine/JITLink/MachO_arm64.h +++ b/llvm/include/llvm/ExecutionEngine/JITLink/MachO_arm64.h @@ -29,6 +29,8 @@ enum MachOARM64RelocationKind : Edge::Kind { PageOffset12, GOTPage21, GOTPageOffset12, + TLVPage21, + TLVPageOffset12, PointerToGOT, PairedAddend, LDRLiteral19, diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/MemoryFlags.h b/llvm/include/llvm/ExecutionEngine/JITLink/MemoryFlags.h new file mode 100644 index 000000000000..8fdce93ebc56 --- /dev/null +++ b/llvm/include/llvm/ExecutionEngine/JITLink/MemoryFlags.h @@ -0,0 +1,225 @@ +//===-------- MemoryFlags.h - Memory allocation flags -----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 
with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Defines types and operations related to memory protection and allocation +// lifetimes. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EXECUTIONENGINE_JITLINK_MEMORYFLAGS_H +#define LLVM_EXECUTIONENGINE_JITLINK_MEMORYFLAGS_H + +#include "llvm/ADT/BitmaskEnum.h" +#include "llvm/ADT/DenseMapInfo.h" +#include "llvm/Support/Memory.h" +#include "llvm/Support/raw_ostream.h" + +namespace llvm { +namespace jitlink { + +/// Describes Read/Write/Exec permissions for memory. +enum class MemProt { + None = 0, + Read = 1U << 0, + Write = 1U << 1, + Exec = 1U << 2, + LLVM_MARK_AS_BITMASK_ENUM(/* LargestValue = */ Exec) +}; + +/// Print a MemProt as an RWX triple. +raw_ostream &operator<<(raw_ostream &OS, MemProt MP); + +/// Convert a MemProt value to a corresponding sys::Memory::ProtectionFlags +/// value. +inline sys::Memory::ProtectionFlags toSysMemoryProtectionFlags(MemProt MP) { + std::underlying_type_t<sys::Memory::ProtectionFlags> PF = 0; + if ((MP & MemProt::Read) != MemProt::None) + PF |= sys::Memory::MF_READ; + if ((MP & MemProt::Write) != MemProt::None) + PF |= sys::Memory::MF_WRITE; + if ((MP & MemProt::Exec) != MemProt::None) + PF |= sys::Memory::MF_EXEC; + return static_cast<sys::Memory::ProtectionFlags>(PF); +} + +/// Convert a sys::Memory::ProtectionFlags value to a corresponding MemProt +/// value. 
+inline MemProt fromSysMemoryProtectionFlags(sys::Memory::ProtectionFlags PF) { + MemProt MP = MemProt::None; + if (PF & sys::Memory::MF_READ) + MP |= MemProt::Read; + if (PF & sys::Memory::MF_WRITE) + MP |= MemProt::Write; + if (PF & sys::Memory::MF_EXEC) + MP |= MemProt::None; + return MP; +} + +/// Describes a memory deallocation policy for memory to be allocated by a +/// JITLinkMemoryManager. +/// +/// All memory allocated by a call to JITLinkMemoryManager::allocate should be +/// deallocated if a call is made to +/// JITLinkMemoryManager::InFlightAllocation::abandon. The policies below apply +/// to finalized allocations. +enum class MemDeallocPolicy { + /// Standard memory should be deallocated when the deallocate method is called + /// for the finalized allocation. + Standard, + + /// Finalize memory should be overwritten and then deallocated after all + /// finalization functions have been run. + Finalize +}; + +/// Print a MemDeallocPolicy. +raw_ostream &operator<<(raw_ostream &OS, MemDeallocPolicy MDP); + +/// A pair of memory protections and allocation policies. +/// +/// Optimized for use as a small map key. +class AllocGroup { + friend struct llvm::DenseMapInfo<AllocGroup>; + + using underlying_type = uint8_t; + static constexpr unsigned BitsForProt = 3; + static constexpr unsigned BitsForDeallocPolicy = 1; + static constexpr unsigned MaxIdentifiers = + 1U << (BitsForProt + BitsForDeallocPolicy); + +public: + static constexpr unsigned NumGroups = MaxIdentifiers; + + /// Create a default AllocGroup. No memory protections, standard + /// deallocation policy. + AllocGroup() = default; + + /// Create an AllocGroup from a MemProt only -- uses + /// MemoryDeallocationPolicy::Standard. + AllocGroup(MemProt MP) : Id(static_cast<underlying_type>(MP)) {} + + /// Create an AllocGroup from a MemProt and a MemoryDeallocationPolicy. 
+ AllocGroup(MemProt MP, MemDeallocPolicy MDP) + : Id(static_cast<underlying_type>(MP) | + (static_cast<underlying_type>(MDP) << BitsForProt)) {} + + /// Returns the MemProt for this group. + MemProt getMemProt() const { + return static_cast<MemProt>(Id & ((1U << BitsForProt) - 1)); + } + + /// Returns the MemoryDeallocationPolicy for this group. + MemDeallocPolicy getMemDeallocPolicy() const { + return static_cast<MemDeallocPolicy>(Id >> BitsForProt); + } + + friend bool operator==(const AllocGroup &LHS, const AllocGroup &RHS) { + return LHS.Id == RHS.Id; + } + + friend bool operator!=(const AllocGroup &LHS, const AllocGroup &RHS) { + return !(LHS == RHS); + } + + friend bool operator<(const AllocGroup &LHS, const AllocGroup &RHS) { + return LHS.Id < RHS.Id; + } + +private: + AllocGroup(underlying_type RawId) : Id(RawId) {} + underlying_type Id = 0; +}; + +/// A specialized small-map for AllocGroups. +/// +/// Iteration order is guaranteed to match key ordering. +template <typename T> class AllocGroupSmallMap { +private: + using ElemT = std::pair<AllocGroup, T>; + using VectorTy = SmallVector<ElemT, 4>; + + static bool compareKey(const ElemT &E, const AllocGroup &G) { + return E.first < G; + } + +public: + using iterator = typename VectorTy::iterator; + + AllocGroupSmallMap() = default; + AllocGroupSmallMap(std::initializer_list<std::pair<AllocGroup, T>> Inits) { + Elems.reserve(Inits.size()); + for (const auto &E : Inits) + Elems.push_back(E); + llvm::sort(Elems, [](const ElemT &LHS, const ElemT &RHS) { + return LHS.first < RHS.first; + }); + } + + iterator begin() { return Elems.begin(); } + iterator end() { return Elems.end(); } + iterator find(AllocGroup G) { + auto I = lower_bound(Elems, G, compareKey); + return (I->first == G) ? 
I : end(); + } + + bool empty() const { return Elems.empty(); } + size_t size() const { return Elems.size(); } + + T &operator[](AllocGroup G) { + auto I = lower_bound(Elems, G, compareKey); + if (I == Elems.end() || I->first != G) + I = Elems.insert(I, std::make_pair(G, T())); + return I->second; + } + +private: + VectorTy Elems; +}; + +/// Print an AllocGroup. +raw_ostream &operator<<(raw_ostream &OS, AllocGroup AG); + +} // end namespace jitlink + +template <> struct DenseMapInfo<jitlink::MemProt> { + static inline jitlink::MemProt getEmptyKey() { + return jitlink::MemProt(~uint8_t(0)); + } + static inline jitlink::MemProt getTombstoneKey() { + return jitlink::MemProt(~uint8_t(0) - 1); + } + static unsigned getHashValue(const jitlink::MemProt &Val) { + using UT = std::underlying_type_t<jitlink::MemProt>; + return DenseMapInfo<UT>::getHashValue(static_cast<UT>(Val)); + } + static bool isEqual(const jitlink::MemProt &LHS, + const jitlink::MemProt &RHS) { + return LHS == RHS; + } +}; + +template <> struct DenseMapInfo<jitlink::AllocGroup> { + static inline jitlink::AllocGroup getEmptyKey() { + return jitlink::AllocGroup(~uint8_t(0)); + } + static inline jitlink::AllocGroup getTombstoneKey() { + return jitlink::AllocGroup(~uint8_t(0) - 1); + } + static unsigned getHashValue(const jitlink::AllocGroup &Val) { + return DenseMapInfo<jitlink::AllocGroup::underlying_type>::getHashValue( + Val.Id); + } + static bool isEqual(const jitlink::AllocGroup &LHS, + const jitlink::AllocGroup &RHS) { + return LHS == RHS; + } +}; + +} // end namespace llvm + +#endif // LLVM_EXECUTIONENGINE_JITLINK_MEMORYFLAGS_H diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/TableManager.h b/llvm/include/llvm/ExecutionEngine/JITLink/TableManager.h new file mode 100644 index 000000000000..c20f62d515ec --- /dev/null +++ b/llvm/include/llvm/ExecutionEngine/JITLink/TableManager.h @@ -0,0 +1,63 @@ +//===---------------------- TableManager.h ----------------------*- C++ -*-===// +// +// Part of the 
LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Fix edge for edge that needs an entry to reference the target symbol +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EXECUTIONENGINE_JITLINK_TABLEMANAGER_H +#define LLVM_EXECUTIONENGINE_JITLINK_TABLEMANAGER_H + +#include "llvm/ExecutionEngine/JITLink/JITLink.h" +#include "llvm/Support/Debug.h" + +namespace llvm { +namespace jitlink { + +/// A CRTP base for tables that are built on demand, e.g. Global Offset Tables +/// and Procedure Linkage Tables. +/// The getEntyrForTarget function returns the table entry corresponding to the +/// given target, calling down to the implementation class to build an entry if +/// one does not already exist. +template <typename TableManagerImplT> class TableManager { +public: + /// Return the constructed entry + /// + /// Use parameter G to construct the entry for target symbol + Symbol &getEntryForTarget(LinkGraph &G, Symbol &Target) { + assert(Target.hasName() && "Edge cannot point to anonymous target"); + + auto EntryI = Entries.find(Target.getName()); + + // Build the entry if it doesn't exist. 
+ if (EntryI == Entries.end()) { + auto &Entry = impl().createEntry(G, Target); + DEBUG_WITH_TYPE("jitlink", { + dbgs() << " Created" << impl().getSectionName() << "entry for " + << Target.getName() << ": " << Entry << "\n"; + }); + EntryI = Entries.insert(std::make_pair(Target.getName(), &Entry)).first; + } + + assert(EntryI != Entries.end() && "Could not get entry symbol"); + DEBUG_WITH_TYPE("jitlink", { + dbgs() << " Using " << impl().getSectionName() << " entry " + << *EntryI->second << "\n"; + }); + return *EntryI->second; + } + +private: + TableManagerImplT &impl() { return static_cast<TableManagerImplT &>(*this); } + DenseMap<StringRef, Symbol *> Entries; +}; + +} // namespace jitlink +} // namespace llvm + +#endif diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/aarch64.h b/llvm/include/llvm/ExecutionEngine/JITLink/aarch64.h new file mode 100644 index 000000000000..994ce783b058 --- /dev/null +++ b/llvm/include/llvm/ExecutionEngine/JITLink/aarch64.h @@ -0,0 +1,38 @@ +//=== aarch64.h - Generic JITLink aarch64 edge kinds, utilities -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Generic utilities for graphs representing aarch64 objects. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EXECUTIONENGINE_JITLINK_AARCH64_H +#define LLVM_EXECUTIONENGINE_JITLINK_AARCH64_H + +#include "llvm/ExecutionEngine/JITLink/JITLink.h" + +namespace llvm { +namespace jitlink { +namespace aarch64 { + +/// Represets aarch64 fixups +enum EdgeKind_aarch64 : Edge::Kind { + + /// Set a CALL immediate field to bits [27:2] of X = Target - Fixup + Addend + R_AARCH64_CALL26 = Edge::FirstRelocation, + +}; + +/// Returns a string name for the given aarch64 edge. 
For debugging purposes +/// only +const char *getEdgeKindName(Edge::Kind K); + +} // namespace aarch64 +} // namespace jitlink +} // namespace llvm + +#endif // LLVM_EXECUTIONENGINE_JITLINK_AARCH64_H diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/riscv.h b/llvm/include/llvm/ExecutionEngine/JITLink/riscv.h index a4509f3888a4..b8d08d88c1c9 100644 --- a/llvm/include/llvm/ExecutionEngine/JITLink/riscv.h +++ b/llvm/include/llvm/ExecutionEngine/JITLink/riscv.h @@ -70,7 +70,19 @@ enum EdgeKind_riscv : Edge::Kind { /// /// Fixup expression: /// Fixup <- (Target - Fixup + Addend) - R_RISCV_CALL + R_RISCV_CALL, + + /// PC relative GOT offset + /// + /// Fixup expression: + /// Fixup <- (GOT - Fixup + Addend) >> 12 + R_RISCV_GOT_HI20, + + /// PC relative call by PLT + /// + /// Fixup expression: + /// Fixup <- (Target - Fixup + Addend) + R_RISCV_CALL_PLT }; diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/x86_64.h b/llvm/include/llvm/ExecutionEngine/JITLink/x86_64.h index 006d983537e9..3130ea381534 100644 --- a/llvm/include/llvm/ExecutionEngine/JITLink/x86_64.h +++ b/llvm/include/llvm/ExecutionEngine/JITLink/x86_64.h @@ -14,6 +14,7 @@ #define LLVM_EXECUTIONENGINE_JITLINK_X86_64_H #include "llvm/ExecutionEngine/JITLink/JITLink.h" +#include "llvm/ExecutionEngine/JITLink/TableManager.h" #include <limits> @@ -42,6 +43,16 @@ enum EdgeKind_x86_64 : Edge::Kind { /// Pointer32, + /// A signed 32-bit pointer value relocation + /// + /// Fixup expression: + /// Fixup <- Target + Addend : int32 + /// + /// Errors: + /// - The target must reside in the signed 32-bits([-2**31, 2**32 - 1]) of + /// the address space, otherwise an out-of-range error will be returned. + Pointer32Signed, + /// A 64-bit delta. /// /// Delta from the fixup to the target. @@ -85,6 +96,18 @@ enum EdgeKind_x86_64 : Edge::Kind { /// an out-of-range error will be returned. NegDelta32, + /// A 64-bit GOT delta. 
+ /// + /// Delta from the global offset table to the target + /// + /// Fixup expression: + /// Fixup <- Target - GOTSymbol + Addend : int64 + /// + /// Errors: + /// - *ASSERTION* Failure to a null pointer GOTSymbol, which the GOT section + /// symbol was not been defined. + Delta64FromGOT, + /// A 32-bit PC-relative branch. /// /// Represents a PC-relative call or branch to a target. This can be used to @@ -120,7 +143,7 @@ enum EdgeKind_x86_64 : Edge::Kind { /// This edge kind has the same fixup expression as BranchPCRel32, but further /// identifies the call/branch as being to a pointer jump stub. For edges of /// this kind the jump stub should not be bypassed (use - /// BranchPCRel32ToPtrJumpStubRelaxable for that), but the pointer location + /// BranchPCRel32ToPtrJumpStubBypassable for that), but the pointer location /// target may be recorded to allow manipulation at runtime. /// /// Fixup expression: @@ -136,7 +159,8 @@ enum EdgeKind_x86_64 : Edge::Kind { /// /// The edge kind has the same fixup expression as BranchPCRel32ToPtrJumpStub, /// but identifies the call/branch as being to a pointer jump stub that may be - /// bypassed if the ultimate target is within range of the fixup location. + /// bypassed with a direct jump to the ultimate target if the ultimate target + /// is within range of the fixup location. /// /// Fixup expression: /// Fixup <- Target - Fixup + Addend - 4: int32 @@ -145,7 +169,7 @@ enum EdgeKind_x86_64 : Edge::Kind { /// - The result of the fixup expression must fit into an int32, otherwise /// an out-of-range error will be returned. /// - BranchPCRel32ToPtrJumpStubRelaxable, + BranchPCRel32ToPtrJumpStubBypassable, /// A GOT entry getter/constructor, transformed to Delta32 pointing at the GOT /// entry for the original target. 
@@ -167,7 +191,62 @@ enum EdgeKind_x86_64 : Edge::Kind { /// RequestGOTAndTransformToDelta32, - /// A PC-relative reference to a GOT entry, relaxable if GOT entry target + /// A GOT entry getter/constructor, transformed to Delta64 pointing at the GOT + /// entry for the original target. + /// + /// Indicates that this edge should be transformed into a Delta64 targeting + /// the GOT entry for the edge's current target, maintaining the same addend. + /// A GOT entry for the target should be created if one does not already + /// exist. + /// + /// Edges of this kind are usually handled by a GOT builder pass inserted by + /// default. + /// + /// Fixup expression: + /// NONE + /// + /// Errors: + /// - *ASSERTION* Failure to handle edges of this kind prior to the fixup + /// phase will result in an assert/unreachable during the fixup phase. + /// + RequestGOTAndTransformToDelta64, + + /// A GOT entry offset within GOT getter/constructor, transformed to + /// Delta64FromGOT + /// pointing at the GOT entry for the original target + /// + /// Indicates that this edge should be transformed into a Delta64FromGOT + /// targeting + /// the GOT entry for the edge's current target, maintaining the same addend. + /// A GOT entry for the target should be created if one does not already + /// exist. 
+ /// + /// Edges of this kind are usually handled by a GOT builder pass inserted by + /// default + /// + /// Fixup expression: + /// NONE + /// + /// Errors: + /// - *ASSERTION* Failure to handle edges of this kind prior to the fixup + /// phase will result in an assert/unreachable during the fixup phase + RequestGOTAndTransformToDelta64FromGOT, + + /// A PC-relative load of a GOT entry, relaxable if GOT entry target is + /// in-range of the fixup + /// + /// TODO: Explain the optimization + /// + /// Fixup expression + /// Fixup <- Target - (Fixup + 4) + Addend : int32 + /// + /// Errors: + /// - The result of the fixup expression must fit into an int32, otherwise + /// an out-of-range error will be returned. + // + PCRel32GOTLoadRelaxable, + + /// A PC-relative REX load of a GOT entry, relaxable if GOT entry target /// is in-range of the fixup. /// /// If the GOT entry target is in-range of the fixup then the load from the @@ -180,17 +259,39 @@ enum EdgeKind_x86_64 : Edge::Kind { /// - The result of the fixup expression must fit into an int32, otherwise /// an out-of-range error will be returned. /// - PCRel32GOTLoadRelaxable, + PCRel32GOTLoadREXRelaxable, - /// A GOT entry getter/constructor, transformed to PCRel32ToGOTLoadRelaxable - /// pointing at the GOT entry for the original target. + /// A GOT entry getter/constructor, transformed to + /// PCRel32ToGOTLoadREXRelaxable pointing at the GOT entry for the original + /// target. /// - /// Indicates that this edge should be transformed into a - /// PC32ToGOTLoadRelaxable targeting the GOT entry for the edge's current - /// target, maintaining the same addend. A GOT entry for the target should be - /// created if one does not already exist. + /// Indicates that this edge should be lowered to a PC32ToGOTLoadREXRelaxable + /// targeting the GOT entry for the edge's current target, maintaining the + /// same addend. A GOT entry for the target should be created if one does not + /// already exist. 
/// - /// Edges of this kind are usually handled by a GOT builder pass inserted by + /// Edges of this kind are usually lowered by a GOT builder pass inserted by + /// default. + /// + /// Fixup expression: + /// NONE + /// + /// Errors: + /// - *ASSERTION* Failure to handle edges of this kind prior to the fixup + /// phase will result in an assert/unreachable during the fixup phase. + /// + RequestGOTAndTransformToPCRel32GOTLoadREXRelaxable, + + /// A GOT entry getter/constructor, transformed to + /// PCRel32ToGOTLoadRelaxable pointing at the GOT entry for the original + /// target. + /// + /// Indicates that this edge should be lowered to a PC32ToGOTLoadRelaxable + /// targeting the GOT entry for the edge's current target, maintaining the + /// same addend. A GOT entry for the target should be created if one does not + /// already exist. + /// + /// Edges of this kind are usually lowered by a GOT builder pass inserted by /// default. /// /// Fixup expression: @@ -202,10 +303,10 @@ enum EdgeKind_x86_64 : Edge::Kind { /// RequestGOTAndTransformToPCRel32GOTLoadRelaxable, - /// A PC-relative reference to a Thread Local Variable Pointer (TLVP) entry, + /// A PC-relative REX load of a Thread Local Variable Pointer (TLVP) entry, /// relaxable if the TLVP entry target is in-range of the fixup. /// - /// If the TLVP entry target is in-range of the fixup then the load frmo the + /// If the TLVP entry target is in-range of the fixup then the load from the /// TLVP may be replaced with a direct memory address calculation. /// /// The target of this edge must be a thread local variable entry of the form @@ -222,15 +323,18 @@ enum EdgeKind_x86_64 : Edge::Kind { /// - The target must be either external, or a TLV entry of the required /// form, otherwise a malformed TLV entry error will be returned. 
/// - PCRel32TLVPLoadRelaxable, + PCRel32TLVPLoadREXRelaxable, + + /// TODO: Explain the generic edge kind + RequestTLSDescInGOTAndTransformToDelta32, /// A TLVP entry getter/constructor, transformed to - /// Delta32ToTLVPLoadRelaxable. + /// Delta32ToTLVPLoadREXRelaxable. /// /// Indicates that this edge should be transformed into a - /// Delta32ToTLVPLoadRelaxable targeting the TLVP entry for the edge's current - /// target. A TLVP entry for the target should be created if one does not - /// already exist. + /// Delta32ToTLVPLoadREXRelaxable targeting the TLVP entry for the edge's + /// current target. A TLVP entry for the target should be created if one does + /// not already exist. /// /// Fixup expression: /// NONE @@ -239,7 +343,7 @@ enum EdgeKind_x86_64 : Edge::Kind { /// - *ASSERTION* Failure to handle edges of this kind prior to the fixup /// phase will result in an assert/unreachable during the fixup phase. /// - RequestTLVPAndTransformToPCRel32TLVPLoadRelaxable + RequestTLVPAndTransformToPCRel32TLVPLoadREXRelaxable }; /// Returns a string name for the given x86-64 edge. For debugging purposes @@ -258,7 +362,8 @@ inline bool isInRangeForImmS32(int64_t Value) { } /// Apply fixup expression for edge to block content. 
-inline Error applyFixup(LinkGraph &G, Block &B, const Edge &E) { +inline Error applyFixup(LinkGraph &G, Block &B, const Edge &E, + const Symbol *GOTSymbol) { using namespace support; char *BlockWorkingMem = B.getAlreadyMutableContent().data(); @@ -281,12 +386,21 @@ inline Error applyFixup(LinkGraph &G, Block &B, const Edge &E) { return makeTargetOutOfRangeError(G, B, E); break; } + case Pointer32Signed: { + int64_t Value = E.getTarget().getAddress() + E.getAddend(); + if (LLVM_LIKELY(isInRangeForImmS32(Value))) + *(little32_t *)FixupPtr = Value; + else + return makeTargetOutOfRangeError(G, B, E); + break; + } case BranchPCRel32: case BranchPCRel32ToPtrJumpStub: - case BranchPCRel32ToPtrJumpStubRelaxable: + case BranchPCRel32ToPtrJumpStubBypassable: case PCRel32GOTLoadRelaxable: - case PCRel32TLVPLoadRelaxable: { + case PCRel32GOTLoadREXRelaxable: + case PCRel32TLVPLoadREXRelaxable: { int64_t Value = E.getTarget().getAddress() - (FixupAddress + 4) + E.getAddend(); if (LLVM_LIKELY(isInRangeForImmS32(Value))) @@ -325,6 +439,13 @@ inline Error applyFixup(LinkGraph &G, Block &B, const Edge &E) { return makeTargetOutOfRangeError(G, B, E); break; } + case Delta64FromGOT: { + assert(GOTSymbol && "No GOT section symbol"); + int64_t Value = + E.getTarget().getAddress() - GOTSymbol->getAddress() + E.getAddend(); + *(little64_t *)FixupPtr = Value; + break; + } default: { // If you hit this you should check that *constructor and other non-fixup @@ -395,6 +516,114 @@ inline Symbol &createAnonymousPointerJumpStub(LinkGraph &G, false); } +/// Global Offset Table Builder. 
+class GOTTableManager : public TableManager<GOTTableManager> { +public: + static StringRef getSectionName() { return "$__GOT"; } + + bool visitEdge(LinkGraph &G, Block *B, Edge &E) { + Edge::Kind KindToSet = Edge::Invalid; + switch (E.getKind()) { + case x86_64::Delta64FromGOT: { + // we need to make sure that the GOT section exists, but don't otherwise + // need to fix up this edge + getGOTSection(G); + return false; + } + case x86_64::RequestGOTAndTransformToPCRel32GOTLoadREXRelaxable: + KindToSet = x86_64::PCRel32GOTLoadREXRelaxable; + break; + case x86_64::RequestGOTAndTransformToPCRel32GOTLoadRelaxable: + KindToSet = x86_64::PCRel32GOTLoadRelaxable; + break; + case x86_64::RequestGOTAndTransformToDelta64: + KindToSet = x86_64::Delta64; + break; + case x86_64::RequestGOTAndTransformToDelta64FromGOT: + KindToSet = x86_64::Delta64FromGOT; + break; + case x86_64::RequestGOTAndTransformToDelta32: + KindToSet = x86_64::Delta32; + break; + default: + return false; + } + assert(KindToSet != Edge::Invalid && + "Fell through switch, but no new kind to set"); + DEBUG_WITH_TYPE("jitlink", { + dbgs() << " Fixing " << G.getEdgeKindName(E.getKind()) << " edge at " + << formatv("{0:x}", B->getFixupAddress(E)) << " (" + << formatv("{0:x}", B->getAddress()) << " + " + << formatv("{0:x}", E.getOffset()) << ")\n"; + }); + E.setKind(KindToSet); + E.setTarget(getEntryForTarget(G, E.getTarget())); + return true; + } + + Symbol &createEntry(LinkGraph &G, Symbol &Target) { + return createAnonymousPointer(G, getGOTSection(G), &Target); + } + +private: + Section &getGOTSection(LinkGraph &G) { + if (!GOTSection) + GOTSection = &G.createSection(getSectionName(), MemProt::Read); + return *GOTSection; + } + + Section *GOTSection = nullptr; +}; + +/// Procedure Linkage Table Builder. 
+class PLTTableManager : public TableManager<PLTTableManager> { +public: + PLTTableManager(GOTTableManager &GOT) : GOT(GOT) {} + + static StringRef getSectionName() { return "$__STUBS"; } + + bool visitEdge(LinkGraph &G, Block *B, Edge &E) { + if (E.getKind() == x86_64::BranchPCRel32 && !E.getTarget().isDefined()) { + DEBUG_WITH_TYPE("jitlink", { + dbgs() << " Fixing " << G.getEdgeKindName(E.getKind()) << " edge at " + << formatv("{0:x}", B->getFixupAddress(E)) << " (" + << formatv("{0:x}", B->getAddress()) << " + " + << formatv("{0:x}", E.getOffset()) << ")\n"; + }); + // Set the edge kind to Branch32ToPtrJumpStubBypassable to enable it to + // be optimized when the target is in-range. + E.setKind(x86_64::BranchPCRel32ToPtrJumpStubBypassable); + E.setTarget(getEntryForTarget(G, E.getTarget())); + return true; + } + return false; + } + + Symbol &createEntry(LinkGraph &G, Symbol &Target) { + return createAnonymousPointerJumpStub(G, getStubsSection(G), + GOT.getEntryForTarget(G, Target)); + } + +public: + Section &getStubsSection(LinkGraph &G) { + if (!PLTSection) + PLTSection = + &G.createSection(getSectionName(), MemProt::Read | MemProt::Exec); + return *PLTSection; + } + + GOTTableManager &GOT; + Section *PLTSection = nullptr; +}; + +/// Optimize the GOT and Stub relocations if the edge target address is in range +/// 1. PCRel32GOTLoadRelaxable. For this edge kind, if the target is in range, +/// then replace GOT load with lea +/// 2. BranchPCRel32ToPtrJumpStubRelaxable. 
For this edge kind, if the target is +/// in range, replace a indirect jump by plt stub with a direct jump to the +/// target +Error optimizeGOTAndStubAccesses(LinkGraph &G); + } // namespace x86_64 } // end namespace jitlink } // end namespace llvm diff --git a/llvm/include/llvm/ExecutionEngine/MCJIT.h b/llvm/include/llvm/ExecutionEngine/MCJIT.h index 8253bf98963b..adce98f380c5 100644 --- a/llvm/include/llvm/ExecutionEngine/MCJIT.h +++ b/llvm/include/llvm/ExecutionEngine/MCJIT.h @@ -26,6 +26,9 @@ namespace { // delete it all as dead code, even with whole program optimization, // yet is effectively a NO-OP. As the compiler isn't smart enough // to know that getenv() never returns -1, this will do the job. + // This is so that globals in the translation units where these functions + // are defined are forced to be initialized, populating various + // registries. if (std::getenv("bar") != (char*) -1) return; diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Core.h b/llvm/include/llvm/ExecutionEngine/Orc/Core.h index e832d8d57dfa..5cac65b49a05 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/Core.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/Core.h @@ -21,7 +21,7 @@ #include "llvm/ExecutionEngine/JITSymbol.h" #include "llvm/ExecutionEngine/Orc/ExecutorProcessControl.h" #include "llvm/ExecutionEngine/Orc/Shared/WrapperFunctionUtils.h" -#include "llvm/ExecutionEngine/OrcV1Deprecation.h" +#include "llvm/ExecutionEngine/Orc/TaskDispatch.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ExtensibleRTTI.h" @@ -434,13 +434,16 @@ class SymbolsNotFound : public ErrorInfo<SymbolsNotFound> { public: static char ID; - SymbolsNotFound(SymbolNameSet Symbols); - SymbolsNotFound(SymbolNameVector Symbols); + SymbolsNotFound(std::shared_ptr<SymbolStringPool> SSP, SymbolNameSet Symbols); + SymbolsNotFound(std::shared_ptr<SymbolStringPool> SSP, + SymbolNameVector Symbols); std::error_code convertToErrorCode() const override; void log(raw_ostream &OS) const override; + 
std::shared_ptr<SymbolStringPool> getSymbolStringPool() { return SSP; } const SymbolNameVector &getSymbols() const { return Symbols; } private: + std::shared_ptr<SymbolStringPool> SSP; SymbolNameVector Symbols; }; @@ -449,12 +452,15 @@ class SymbolsCouldNotBeRemoved : public ErrorInfo<SymbolsCouldNotBeRemoved> { public: static char ID; - SymbolsCouldNotBeRemoved(SymbolNameSet Symbols); + SymbolsCouldNotBeRemoved(std::shared_ptr<SymbolStringPool> SSP, + SymbolNameSet Symbols); std::error_code convertToErrorCode() const override; void log(raw_ostream &OS) const override; + std::shared_ptr<SymbolStringPool> getSymbolStringPool() { return SSP; } const SymbolNameSet &getSymbols() const { return Symbols; } private: + std::shared_ptr<SymbolStringPool> SSP; SymbolNameSet Symbols; }; @@ -466,13 +472,17 @@ class MissingSymbolDefinitions : public ErrorInfo<MissingSymbolDefinitions> { public: static char ID; - MissingSymbolDefinitions(std::string ModuleName, SymbolNameVector Symbols) - : ModuleName(std::move(ModuleName)), Symbols(std::move(Symbols)) {} + MissingSymbolDefinitions(std::shared_ptr<SymbolStringPool> SSP, + std::string ModuleName, SymbolNameVector Symbols) + : SSP(std::move(SSP)), ModuleName(std::move(ModuleName)), + Symbols(std::move(Symbols)) {} std::error_code convertToErrorCode() const override; void log(raw_ostream &OS) const override; + std::shared_ptr<SymbolStringPool> getSymbolStringPool() { return SSP; } const std::string &getModuleName() const { return ModuleName; } const SymbolNameVector &getSymbols() const { return Symbols; } private: + std::shared_ptr<SymbolStringPool> SSP; std::string ModuleName; SymbolNameVector Symbols; }; @@ -485,13 +495,17 @@ class UnexpectedSymbolDefinitions : public ErrorInfo<UnexpectedSymbolDefinitions public: static char ID; - UnexpectedSymbolDefinitions(std::string ModuleName, SymbolNameVector Symbols) - : ModuleName(std::move(ModuleName)), Symbols(std::move(Symbols)) {} + 
UnexpectedSymbolDefinitions(std::shared_ptr<SymbolStringPool> SSP, + std::string ModuleName, SymbolNameVector Symbols) + : SSP(std::move(SSP)), ModuleName(std::move(ModuleName)), + Symbols(std::move(Symbols)) {} std::error_code convertToErrorCode() const override; void log(raw_ostream &OS) const override; + std::shared_ptr<SymbolStringPool> getSymbolStringPool() { return SSP; } const std::string &getModuleName() const { return ModuleName; } const SymbolNameVector &getSymbols() const { return Symbols; } private: + std::shared_ptr<SymbolStringPool> SSP; std::string ModuleName; SymbolNameVector Symbols; }; @@ -1241,21 +1255,6 @@ public: const DenseMap<JITDylib *, SymbolLookupSet> &InitSyms); }; -/// Represents an abstract task for ORC to run. -class Task : public RTTIExtends<Task, RTTIRoot> { -public: - static char ID; - - /// Description of the task to be performed. Used for logging. - virtual void printDescription(raw_ostream &OS) = 0; - - /// Run the task. - virtual void run() = 0; - -private: - void anchor() override; -}; - /// A materialization task. class MaterializationTask : public RTTIExtends<MaterializationTask, Task> { public: @@ -1285,13 +1284,16 @@ public: /// For reporting errors. using ErrorReporter = std::function<void(Error)>; + /// Send a result to the remote. + using SendResultFunction = unique_function<void(shared::WrapperFunctionResult)>; + /// For dispatching ORC tasks (typically materialization tasks). using DispatchTaskFunction = unique_function<void(std::unique_ptr<Task> T)>; /// An asynchronous wrapper-function callable from the executor via /// jit-dispatch. using JITDispatchHandlerFunction = unique_function<void( - ExecutorProcessControl::SendResultFunction SendResult, + SendResultFunction SendResult, const char *ArgData, size_t ArgSize)>; /// A map associating tag names with asynchronous wrapper function @@ -1303,13 +1305,19 @@ public: /// object. ExecutionSession(std::unique_ptr<ExecutorProcessControl> EPC); - /// End the session. 
Closes all JITDylibs. + /// End the session. Closes all JITDylibs and disconnects from the + /// executor. Error endSession(); /// Get the ExecutorProcessControl object associated with this /// ExecutionSession. ExecutorProcessControl &getExecutorProcessControl() { return *EPC; } + /// Get the SymbolStringPool for this instance. + std::shared_ptr<SymbolStringPool> getSymbolStringPool() { + return EPC->getSymbolStringPool(); + } + /// Add a symbol name to the SymbolStringPool and return a pointer to it. SymbolStringPtr intern(StringRef SymName) { return EPC->intern(SymName); } @@ -1462,10 +1470,9 @@ public: /// \endcode{.cpp} /// /// The given OnComplete function will be called to return the result. - void callWrapperAsync(ExecutorProcessControl::SendResultFunction OnComplete, - JITTargetAddress WrapperFnAddr, - ArrayRef<char> ArgBuffer) { - EPC->callWrapperAsync(std::move(OnComplete), WrapperFnAddr, ArgBuffer); + template <typename... ArgTs> + void callWrapperAsync(ArgTs &&... Args) { + EPC->callWrapperAsync(std::forward<ArgTs>(Args)...); } /// Run a wrapper function in the executor. The wrapper function should be @@ -1474,30 +1481,18 @@ public: /// \code{.cpp} /// CWrapperFunctionResult fn(uint8_t *Data, uint64_t Size); /// \endcode{.cpp} - shared::WrapperFunctionResult callWrapper(JITTargetAddress WrapperFnAddr, + shared::WrapperFunctionResult callWrapper(ExecutorAddr WrapperFnAddr, ArrayRef<char> ArgBuffer) { - std::promise<shared::WrapperFunctionResult> RP; - auto RF = RP.get_future(); - callWrapperAsync( - [&](shared::WrapperFunctionResult R) { RP.set_value(std::move(R)); }, - WrapperFnAddr, ArgBuffer); - return RF.get(); + return EPC->callWrapper(WrapperFnAddr, ArgBuffer); } /// Run a wrapper function using SPS to serialize the arguments and /// deserialize the results. template <typename SPSSignature, typename SendResultT, typename... 
ArgTs> - void callSPSWrapperAsync(SendResultT &&SendResult, - JITTargetAddress WrapperFnAddr, + void callSPSWrapperAsync(ExecutorAddr WrapperFnAddr, SendResultT &&SendResult, const ArgTs &...Args) { - shared::WrapperFunction<SPSSignature>::callAsync( - [this, - WrapperFnAddr](ExecutorProcessControl::SendResultFunction SendResult, - const char *ArgData, size_t ArgSize) { - callWrapperAsync(std::move(SendResult), WrapperFnAddr, - ArrayRef<char>(ArgData, ArgSize)); - }, - std::move(SendResult), Args...); + EPC->callSPSWrapperAsync<SPSSignature, SendResultT, ArgTs...>( + WrapperFnAddr, std::forward<SendResultT>(SendResult), Args...); } /// Run a wrapper function using SPS to serialize the arguments and @@ -1506,13 +1501,10 @@ public: /// If SPSSignature is a non-void function signature then the second argument /// (the first in the Args list) should be a reference to a return value. template <typename SPSSignature, typename... WrapperCallArgTs> - Error callSPSWrapper(JITTargetAddress WrapperFnAddr, + Error callSPSWrapper(ExecutorAddr WrapperFnAddr, WrapperCallArgTs &&...WrapperCallArgs) { - return shared::WrapperFunction<SPSSignature>::call( - [this, WrapperFnAddr](const char *ArgData, size_t ArgSize) { - return callWrapper(WrapperFnAddr, ArrayRef<char>(ArgData, ArgSize)); - }, - std::forward<WrapperCallArgTs>(WrapperCallArgs)...); + return EPC->callSPSWrapper<SPSSignature, WrapperCallArgTs...>( + WrapperFnAddr, std::forward<WrapperCallArgTs>(WrapperCallArgs)...); } /// Wrap a handler that takes concrete argument types (and a sender for a @@ -1525,7 +1517,7 @@ public: template <typename SPSSignature, typename HandlerT> static JITDispatchHandlerFunction wrapAsyncWithSPS(HandlerT &&H) { return [H = std::forward<HandlerT>(H)]( - ExecutorProcessControl::SendResultFunction SendResult, + SendResultFunction SendResult, const char *ArgData, size_t ArgSize) mutable { shared::WrapperFunction<SPSSignature>::handleAsync(ArgData, ArgSize, H, std::move(SendResult)); @@ -1564,7 
+1556,7 @@ public: /// This should be called by the ExecutorProcessControl instance in response /// to incoming jit-dispatch requests from the executor. void - runJITDispatchHandler(ExecutorProcessControl::SendResultFunction SendResult, + runJITDispatchHandler(SendResultFunction SendResult, JITTargetAddress HandlerFnTagAddr, ArrayRef<char> ArgBuffer); diff --git a/llvm/include/llvm/ExecutionEngine/Orc/DebuggerSupportPlugin.h b/llvm/include/llvm/ExecutionEngine/Orc/DebuggerSupportPlugin.h new file mode 100644 index 000000000000..af092b3287d3 --- /dev/null +++ b/llvm/include/llvm/ExecutionEngine/Orc/DebuggerSupportPlugin.h @@ -0,0 +1,64 @@ +//===--- DebugerSupportPlugin.h -- Utils for debugger support ---*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Generates debug objects and registers them using the jit-loader-gdb protocol. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EXECUTIONENGINE_ORC_DEBUGGERSUPPORT_H +#define LLVM_EXECUTIONENGINE_ORC_DEBUGGERSUPPORT_H + +#include "llvm/ExecutionEngine/Orc/Core.h" +#include "llvm/ExecutionEngine/Orc/EPCDebugObjectRegistrar.h" +#include "llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h" + +namespace llvm { +namespace orc { + +/// For each object containing debug info, installs JITLink passes to synthesize +/// a debug object and then register it via the GDB JIT-registration interface. +/// +/// Currently MachO only. For ELF use DebugObjectManagerPlugin. These two +/// plugins will be merged in the near future. 
+class GDBJITDebugInfoRegistrationPlugin : public ObjectLinkingLayer::Plugin { +public: + class DebugSectionSynthesizer { + public: + virtual ~DebugSectionSynthesizer() {} + virtual Error startSynthesis() = 0; + virtual Error completeSynthesisAndRegister() = 0; + }; + + static Expected<std::unique_ptr<GDBJITDebugInfoRegistrationPlugin>> + Create(ExecutionSession &ES, JITDylib &ProcessJD, const Triple &TT); + + GDBJITDebugInfoRegistrationPlugin(ExecutorAddr RegisterActionAddr) + : RegisterActionAddr(RegisterActionAddr) {} + + Error notifyFailed(MaterializationResponsibility &MR) override; + Error notifyRemovingResources(ResourceKey K) override; + + void notifyTransferringResources(ResourceKey DstKey, + ResourceKey SrcKey) override; + + void modifyPassConfig(MaterializationResponsibility &MR, + jitlink::LinkGraph &LG, + jitlink::PassConfiguration &PassConfig) override; + +private: + void modifyPassConfigForMachO(MaterializationResponsibility &MR, + jitlink::LinkGraph &LG, + jitlink::PassConfiguration &PassConfig); + + ExecutorAddr RegisterActionAddr; +}; + +} // namespace orc +} // namespace llvm + +#endif // LLVM_EXECUTIONENGINE_ORC_DEBUGGERSUPPORT_H diff --git a/llvm/include/llvm/ExecutionEngine/Orc/ELFNixPlatform.h b/llvm/include/llvm/ExecutionEngine/Orc/ELFNixPlatform.h new file mode 100644 index 000000000000..20da3e3b89eb --- /dev/null +++ b/llvm/include/llvm/ExecutionEngine/Orc/ELFNixPlatform.h @@ -0,0 +1,330 @@ +//===-- ELFNixPlatform.h -- Utilities for executing ELF in Orc --*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Linux/BSD support for executing JIT'd ELF in Orc. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EXECUTIONENGINE_ORC_ELFNIXPLATFORM_H +#define LLVM_EXECUTIONENGINE_ORC_ELFNIXPLATFORM_H + +#include "llvm/ADT/StringRef.h" +#include "llvm/ExecutionEngine/Orc/Core.h" +#include "llvm/ExecutionEngine/Orc/ExecutorProcessControl.h" +#include "llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h" +#include "llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h" + +#include <future> +#include <thread> +#include <vector> + +namespace llvm { +namespace orc { + +struct ELFPerObjectSectionsToRegister { + ExecutorAddrRange EHFrameSection; + ExecutorAddrRange ThreadDataSection; +}; + +struct ELFNixJITDylibInitializers { + using SectionList = std::vector<ExecutorAddrRange>; + + ELFNixJITDylibInitializers(std::string Name, ExecutorAddr DSOHandleAddress) + : Name(std::move(Name)), DSOHandleAddress(std::move(DSOHandleAddress)) {} + + std::string Name; + ExecutorAddr DSOHandleAddress; + + StringMap<SectionList> InitSections; +}; + +class ELFNixJITDylibDeinitializers {}; + +using ELFNixJITDylibInitializerSequence = + std::vector<ELFNixJITDylibInitializers>; + +using ELFNixJITDylibDeinitializerSequence = + std::vector<ELFNixJITDylibDeinitializers>; + +/// Mediates between ELFNix initialization and ExecutionSession state. +class ELFNixPlatform : public Platform { +public: + /// Try to create a ELFNixPlatform instance, adding the ORC runtime to the + /// given JITDylib. + /// + /// The ORC runtime requires access to a number of symbols in + /// libc++. It is up to the caller to ensure that the requried + /// symbols can be referenced by code added to PlatformJD. 
The + /// standard way to achieve this is to first attach dynamic library + /// search generators for either the given process, or for the + /// specific required libraries, to PlatformJD, then to create the + /// platform instance: + /// + /// \code{.cpp} + /// auto &PlatformJD = ES.createBareJITDylib("stdlib"); + /// PlatformJD.addGenerator( + /// ExitOnErr(EPCDynamicLibrarySearchGenerator + /// ::GetForTargetProcess(EPC))); + /// ES.setPlatform( + /// ExitOnErr(ELFNixPlatform::Create(ES, ObjLayer, EPC, PlatformJD, + /// "/path/to/orc/runtime"))); + /// \endcode + /// + /// Alternatively, these symbols could be added to another JITDylib that + /// PlatformJD links against. + /// + /// Clients are also responsible for ensuring that any JIT'd code that + /// depends on runtime functions (including any code using TLV or static + /// destructors) can reference the runtime symbols. This is usually achieved + /// by linking any JITDylibs containing regular code against + /// PlatformJD. + /// + /// By default, ELFNixPlatform will add the set of aliases returned by the + /// standardPlatformAliases function. This includes both required aliases + /// (e.g. __cxa_atexit -> __orc_rt_elf_cxa_atexit for static destructor + /// support), and optional aliases that provide JIT versions of common + /// functions (e.g. dlopen -> __orc_rt_elf_jit_dlopen). Clients can + /// override these defaults by passing a non-None value for the + /// RuntimeAliases function, in which case the client is responsible for + /// setting up all aliases (including the required ones). 
+ static Expected<std::unique_ptr<ELFNixPlatform>> + Create(ExecutionSession &ES, ObjectLinkingLayer &ObjLinkingLayer, + JITDylib &PlatformJD, const char *OrcRuntimePath, + Optional<SymbolAliasMap> RuntimeAliases = None); + + ExecutionSession &getExecutionSession() const { return ES; } + ObjectLinkingLayer &getObjectLinkingLayer() const { return ObjLinkingLayer; } + + Error setupJITDylib(JITDylib &JD) override; + Error notifyAdding(ResourceTracker &RT, + const MaterializationUnit &MU) override; + Error notifyRemoving(ResourceTracker &RT) override; + + /// Returns an AliasMap containing the default aliases for the ELFNixPlatform. + /// This can be modified by clients when constructing the platform to add + /// or remove aliases. + static SymbolAliasMap standardPlatformAliases(ExecutionSession &ES); + + /// Returns the array of required CXX aliases. + static ArrayRef<std::pair<const char *, const char *>> requiredCXXAliases(); + + /// Returns the array of standard runtime utility aliases for ELF. + static ArrayRef<std::pair<const char *, const char *>> + standardRuntimeUtilityAliases(); + + /// Returns true if the given section name is an initializer section. + static bool isInitializerSection(StringRef SecName); + +private: + // The ELFNixPlatformPlugin scans/modifies LinkGraphs to support ELF + // platform features including initializers, exceptions, TLV, and language + // runtime registration. + class ELFNixPlatformPlugin : public ObjectLinkingLayer::Plugin { + public: + ELFNixPlatformPlugin(ELFNixPlatform &MP) : MP(MP) {} + + void modifyPassConfig(MaterializationResponsibility &MR, + jitlink::LinkGraph &G, + jitlink::PassConfiguration &Config) override; + + SyntheticSymbolDependenciesMap + getSyntheticSymbolDependencies(MaterializationResponsibility &MR) override; + + // FIXME: We should be tentatively tracking scraped sections and discarding + // if the MR fails. 
+ Error notifyFailed(MaterializationResponsibility &MR) override { + return Error::success(); + } + + Error notifyRemovingResources(ResourceKey K) override { + return Error::success(); + } + + void notifyTransferringResources(ResourceKey DstKey, + ResourceKey SrcKey) override {} + + private: + using InitSymbolDepMap = + DenseMap<MaterializationResponsibility *, JITLinkSymbolSet>; + + void addInitializerSupportPasses(MaterializationResponsibility &MR, + jitlink::PassConfiguration &Config); + + void addDSOHandleSupportPasses(MaterializationResponsibility &MR, + jitlink::PassConfiguration &Config); + + void addEHAndTLVSupportPasses(MaterializationResponsibility &MR, + jitlink::PassConfiguration &Config); + + Error preserveInitSections(jitlink::LinkGraph &G, + MaterializationResponsibility &MR); + + Error registerInitSections(jitlink::LinkGraph &G, JITDylib &JD); + + Error fixTLVSectionsAndEdges(jitlink::LinkGraph &G, JITDylib &JD); + + std::mutex PluginMutex; + ELFNixPlatform &MP; + InitSymbolDepMap InitSymbolDeps; + }; + + using SendInitializerSequenceFn = + unique_function<void(Expected<ELFNixJITDylibInitializerSequence>)>; + + using SendDeinitializerSequenceFn = + unique_function<void(Expected<ELFNixJITDylibDeinitializerSequence>)>; + + using SendSymbolAddressFn = unique_function<void(Expected<ExecutorAddr>)>; + + static bool supportedTarget(const Triple &TT); + + ELFNixPlatform(ExecutionSession &ES, ObjectLinkingLayer &ObjLinkingLayer, + JITDylib &PlatformJD, + std::unique_ptr<DefinitionGenerator> OrcRuntimeGenerator, + Error &Err); + + // Associate ELFNixPlatform JIT-side runtime support functions with handlers. 
+ Error associateRuntimeSupportFunctions(JITDylib &PlatformJD); + + void getInitializersBuildSequencePhase(SendInitializerSequenceFn SendResult, + JITDylib &JD, + std::vector<JITDylibSP> DFSLinkOrder); + + void getInitializersLookupPhase(SendInitializerSequenceFn SendResult, + JITDylib &JD); + + void rt_getInitializers(SendInitializerSequenceFn SendResult, + StringRef JDName); + + void rt_getDeinitializers(SendDeinitializerSequenceFn SendResult, + ExecutorAddr Handle); + + void rt_lookupSymbol(SendSymbolAddressFn SendResult, ExecutorAddr Handle, + StringRef SymbolName); + + // Records the addresses of runtime symbols used by the platform. + Error bootstrapELFNixRuntime(JITDylib &PlatformJD); + + Error registerInitInfo(JITDylib &JD, + ArrayRef<jitlink::Section *> InitSections); + + Error registerPerObjectSections(const ELFPerObjectSectionsToRegister &POSR); + + Expected<uint64_t> createPThreadKey(); + + ExecutionSession &ES; + ObjectLinkingLayer &ObjLinkingLayer; + + SymbolStringPtr DSOHandleSymbol; + std::atomic<bool> RuntimeBootstrapped{false}; + + ExecutorAddr orc_rt_elfnix_platform_bootstrap; + ExecutorAddr orc_rt_elfnix_platform_shutdown; + ExecutorAddr orc_rt_elfnix_register_object_sections; + ExecutorAddr orc_rt_elfnix_create_pthread_key; + + DenseMap<JITDylib *, SymbolLookupSet> RegisteredInitSymbols; + + // InitSeqs gets its own mutex to avoid locking the whole session when + // aggregating data from the jitlink. 
+ std::mutex PlatformMutex; + DenseMap<JITDylib *, ELFNixJITDylibInitializers> InitSeqs; + std::vector<ELFPerObjectSectionsToRegister> BootstrapPOSRs; + + DenseMap<JITTargetAddress, JITDylib *> HandleAddrToJITDylib; + DenseMap<JITDylib *, uint64_t> JITDylibToPThreadKey; +}; + +namespace shared { + +using SPSELFPerObjectSectionsToRegister = + SPSTuple<SPSExecutorAddrRange, SPSExecutorAddrRange>; + +template <> +class SPSSerializationTraits<SPSELFPerObjectSectionsToRegister, + ELFPerObjectSectionsToRegister> { + +public: + static size_t size(const ELFPerObjectSectionsToRegister &MOPOSR) { + return SPSELFPerObjectSectionsToRegister::AsArgList::size( + MOPOSR.EHFrameSection, MOPOSR.ThreadDataSection); + } + + static bool serialize(SPSOutputBuffer &OB, + const ELFPerObjectSectionsToRegister &MOPOSR) { + return SPSELFPerObjectSectionsToRegister::AsArgList::serialize( + OB, MOPOSR.EHFrameSection, MOPOSR.ThreadDataSection); + } + + static bool deserialize(SPSInputBuffer &IB, + ELFPerObjectSectionsToRegister &MOPOSR) { + return SPSELFPerObjectSectionsToRegister::AsArgList::deserialize( + IB, MOPOSR.EHFrameSection, MOPOSR.ThreadDataSection); + } +}; + +using SPSNamedExecutorAddrRangeSequenceMap = + SPSSequence<SPSTuple<SPSString, SPSExecutorAddrRangeSequence>>; + +using SPSELFNixJITDylibInitializers = + SPSTuple<SPSString, SPSExecutorAddr, SPSNamedExecutorAddrRangeSequenceMap>; + +using SPSELFNixJITDylibInitializerSequence = + SPSSequence<SPSELFNixJITDylibInitializers>; + +/// Serialization traits for ELFNixJITDylibInitializers. 
+template <> +class SPSSerializationTraits<SPSELFNixJITDylibInitializers, + ELFNixJITDylibInitializers> { +public: + static size_t size(const ELFNixJITDylibInitializers &MOJDIs) { + return SPSELFNixJITDylibInitializers::AsArgList::size( + MOJDIs.Name, MOJDIs.DSOHandleAddress, MOJDIs.InitSections); + } + + static bool serialize(SPSOutputBuffer &OB, + const ELFNixJITDylibInitializers &MOJDIs) { + return SPSELFNixJITDylibInitializers::AsArgList::serialize( + OB, MOJDIs.Name, MOJDIs.DSOHandleAddress, MOJDIs.InitSections); + } + + static bool deserialize(SPSInputBuffer &IB, + ELFNixJITDylibInitializers &MOJDIs) { + return SPSELFNixJITDylibInitializers::AsArgList::deserialize( + IB, MOJDIs.Name, MOJDIs.DSOHandleAddress, MOJDIs.InitSections); + } +}; + +using SPSELFJITDylibDeinitializers = SPSEmpty; + +using SPSELFJITDylibDeinitializerSequence = + SPSSequence<SPSELFJITDylibDeinitializers>; + +template <> +class SPSSerializationTraits<SPSELFJITDylibDeinitializers, + ELFNixJITDylibDeinitializers> { +public: + static size_t size(const ELFNixJITDylibDeinitializers &MOJDDs) { return 0; } + + static bool serialize(SPSOutputBuffer &OB, + const ELFNixJITDylibDeinitializers &MOJDDs) { + return true; + } + + static bool deserialize(SPSInputBuffer &IB, + ELFNixJITDylibDeinitializers &MOJDDs) { + MOJDDs = ELFNixJITDylibDeinitializers(); + return true; + } +}; + +} // end namespace shared +} // end namespace orc +} // end namespace llvm + +#endif // LLVM_EXECUTIONENGINE_ORC_ELFNIXPLATFORM_H diff --git a/llvm/include/llvm/ExecutionEngine/Orc/EPCDebugObjectRegistrar.h b/llvm/include/llvm/ExecutionEngine/Orc/EPCDebugObjectRegistrar.h index 410a202b3296..940d0d28ae83 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/EPCDebugObjectRegistrar.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/EPCDebugObjectRegistrar.h @@ -14,6 +14,7 @@ #define LLVM_EXECUTIONENGINE_ORC_EPCDEBUGOBJECTREGISTRAR_H #include "llvm/ExecutionEngine/JITSymbol.h" +#include 
"llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h" #include "llvm/ExecutionEngine/Orc/Shared/WrapperFunctionUtils.h" #include "llvm/Support/Error.h" #include "llvm/Support/Memory.h" @@ -32,7 +33,7 @@ class ExecutionSession; /// Abstract interface for registering debug objects in the executor process. class DebugObjectRegistrar { public: - virtual Error registerDebugObject(sys::MemoryBlock) = 0; + virtual Error registerDebugObject(ExecutorAddrRange TargetMem) = 0; virtual ~DebugObjectRegistrar() {} }; @@ -40,14 +41,14 @@ public: /// executor process. class EPCDebugObjectRegistrar : public DebugObjectRegistrar { public: - EPCDebugObjectRegistrar(ExecutionSession &ES, JITTargetAddress RegisterFn) + EPCDebugObjectRegistrar(ExecutionSession &ES, ExecutorAddr RegisterFn) : ES(ES), RegisterFn(RegisterFn) {} - Error registerDebugObject(sys::MemoryBlock TargetMem) override; + Error registerDebugObject(ExecutorAddrRange TargetMem) override; private: ExecutionSession &ES; - JITTargetAddress RegisterFn; + ExecutorAddr RegisterFn; }; /// Create a ExecutorProcessControl-based DebugObjectRegistrar that emits debug diff --git a/llvm/include/llvm/ExecutionEngine/Orc/EPCEHFrameRegistrar.h b/llvm/include/llvm/ExecutionEngine/Orc/EPCEHFrameRegistrar.h index 8cd6e9319a28..6d113a7bdf1a 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/EPCEHFrameRegistrar.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/EPCEHFrameRegistrar.h @@ -14,6 +14,7 @@ #define LLVM_EXECUTIONENGINE_ORC_EPCEHFRAMEREGISTRAR_H #include "llvm/ExecutionEngine/JITLink/EHFrameSupport.h" +#include "llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h" namespace llvm { namespace orc { @@ -33,8 +34,8 @@ public: /// Create a EPCEHFrameRegistrar with the given ExecutorProcessControl /// object and registration/deregistration function addresses. 
EPCEHFrameRegistrar(ExecutionSession &ES, - JITTargetAddress RegisterEHFrameWrapperFnAddr, - JITTargetAddress DeregisterEHFRameWrapperFnAddr) + ExecutorAddr RegisterEHFrameWrapperFnAddr, + ExecutorAddr DeregisterEHFRameWrapperFnAddr) : ES(ES), RegisterEHFrameWrapperFnAddr(RegisterEHFrameWrapperFnAddr), DeregisterEHFrameWrapperFnAddr(DeregisterEHFRameWrapperFnAddr) {} @@ -45,8 +46,8 @@ public: private: ExecutionSession &ES; - JITTargetAddress RegisterEHFrameWrapperFnAddr; - JITTargetAddress DeregisterEHFrameWrapperFnAddr; + ExecutorAddr RegisterEHFrameWrapperFnAddr; + ExecutorAddr DeregisterEHFrameWrapperFnAddr; }; } // end namespace orc diff --git a/llvm/include/llvm/ExecutionEngine/Orc/EPCGenericDylibManager.h b/llvm/include/llvm/ExecutionEngine/Orc/EPCGenericDylibManager.h new file mode 100644 index 000000000000..02e580c86f54 --- /dev/null +++ b/llvm/include/llvm/ExecutionEngine/Orc/EPCGenericDylibManager.h @@ -0,0 +1,67 @@ +//===- EPCGenericDylibManager.h -- Generic EPC Dylib management -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Implements dylib loading and searching by making calls to +// ExecutorProcessControl::callWrapper. +// +// This simplifies the implementaton of new ExecutorProcessControl instances, +// as this implementation will always work (at the cost of some performance +// overhead for the calls). 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EXECUTIONENGINE_ORC_EPCGENERICDYLIBMANAGER_H +#define LLVM_EXECUTIONENGINE_ORC_EPCGENERICDYLIBMANAGER_H + +#include "llvm/ExecutionEngine/Orc/ExecutorProcessControl.h" +#include "llvm/ExecutionEngine/Orc/Shared/SimpleRemoteEPCUtils.h" + +namespace llvm { +namespace orc { + +class SymbolLookupSet; + +class EPCGenericDylibManager { +public: + /// Function addresses for memory access. + struct SymbolAddrs { + ExecutorAddr Instance; + ExecutorAddr Open; + ExecutorAddr Lookup; + }; + + /// Create an EPCGenericMemoryAccess instance from a given set of + /// function addrs. + static Expected<EPCGenericDylibManager> + CreateWithDefaultBootstrapSymbols(ExecutorProcessControl &EPC); + + /// Create an EPCGenericMemoryAccess instance from a given set of + /// function addrs. + EPCGenericDylibManager(ExecutorProcessControl &EPC, SymbolAddrs SAs) + : EPC(EPC), SAs(SAs) {} + + /// Loads the dylib with the given name. + Expected<tpctypes::DylibHandle> open(StringRef Path, uint64_t Mode); + + /// Looks up symbols within the given dylib. + Expected<std::vector<ExecutorAddr>> lookup(tpctypes::DylibHandle H, + const SymbolLookupSet &Lookup); + + /// Looks up symbols within the given dylib. 
+ Expected<std::vector<ExecutorAddr>> + lookup(tpctypes::DylibHandle H, const RemoteSymbolLookupSet &Lookup); + +private: + ExecutorProcessControl &EPC; + SymbolAddrs SAs; +}; + +} // end namespace orc +} // end namespace llvm + +#endif // LLVM_EXECUTIONENGINE_ORC_EPCGENERICDYLIBMANAGER_H diff --git a/llvm/include/llvm/ExecutionEngine/Orc/EPCGenericJITLinkMemoryManager.h b/llvm/include/llvm/ExecutionEngine/Orc/EPCGenericJITLinkMemoryManager.h new file mode 100644 index 000000000000..b9825f17ec17 --- /dev/null +++ b/llvm/include/llvm/ExecutionEngine/Orc/EPCGenericJITLinkMemoryManager.h @@ -0,0 +1,97 @@ +//===- EPCGenericJITLinkMemoryManager.h - EPC-based mem manager -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Implements JITLinkMemoryManager by making remove calls via +// ExecutorProcessControl::callWrapperAsync. +// +// This simplifies the implementaton of new ExecutorProcessControl instances, +// as this implementation will always work (at the cost of some performance +// overhead for the calls). +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EXECUTIONENGINE_ORC_EPCGENERICJITLINKMEMORYMANAGER_H +#define LLVM_EXECUTIONENGINE_ORC_EPCGENERICJITLINKMEMORYMANAGER_H + +#include "llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h" +#include "llvm/ExecutionEngine/Orc/Core.h" + +namespace llvm { +namespace orc { + +class EPCGenericJITLinkMemoryManager : public jitlink::JITLinkMemoryManager { +public: + /// Function addresses for memory access. 
+ struct SymbolAddrs { + ExecutorAddr Allocator; + ExecutorAddr Reserve; + ExecutorAddr Finalize; + ExecutorAddr Deallocate; + }; + + /// Create an EPCGenericJITLinkMemoryManager instance from a given set of + /// function addrs. + EPCGenericJITLinkMemoryManager(ExecutorProcessControl &EPC, SymbolAddrs SAs) + : EPC(EPC), SAs(SAs) {} + + void allocate(const jitlink::JITLinkDylib *JD, jitlink::LinkGraph &G, + OnAllocatedFunction OnAllocated) override; + + // Use overloads from base class. + using JITLinkMemoryManager::allocate; + + void deallocate(std::vector<FinalizedAlloc> Allocs, + OnDeallocatedFunction OnDeallocated) override; + + // Use overloads from base class. + using JITLinkMemoryManager::deallocate; + +private: + class InFlightAlloc; + + void completeAllocation(ExecutorAddr AllocAddr, jitlink::BasicLayout BL, + OnAllocatedFunction OnAllocated); + + ExecutorProcessControl &EPC; + SymbolAddrs SAs; +}; + +namespace shared { + +/// FIXME: This specialization should be moved into TargetProcessControlTypes.h +/// (or whereever those types get merged to) once ORC depends on JITLink. 
+template <> +class SPSSerializationTraits<SPSExecutorAddr, + jitlink::JITLinkMemoryManager::FinalizedAlloc> { +public: + static size_t size(const jitlink::JITLinkMemoryManager::FinalizedAlloc &FA) { + return SPSArgList<SPSExecutorAddr>::size(ExecutorAddr(FA.getAddress())); + } + + static bool + serialize(SPSOutputBuffer &OB, + const jitlink::JITLinkMemoryManager::FinalizedAlloc &FA) { + return SPSArgList<SPSExecutorAddr>::serialize( + OB, ExecutorAddr(FA.getAddress())); + } + + static bool deserialize(SPSInputBuffer &IB, + jitlink::JITLinkMemoryManager::FinalizedAlloc &FA) { + ExecutorAddr A; + if (!SPSArgList<SPSExecutorAddr>::deserialize(IB, A)) + return false; + FA = jitlink::JITLinkMemoryManager::FinalizedAlloc(A.getValue()); + return true; + } +}; + +} // end namespace shared +} // end namespace orc +} // end namespace llvm + +#endif // LLVM_EXECUTIONENGINE_ORC_EPCGENERICJITLINKMEMORYMANAGER_H diff --git a/llvm/include/llvm/ExecutionEngine/Orc/EPCGenericMemoryAccess.h b/llvm/include/llvm/ExecutionEngine/Orc/EPCGenericMemoryAccess.h new file mode 100644 index 000000000000..8c1d457d06ab --- /dev/null +++ b/llvm/include/llvm/ExecutionEngine/Orc/EPCGenericMemoryAccess.h @@ -0,0 +1,85 @@ +//===- EPCGenericMemoryAccess.h - Generic EPC MemoryAccess impl -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Implements ExecutorProcessControl::MemoryAccess by making calls to +// ExecutorProcessControl::callWrapperAsync. +// +// This simplifies the implementaton of new ExecutorProcessControl instances, +// as this implementation will always work (at the cost of some performance +// overhead for the calls). 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EXECUTIONENGINE_ORC_EPCGENERICMEMORYACCESS_H +#define LLVM_EXECUTIONENGINE_ORC_EPCGENERICMEMORYACCESS_H + +#include "llvm/ExecutionEngine/Orc/Core.h" + +namespace llvm { +namespace orc { + +class EPCGenericMemoryAccess : public ExecutorProcessControl::MemoryAccess { +public: + /// Function addresses for memory access. + struct FuncAddrs { + ExecutorAddr WriteUInt8s; + ExecutorAddr WriteUInt16s; + ExecutorAddr WriteUInt32s; + ExecutorAddr WriteUInt64s; + ExecutorAddr WriteBuffers; + }; + + /// Create an EPCGenericMemoryAccess instance from a given set of + /// function addrs. + EPCGenericMemoryAccess(ExecutorProcessControl &EPC, FuncAddrs FAs) + : EPC(EPC), FAs(FAs) {} + + void writeUInt8sAsync(ArrayRef<tpctypes::UInt8Write> Ws, + WriteResultFn OnWriteComplete) override { + using namespace shared; + EPC.callSPSWrapperAsync<void(SPSSequence<SPSMemoryAccessUInt8Write>)>( + FAs.WriteUInt8s, std::move(OnWriteComplete), Ws); + } + + void writeUInt16sAsync(ArrayRef<tpctypes::UInt16Write> Ws, + WriteResultFn OnWriteComplete) override { + using namespace shared; + EPC.callSPSWrapperAsync<void(SPSSequence<SPSMemoryAccessUInt16Write>)>( + FAs.WriteUInt16s, std::move(OnWriteComplete), Ws); + } + + void writeUInt32sAsync(ArrayRef<tpctypes::UInt32Write> Ws, + WriteResultFn OnWriteComplete) override { + using namespace shared; + EPC.callSPSWrapperAsync<void(SPSSequence<SPSMemoryAccessUInt32Write>)>( + FAs.WriteUInt32s, std::move(OnWriteComplete), Ws); + } + + void writeUInt64sAsync(ArrayRef<tpctypes::UInt64Write> Ws, + WriteResultFn OnWriteComplete) override { + using namespace shared; + EPC.callSPSWrapperAsync<void(SPSSequence<SPSMemoryAccessUInt64Write>)>( + FAs.WriteUInt64s, std::move(OnWriteComplete), Ws); + } + + void writeBuffersAsync(ArrayRef<tpctypes::BufferWrite> Ws, + WriteResultFn OnWriteComplete) override { + using namespace shared; + 
EPC.callSPSWrapperAsync<void(SPSSequence<SPSMemoryAccessBufferWrite>)>( + FAs.WriteBuffers, std::move(OnWriteComplete), Ws); + } + +private: + ExecutorProcessControl &EPC; + FuncAddrs FAs; +}; + +} // end namespace orc +} // end namespace llvm + +#endif // LLVM_EXECUTIONENGINE_ORC_EPCGENERICMEMORYACCESS_H diff --git a/llvm/include/llvm/ExecutionEngine/Orc/EPCGenericRTDyldMemoryManager.h b/llvm/include/llvm/ExecutionEngine/Orc/EPCGenericRTDyldMemoryManager.h new file mode 100644 index 000000000000..b6fdfb92ced3 --- /dev/null +++ b/llvm/include/llvm/ExecutionEngine/Orc/EPCGenericRTDyldMemoryManager.h @@ -0,0 +1,133 @@ +//===---- EPCGenericRTDyldMemoryManager.h - EPC-based MemMgr ----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Defines a RuntimeDyld::MemoryManager that uses EPC and the ORC runtime +// bootstrap functions. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EXECUTIONENGINE_ORC_EPCGENERICRTDYLDMEMORYMANAGER_H +#define LLVM_EXECUTIONENGINE_ORC_EPCGENERICRTDYLDMEMORYMANAGER_H + +#include "llvm/ExecutionEngine/Orc/ExecutorProcessControl.h" +#include "llvm/ExecutionEngine/RuntimeDyld.h" + +#define DEBUG_TYPE "orc" + +namespace llvm { +namespace orc { + +/// Remote-mapped RuntimeDyld-compatible memory manager. +class EPCGenericRTDyldMemoryManager : public RuntimeDyld::MemoryManager { +public: + /// Symbol addresses for memory access. 
+ struct SymbolAddrs { + ExecutorAddr Instance; + ExecutorAddr Reserve; + ExecutorAddr Finalize; + ExecutorAddr Deallocate; + ExecutorAddr RegisterEHFrame; + ExecutorAddr DeregisterEHFrame; + }; + + /// Create an EPCGenericRTDyldMemoryManager using the given EPC, looking up + /// the default symbol names in the bootstrap symbol set. + static Expected<std::unique_ptr<EPCGenericRTDyldMemoryManager>> + CreateWithDefaultBootstrapSymbols(ExecutorProcessControl &EPC); + + /// Create an EPCGenericRTDyldMemoryManager using the given EPC and symbol + /// addrs. + EPCGenericRTDyldMemoryManager(ExecutorProcessControl &EPC, SymbolAddrs SAs); + + EPCGenericRTDyldMemoryManager(const EPCGenericRTDyldMemoryManager &) = delete; + EPCGenericRTDyldMemoryManager & + operator=(const EPCGenericRTDyldMemoryManager &) = delete; + EPCGenericRTDyldMemoryManager(EPCGenericRTDyldMemoryManager &&) = delete; + EPCGenericRTDyldMemoryManager & + operator=(EPCGenericRTDyldMemoryManager &&) = delete; + ~EPCGenericRTDyldMemoryManager(); + + uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment, + unsigned SectionID, + StringRef SectionName) override; + + uint8_t *allocateDataSection(uintptr_t Size, unsigned Alignment, + unsigned SectionID, StringRef SectionName, + bool IsReadOnly) override; + + void reserveAllocationSpace(uintptr_t CodeSize, uint32_t CodeAlign, + uintptr_t RODataSize, uint32_t RODataAlign, + uintptr_t RWDataSize, + uint32_t RWDataAlign) override; + + bool needsToReserveAllocationSpace() override; + + void registerEHFrames(uint8_t *Addr, uint64_t LoadAddr, size_t Size) override; + + void deregisterEHFrames() override; + + void notifyObjectLoaded(RuntimeDyld &Dyld, + const object::ObjectFile &Obj) override; + + bool finalizeMemory(std::string *ErrMsg = nullptr) override; + +private: + struct Alloc { + public: + Alloc(uint64_t Size, unsigned Align) + : Size(Size), Align(Align), + Contents(std::make_unique<uint8_t[]>(Size + Align - 1)) {} + + uint64_t Size; + unsigned Align; + 
std::unique_ptr<uint8_t[]> Contents; + ExecutorAddr RemoteAddr; + }; + + struct EHFrame { + ExecutorAddr Addr; + uint64_t Size; + }; + + // Group of section allocations to be allocated together in the executor. The + // RemoteCodeAddr will stand in as the id of the group for deallocation + // purposes. + struct AllocGroup { + AllocGroup() = default; + AllocGroup(const AllocGroup &) = delete; + AllocGroup &operator=(const AllocGroup &) = delete; + AllocGroup(AllocGroup &&) = default; + AllocGroup &operator=(AllocGroup &&) = default; + + ExecutorAddrRange RemoteCode; + ExecutorAddrRange RemoteROData; + ExecutorAddrRange RemoteRWData; + std::vector<EHFrame> UnfinalizedEHFrames; + std::vector<Alloc> CodeAllocs, RODataAllocs, RWDataAllocs; + }; + + // Maps all allocations in Allocs to aligned blocks + void mapAllocsToRemoteAddrs(RuntimeDyld &Dyld, std::vector<Alloc> &Allocs, + ExecutorAddr NextAddr); + + ExecutorProcessControl &EPC; + SymbolAddrs SAs; + + std::mutex M; + std::vector<AllocGroup> Unmapped; + std::vector<AllocGroup> Unfinalized; + std::vector<ExecutorAddr> FinalizedAllocs; + std::string ErrMsg; +}; + +} // end namespace orc +} // end namespace llvm + +#undef DEBUG_TYPE + +#endif // LLVM_EXECUTIONENGINE_ORC_EPCGENERICRTDYLDMEMORYMANAGER_H diff --git a/llvm/include/llvm/ExecutionEngine/Orc/EPCIndirectionUtils.h b/llvm/include/llvm/ExecutionEngine/Orc/EPCIndirectionUtils.h index 64f16d507c97..92de5882bafe 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/EPCIndirectionUtils.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/EPCIndirectionUtils.h @@ -126,7 +126,7 @@ public: } private: - using Allocation = jitlink::JITLinkMemoryManager::Allocation; + using FinalizedAlloc = jitlink::JITLinkMemoryManager::FinalizedAlloc; struct IndirectStubInfo { IndirectStubInfo() = default; @@ -149,12 +149,12 @@ private: ExecutorProcessControl &EPC; std::unique_ptr<ABISupport> ABI; JITTargetAddress ResolverBlockAddr; - std::unique_ptr<jitlink::JITLinkMemoryManager::Allocation> 
ResolverBlock; + FinalizedAlloc ResolverBlock; std::unique_ptr<TrampolinePool> TP; std::unique_ptr<LazyCallThroughManager> LCTM; std::vector<IndirectStubInfo> AvailableIndirectStubs; - std::vector<std::unique_ptr<Allocation>> IndirectStubAllocs; + std::vector<FinalizedAlloc> IndirectStubAllocs; }; /// This will call writeResolver on the given EPCIndirectionUtils instance diff --git a/llvm/include/llvm/ExecutionEngine/Orc/ExecutorProcessControl.h b/llvm/include/llvm/ExecutionEngine/Orc/ExecutorProcessControl.h index d540d0cd0608..105dac8e8d04 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/ExecutorProcessControl.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/ExecutorProcessControl.h @@ -13,7 +13,6 @@ #ifndef LLVM_EXECUTIONENGINE_ORC_EXECUTORPROCESSCONTROL_H #define LLVM_EXECUTIONENGINE_ORC_EXECUTORPROCESSCONTROL_H -#include "llvm/ADT/Optional.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" #include "llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h" @@ -21,6 +20,7 @@ #include "llvm/ExecutionEngine/Orc/Shared/TargetProcessControlTypes.h" #include "llvm/ExecutionEngine/Orc/Shared/WrapperFunctionUtils.h" #include "llvm/ExecutionEngine/Orc/SymbolStringPool.h" +#include "llvm/ExecutionEngine/Orc/TaskDispatch.h" #include "llvm/Support/DynamicLibrary.h" #include "llvm/Support/MSVCErrorWorkarounds.h" @@ -37,11 +37,65 @@ class SymbolLookupSet; /// ExecutorProcessControl supports interaction with a JIT target process. class ExecutorProcessControl { friend class ExecutionSession; - public: - /// Sender to return the result of a WrapperFunction executed in the JIT. - using SendResultFunction = - unique_function<void(shared::WrapperFunctionResult)>; + + /// A handler or incoming WrapperFunctionResults -- either return values from + /// callWrapper* calls, or incoming JIT-dispatch requests. 
+ /// + /// IncomingWFRHandlers are constructible from + /// unique_function<void(shared::WrapperFunctionResult)>s using the + /// runInPlace function or a RunWithDispatch object. + class IncomingWFRHandler { + friend class ExecutorProcessControl; + public: + IncomingWFRHandler() = default; + explicit operator bool() const { return !!H; } + void operator()(shared::WrapperFunctionResult WFR) { H(std::move(WFR)); } + private: + template <typename FnT> IncomingWFRHandler(FnT &&Fn) + : H(std::forward<FnT>(Fn)) {} + + unique_function<void(shared::WrapperFunctionResult)> H; + }; + + /// Constructs an IncomingWFRHandler from a function object that is callable + /// as void(shared::WrapperFunctionResult). The function object will be called + /// directly. This should be used with care as it may block listener threads + /// in remote EPCs. It is only suitable for simple tasks (e.g. setting a + /// future), or for performing some quick analysis before dispatching "real" + /// work as a Task. + class RunInPlace { + public: + template <typename FnT> + IncomingWFRHandler operator()(FnT &&Fn) { + return IncomingWFRHandler(std::forward<FnT>(Fn)); + } + }; + + /// Constructs an IncomingWFRHandler from a function object by creating a new + /// function object that dispatches the original using a TaskDispatcher, + /// wrapping the original as a GenericNamedTask. + /// + /// This is the default approach for running WFR handlers. + class RunAsTask { + public: + RunAsTask(TaskDispatcher &D) : D(D) {} + + template <typename FnT> + IncomingWFRHandler operator()(FnT &&Fn) { + return IncomingWFRHandler( + [&D = this->D, Fn = std::move(Fn)] + (shared::WrapperFunctionResult WFR) mutable { + D.dispatch( + makeGenericNamedTask( + [Fn = std::move(Fn), WFR = std::move(WFR)]() mutable { + Fn(std::move(WFR)); + }, "WFR handler task")); + }); + } + private: + TaskDispatcher &D; + }; /// APIs for manipulating memory in the target process. 
class MemoryAccess { @@ -51,53 +105,58 @@ public: virtual ~MemoryAccess(); - virtual void writeUInt8s(ArrayRef<tpctypes::UInt8Write> Ws, - WriteResultFn OnWriteComplete) = 0; + virtual void writeUInt8sAsync(ArrayRef<tpctypes::UInt8Write> Ws, + WriteResultFn OnWriteComplete) = 0; - virtual void writeUInt16s(ArrayRef<tpctypes::UInt16Write> Ws, - WriteResultFn OnWriteComplete) = 0; + virtual void writeUInt16sAsync(ArrayRef<tpctypes::UInt16Write> Ws, + WriteResultFn OnWriteComplete) = 0; - virtual void writeUInt32s(ArrayRef<tpctypes::UInt32Write> Ws, - WriteResultFn OnWriteComplete) = 0; + virtual void writeUInt32sAsync(ArrayRef<tpctypes::UInt32Write> Ws, + WriteResultFn OnWriteComplete) = 0; - virtual void writeUInt64s(ArrayRef<tpctypes::UInt64Write> Ws, - WriteResultFn OnWriteComplete) = 0; + virtual void writeUInt64sAsync(ArrayRef<tpctypes::UInt64Write> Ws, + WriteResultFn OnWriteComplete) = 0; - virtual void writeBuffers(ArrayRef<tpctypes::BufferWrite> Ws, - WriteResultFn OnWriteComplete) = 0; + virtual void writeBuffersAsync(ArrayRef<tpctypes::BufferWrite> Ws, + WriteResultFn OnWriteComplete) = 0; Error writeUInt8s(ArrayRef<tpctypes::UInt8Write> Ws) { std::promise<MSVCPError> ResultP; auto ResultF = ResultP.get_future(); - writeUInt8s(Ws, [&](Error Err) { ResultP.set_value(std::move(Err)); }); + writeUInt8sAsync(Ws, + [&](Error Err) { ResultP.set_value(std::move(Err)); }); return ResultF.get(); } Error writeUInt16s(ArrayRef<tpctypes::UInt16Write> Ws) { std::promise<MSVCPError> ResultP; auto ResultF = ResultP.get_future(); - writeUInt16s(Ws, [&](Error Err) { ResultP.set_value(std::move(Err)); }); + writeUInt16sAsync(Ws, + [&](Error Err) { ResultP.set_value(std::move(Err)); }); return ResultF.get(); } Error writeUInt32s(ArrayRef<tpctypes::UInt32Write> Ws) { std::promise<MSVCPError> ResultP; auto ResultF = ResultP.get_future(); - writeUInt32s(Ws, [&](Error Err) { ResultP.set_value(std::move(Err)); }); + writeUInt32sAsync(Ws, + [&](Error Err) { 
ResultP.set_value(std::move(Err)); }); return ResultF.get(); } Error writeUInt64s(ArrayRef<tpctypes::UInt64Write> Ws) { std::promise<MSVCPError> ResultP; auto ResultF = ResultP.get_future(); - writeUInt64s(Ws, [&](Error Err) { ResultP.set_value(std::move(Err)); }); + writeUInt64sAsync(Ws, + [&](Error Err) { ResultP.set_value(std::move(Err)); }); return ResultF.get(); } Error writeBuffers(ArrayRef<tpctypes::BufferWrite> Ws) { std::promise<MSVCPError> ResultP; auto ResultF = ResultP.get_future(); - writeBuffers(Ws, [&](Error Err) { ResultP.set_value(std::move(Err)); }); + writeBuffersAsync(Ws, + [&](Error Err) { ResultP.set_value(std::move(Err)); }); return ResultF.get(); } }; @@ -113,10 +172,14 @@ public: /// Contains the address of the dispatch function and context that the ORC /// runtime can use to call functions in the JIT. struct JITDispatchInfo { - ExecutorAddress JITDispatchFunctionAddress; - ExecutorAddress JITDispatchContextAddress; + ExecutorAddr JITDispatchFunction; + ExecutorAddr JITDispatchContext; }; + ExecutorProcessControl(std::shared_ptr<SymbolStringPool> SSP, + std::unique_ptr<TaskDispatcher> D) + : SSP(std::move(SSP)), D(std::move(D)) {} + virtual ~ExecutorProcessControl(); /// Return the ExecutionSession associated with this instance. @@ -132,6 +195,8 @@ public: /// Return a shared pointer to the SymbolStringPool for this instance. std::shared_ptr<SymbolStringPool> getSymbolStringPool() const { return SSP; } + TaskDispatcher &getDispatcher() { return *D; } + /// Return the Triple for the target process. const Triple &getTargetTriple() const { return TargetTriple; } @@ -153,6 +218,29 @@ public: return *MemMgr; } + /// Returns the bootstrap symbol map. + const StringMap<ExecutorAddr> &getBootstrapSymbolsMap() const { + return BootstrapSymbols; + } + + /// For each (ExecutorAddr&, StringRef) pair, looks up the string in the + /// bootstrap symbols map and writes its address to the ExecutorAddr if + /// found. 
If any symbol is not found then the function returns an error. + Error getBootstrapSymbols( + ArrayRef<std::pair<ExecutorAddr &, StringRef>> Pairs) const { + for (auto &KV : Pairs) { + auto I = BootstrapSymbols.find(KV.second); + if (I == BootstrapSymbols.end()) + return make_error<StringError>("Symbol \"" + KV.second + + "\" not found " + "in bootstrap symbols map", + inconvertibleErrorCode()); + + KV.first = I->second; + } + return Error::success(); + } + /// Load the dynamic library at the given path and return a handle to it. /// If LibraryPath is null this function will return the global handle for /// the target process. @@ -163,44 +251,119 @@ public: /// The result of the lookup is a 2-dimentional array of target addresses /// that correspond to the lookup order. If a required symbol is not /// found then this method will return an error. If a weakly referenced - /// symbol is not found then it be assigned a '0' value in the result. - /// that correspond to the lookup order. + /// symbol is not found then it be assigned a '0' value. virtual Expected<std::vector<tpctypes::LookupResult>> lookupSymbols(ArrayRef<LookupRequest> Request) = 0; /// Run function with a main-like signature. - virtual Expected<int32_t> runAsMain(JITTargetAddress MainFnAddr, + virtual Expected<int32_t> runAsMain(ExecutorAddr MainFnAddr, ArrayRef<std::string> Args) = 0; - /// Run a wrapper function in the executor. + /// Run a wrapper function in the executor. The given WFRHandler will be + /// called on the result when it is returned. /// /// The wrapper function should be callable as: /// /// \code{.cpp} /// CWrapperFunctionResult fn(uint8_t *Data, uint64_t Size); /// \endcode{.cpp} - /// - /// The given OnComplete function will be called to return the result. 
- virtual void callWrapperAsync(SendResultFunction OnComplete, - JITTargetAddress WrapperFnAddr, + virtual void callWrapperAsync(ExecutorAddr WrapperFnAddr, + IncomingWFRHandler OnComplete, ArrayRef<char> ArgBuffer) = 0; + /// Run a wrapper function in the executor using the given Runner to dispatch + /// OnComplete when the result is ready. + template <typename RunPolicyT, typename FnT> + void callWrapperAsync(RunPolicyT &&Runner, ExecutorAddr WrapperFnAddr, + FnT &&OnComplete, ArrayRef<char> ArgBuffer) { + callWrapperAsync( + WrapperFnAddr, Runner(std::forward<FnT>(OnComplete)), ArgBuffer); + } + + /// Run a wrapper function in the executor. OnComplete will be dispatched + /// as a GenericNamedTask using this instance's TaskDispatch object. + template <typename FnT> + void callWrapperAsync(ExecutorAddr WrapperFnAddr, FnT &&OnComplete, + ArrayRef<char> ArgBuffer) { + callWrapperAsync(RunAsTask(*D), WrapperFnAddr, + std::forward<FnT>(OnComplete), ArgBuffer); + } + + /// Run a wrapper function in the executor. The wrapper function should be + /// callable as: + /// + /// \code{.cpp} + /// CWrapperFunctionResult fn(uint8_t *Data, uint64_t Size); + /// \endcode{.cpp} + shared::WrapperFunctionResult callWrapper(ExecutorAddr WrapperFnAddr, + ArrayRef<char> ArgBuffer) { + std::promise<shared::WrapperFunctionResult> RP; + auto RF = RP.get_future(); + callWrapperAsync( + RunInPlace(), WrapperFnAddr, + [&](shared::WrapperFunctionResult R) { + RP.set_value(std::move(R)); + }, ArgBuffer); + return RF.get(); + } + + /// Run a wrapper function using SPS to serialize the arguments and + /// deserialize the results. + template <typename SPSSignature, typename RunPolicyT, typename SendResultT, + typename... 
ArgTs> + void callSPSWrapperAsync(RunPolicyT &&Runner, ExecutorAddr WrapperFnAddr, + SendResultT &&SendResult, const ArgTs &...Args) { + shared::WrapperFunction<SPSSignature>::callAsync( + [this, WrapperFnAddr, Runner = std::move(Runner)] + (auto &&SendResult, const char *ArgData, size_t ArgSize) mutable { + this->callWrapperAsync(std::move(Runner), WrapperFnAddr, + std::move(SendResult), + ArrayRef<char>(ArgData, ArgSize)); + }, + std::forward<SendResultT>(SendResult), Args...); + } + + /// Run a wrapper function using SPS to serialize the arguments and + /// deserialize the results. + template <typename SPSSignature, typename SendResultT, typename... ArgTs> + void callSPSWrapperAsync(ExecutorAddr WrapperFnAddr, SendResultT &&SendResult, + const ArgTs &...Args) { + callSPSWrapperAsync<SPSSignature>(RunAsTask(*D), WrapperFnAddr, + std::forward<SendResultT>(SendResult), + Args...); + } + + /// Run a wrapper function using SPS to serialize the arguments and + /// deserialize the results. + /// + /// If SPSSignature is a non-void function signature then the second argument + /// (the first in the Args list) should be a reference to a return value. + template <typename SPSSignature, typename... WrapperCallArgTs> + Error callSPSWrapper(ExecutorAddr WrapperFnAddr, + WrapperCallArgTs &&...WrapperCallArgs) { + return shared::WrapperFunction<SPSSignature>::call( + [this, WrapperFnAddr](const char *ArgData, size_t ArgSize) { + return callWrapper(WrapperFnAddr, ArrayRef<char>(ArgData, ArgSize)); + }, + std::forward<WrapperCallArgTs>(WrapperCallArgs)...); + } + /// Disconnect from the target process. /// /// This should be called after the JIT session is shut down. 
virtual Error disconnect() = 0; protected: - ExecutorProcessControl(std::shared_ptr<SymbolStringPool> SSP) - : SSP(std::move(SSP)) {} std::shared_ptr<SymbolStringPool> SSP; + std::unique_ptr<TaskDispatcher> D; ExecutionSession *ES = nullptr; Triple TargetTriple; unsigned PageSize = 0; JITDispatchInfo JDI; MemoryAccess *MemAccess = nullptr; jitlink::JITLinkMemoryManager *MemMgr = nullptr; + StringMap<ExecutorAddr> BootstrapSymbols; }; /// A ExecutorProcessControl instance that asserts if any of its methods are @@ -210,9 +373,12 @@ class UnsupportedExecutorProcessControl : public ExecutorProcessControl { public: UnsupportedExecutorProcessControl( std::shared_ptr<SymbolStringPool> SSP = nullptr, + std::unique_ptr<TaskDispatcher> D = nullptr, const std::string &TT = "", unsigned PageSize = 0) : ExecutorProcessControl(SSP ? std::move(SSP) - : std::make_shared<SymbolStringPool>()) { + : std::make_shared<SymbolStringPool>(), + D ? std::move(D) + : std::make_unique<InPlaceTaskDispatcher>()) { this->TargetTriple = Triple(TT); this->PageSize = PageSize; } @@ -226,13 +392,13 @@ public: llvm_unreachable("Unsupported"); } - Expected<int32_t> runAsMain(JITTargetAddress MainFnAddr, + Expected<int32_t> runAsMain(ExecutorAddr MainFnAddr, ArrayRef<std::string> Args) override { llvm_unreachable("Unsupported"); } - void callWrapperAsync(SendResultFunction OnComplete, - JITTargetAddress WrapperFnAddr, + void callWrapperAsync(ExecutorAddr WrapperFnAddr, + IncomingWFRHandler OnComplete, ArrayRef<char> ArgBuffer) override { llvm_unreachable("Unsupported"); } @@ -246,8 +412,9 @@ class SelfExecutorProcessControl private ExecutorProcessControl::MemoryAccess { public: SelfExecutorProcessControl( - std::shared_ptr<SymbolStringPool> SSP, Triple TargetTriple, - unsigned PageSize, std::unique_ptr<jitlink::JITLinkMemoryManager> MemMgr); + std::shared_ptr<SymbolStringPool> SSP, std::unique_ptr<TaskDispatcher> D, + Triple TargetTriple, unsigned PageSize, + 
std::unique_ptr<jitlink::JITLinkMemoryManager> MemMgr); /// Create a SelfExecutorProcessControl with the given symbol string pool and /// memory manager. @@ -256,6 +423,7 @@ public: /// be created and used by default. static Expected<std::unique_ptr<SelfExecutorProcessControl>> Create(std::shared_ptr<SymbolStringPool> SSP = nullptr, + std::unique_ptr<TaskDispatcher> D = nullptr, std::unique_ptr<jitlink::JITLinkMemoryManager> MemMgr = nullptr); Expected<tpctypes::DylibHandle> loadDylib(const char *DylibPath) override; @@ -263,32 +431,32 @@ public: Expected<std::vector<tpctypes::LookupResult>> lookupSymbols(ArrayRef<LookupRequest> Request) override; - Expected<int32_t> runAsMain(JITTargetAddress MainFnAddr, + Expected<int32_t> runAsMain(ExecutorAddr MainFnAddr, ArrayRef<std::string> Args) override; - void callWrapperAsync(SendResultFunction OnComplete, - JITTargetAddress WrapperFnAddr, + void callWrapperAsync(ExecutorAddr WrapperFnAddr, + IncomingWFRHandler OnComplete, ArrayRef<char> ArgBuffer) override; Error disconnect() override; private: - void writeUInt8s(ArrayRef<tpctypes::UInt8Write> Ws, - WriteResultFn OnWriteComplete) override; + void writeUInt8sAsync(ArrayRef<tpctypes::UInt8Write> Ws, + WriteResultFn OnWriteComplete) override; - void writeUInt16s(ArrayRef<tpctypes::UInt16Write> Ws, - WriteResultFn OnWriteComplete) override; + void writeUInt16sAsync(ArrayRef<tpctypes::UInt16Write> Ws, + WriteResultFn OnWriteComplete) override; - void writeUInt32s(ArrayRef<tpctypes::UInt32Write> Ws, - WriteResultFn OnWriteComplete) override; + void writeUInt32sAsync(ArrayRef<tpctypes::UInt32Write> Ws, + WriteResultFn OnWriteComplete) override; - void writeUInt64s(ArrayRef<tpctypes::UInt64Write> Ws, - WriteResultFn OnWriteComplete) override; + void writeUInt64sAsync(ArrayRef<tpctypes::UInt64Write> Ws, + WriteResultFn OnWriteComplete) override; - void writeBuffers(ArrayRef<tpctypes::BufferWrite> Ws, - WriteResultFn OnWriteComplete) override; + void 
writeBuffersAsync(ArrayRef<tpctypes::BufferWrite> Ws, + WriteResultFn OnWriteComplete) override; - static shared::detail::CWrapperFunctionResult + static shared::CWrapperFunctionResult jitDispatchViaWrapperFunctionManager(void *Ctx, const void *FnTag, const char *Data, size_t Size); diff --git a/llvm/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h b/llvm/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h index 78e3ceef50e2..4d6d46595fc3 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h @@ -45,6 +45,13 @@ class PointerType; class Triple; class Twine; class Value; +class MCDisassembler; +class MCInstrAnalysis; + +namespace jitlink { +class LinkGraph; +class Symbol; +} // namespace jitlink namespace orc { @@ -557,6 +564,33 @@ GlobalAlias *cloneGlobalAliasDecl(Module &Dst, const GlobalAlias &OrigA, void cloneModuleFlagsMetadata(Module &Dst, const Module &Src, ValueToValueMapTy &VMap); +/// Introduce relocations to \p Sym in its own definition if there are any +/// pointers formed via PC-relative address that do not already have a +/// relocation. +/// +/// This is useful when introducing indirection via a stub function at link time +/// without compiler support. If a function pointer is formed without a +/// relocation, e.g. in the definition of \c foo +/// +/// \code +/// _foo: +/// leaq -7(%rip), rax # form pointer to _foo without relocation +/// _bar: +/// leaq (%rip), %rax # uses X86_64_RELOC_SIGNED to '_foo' +/// \endcode +/// +/// the pointer to \c _foo computed by \c _foo and \c _bar may differ if we +/// introduce a stub for _foo. If the pointer is used as a key, this may be +/// observable to the program. This pass will attempt to introduce the missing +/// "self-relocation" on the leaq instruction. +/// +/// This is based on disassembly and should be considered "best effort". It may +/// silently fail to add relocations. 
+Error addFunctionPointerRelocationsToCurrentSymbol(jitlink::Symbol &Sym, + jitlink::LinkGraph &G, + MCDisassembler &Disassembler, + MCInstrAnalysis &MIA); + } // end namespace orc } // end namespace llvm diff --git a/llvm/include/llvm/ExecutionEngine/Orc/LLVMSPSSerializers.h b/llvm/include/llvm/ExecutionEngine/Orc/LLVMSPSSerializers.h deleted file mode 100644 index f3d616deae8f..000000000000 --- a/llvm/include/llvm/ExecutionEngine/Orc/LLVMSPSSerializers.h +++ /dev/null @@ -1,69 +0,0 @@ -//===-- LLVMSPSSerializers.h - SPS serialization for LLVM types -*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// SPS Serialization for common LLVM types. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_EXECUTIONENGINE_ORC_LLVMSPSSERIALIZERS_H -#define LLVM_EXECUTIONENGINE_ORC_LLVMSPSSERIALIZERS_H - -#include "llvm/ADT/StringMap.h" -#include "llvm/ExecutionEngine/Orc/Shared/SimplePackedSerialization.h" - -namespace llvm { -namespace orc { -namespace shared { - -template <typename SPSValueT, typename ValueT> -class SPSSerializationTraits<SPSSequence<SPSTuple<SPSString, SPSValueT>>, - StringMap<ValueT>> { -public: - static size_t size(const StringMap<ValueT> &M) { - size_t Sz = SPSArgList<uint64_t>::size(static_cast<uint64_t>(M.size())); - for (auto &E : M) - Sz += SPSArgList<SPSString, SPSValueT>::size(E.first(), E.second); - return Sz; - } - - static bool serialize(SPSOutputBuffer &OB, const StringMap<ValueT> &M) { - if (!SPSArgList<uint64_t>::serialize(OB, static_cast<uint64_t>(M.size()))) - return false; - - for (auto &E : M) - if (!SPSArgList<SPSString, SPSValueT>::serialize(OB, E.first(), E.second)) - return false; - - return true; - } - - static bool 
deserialize(SPSInputBuffer &IB, StringMap<ValueT> &M) { - uint64_t Size; - assert(M.empty() && "M already contains elements"); - - if (!SPSArgList<uint64_t>::deserialize(IB, Size)) - return false; - - while (Size--) { - StringRef S; - ValueT V; - if (!SPSArgList<SPSString, SPSValueT>::deserialize(IB, S, V)) - return false; - if (!M.insert(std::make_pair(S, V)).second) - return false; - } - - return true; - } -}; - -} // end namespace shared -} // end namespace orc -} // end namespace llvm - -#endif // LLVM_EXECUTIONENGINE_ORC_LLVMSPSSERIALIZERS_H diff --git a/llvm/include/llvm/ExecutionEngine/Orc/LookupAndRecordAddrs.h b/llvm/include/llvm/ExecutionEngine/Orc/LookupAndRecordAddrs.h new file mode 100644 index 000000000000..a598405ee4f6 --- /dev/null +++ b/llvm/include/llvm/ExecutionEngine/Orc/LookupAndRecordAddrs.h @@ -0,0 +1,70 @@ +//===-- LookupAndRecordAddrs.h - Symbol lookup support utility --*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Record the addresses of a set of symbols into ExecutorAddr objects. +// +// This can be used to avoid repeated lookup (via ExecutionSession::lookup) of +// the given symbols. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EXECUTIONENGINE_ORC_LOOKUPANDRECORDADDRS_H +#define LLVM_EXECUTIONENGINE_ORC_LOOKUPANDRECORDADDRS_H + +#include "llvm/ADT/FunctionExtras.h" +#include "llvm/ExecutionEngine/Orc/Core.h" +#include "llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h" + +#include <vector> + +namespace llvm { +namespace orc { + +/// Record addresses of the given symbols in the given ExecutorAddrs. +/// +/// Useful for making permanent records of symbol addreses to call or +/// access in the executor (e.g. 
runtime support functions in Platform +/// subclasses). +/// +/// By default the symbols are looked up using +/// SymbolLookupFlags::RequiredSymbol, and an error will be generated if any of +/// the requested symbols are not defined. +/// +/// If SymbolLookupFlags::WeaklyReferencedSymbol is used then any missing +/// symbols will have their corresponding address objects set to zero, and +/// this function will never generate an error (the caller will need to check +/// addresses before using them). +/// +/// Asynchronous version. +void lookupAndRecordAddrs( + unique_function<void(Error)> OnRecorded, ExecutionSession &ES, LookupKind K, + const JITDylibSearchOrder &SearchOrder, + std::vector<std::pair<SymbolStringPtr, ExecutorAddr *>> Pairs, + SymbolLookupFlags LookupFlags = SymbolLookupFlags::RequiredSymbol); + +/// Record addresses of the given symbols in the given ExecutorAddrs. +/// +/// Blocking version. +Error lookupAndRecordAddrs( + ExecutionSession &ES, LookupKind K, const JITDylibSearchOrder &SearchOrder, + std::vector<std::pair<SymbolStringPtr, ExecutorAddr *>> Pairs, + SymbolLookupFlags LookupFlags = SymbolLookupFlags::RequiredSymbol); + +/// Record addresses of given symbols in the given ExecutorAddrs. +/// +/// ExecutorProcessControl lookup version. Lookups are always implicitly +/// weak. 
+Error lookupAndRecordAddrs( + ExecutorProcessControl &EPC, tpctypes::DylibHandle H, + std::vector<std::pair<SymbolStringPtr, ExecutorAddr *>> Pairs, + SymbolLookupFlags LookupFlags = SymbolLookupFlags::RequiredSymbol); + +} // End namespace orc +} // End namespace llvm + +#endif // LLVM_EXECUTIONENGINE_ORC_LOOKUPANDRECORDADDRS_H diff --git a/llvm/include/llvm/ExecutionEngine/Orc/MachOPlatform.h b/llvm/include/llvm/ExecutionEngine/Orc/MachOPlatform.h index f77dfd208413..d7b5e2eda6ee 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/MachOPlatform.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/MachOPlatform.h @@ -16,7 +16,6 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ExecutionEngine/Orc/Core.h" #include "llvm/ExecutionEngine/Orc/ExecutorProcessControl.h" -#include "llvm/ExecutionEngine/Orc/LLVMSPSSerializers.h" #include "llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h" #include "llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h" @@ -27,22 +26,16 @@ namespace llvm { namespace orc { -struct MachOPerObjectSectionsToRegister { - ExecutorAddressRange EHFrameSection; - ExecutorAddressRange ThreadDataSection; -}; - struct MachOJITDylibInitializers { - using SectionList = std::vector<ExecutorAddressRange>; + using SectionList = std::vector<ExecutorAddrRange>; - MachOJITDylibInitializers(std::string Name, - ExecutorAddress MachOHeaderAddress) + MachOJITDylibInitializers(std::string Name, ExecutorAddr MachOHeaderAddress) : Name(std::move(Name)), MachOHeaderAddress(std::move(MachOHeaderAddress)) {} std::string Name; - ExecutorAddress MachOHeaderAddress; - ExecutorAddress ObjCImageInfoAddress; + ExecutorAddr MachOHeaderAddress; + ExecutorAddr ObjCImageInfoAddress; StringMap<SectionList> InitSections; }; @@ -155,15 +148,12 @@ private: using InitSymbolDepMap = DenseMap<MaterializationResponsibility *, JITLinkSymbolSet>; - void addInitializerSupportPasses(MaterializationResponsibility &MR, - jitlink::PassConfiguration &Config); - - void 
addMachOHeaderSupportPasses(MaterializationResponsibility &MR, - jitlink::PassConfiguration &Config); - void addEHAndTLVSupportPasses(MaterializationResponsibility &MR, jitlink::PassConfiguration &Config); + Error associateJITDylibHeaderSymbol(jitlink::LinkGraph &G, + MaterializationResponsibility &MR); + Error preserveInitSections(jitlink::LinkGraph &G, MaterializationResponsibility &MR); @@ -174,6 +164,10 @@ private: Error fixTLVSectionsAndEdges(jitlink::LinkGraph &G, JITDylib &JD); + Error registerEHAndTLVSections(jitlink::LinkGraph &G); + + Error registerEHSectionsPhase1(jitlink::LinkGraph &G); + std::mutex PluginMutex; MachOPlatform &MP; DenseMap<JITDylib *, std::pair<uint32_t, uint32_t>> ObjCImageInfos; @@ -186,7 +180,7 @@ private: using SendDeinitializerSequenceFn = unique_function<void(Expected<MachOJITDylibDeinitializerSequence>)>; - using SendSymbolAddressFn = unique_function<void(Expected<ExecutorAddress>)>; + using SendSymbolAddressFn = unique_function<void(Expected<ExecutorAddr>)>; static bool supportedTarget(const Triple &TT); @@ -209,31 +203,34 @@ private: StringRef JDName); void rt_getDeinitializers(SendDeinitializerSequenceFn SendResult, - ExecutorAddress Handle); + ExecutorAddr Handle); - void rt_lookupSymbol(SendSymbolAddressFn SendResult, ExecutorAddress Handle, + void rt_lookupSymbol(SendSymbolAddressFn SendResult, ExecutorAddr Handle, StringRef SymbolName); // Records the addresses of runtime symbols used by the platform. 
Error bootstrapMachORuntime(JITDylib &PlatformJD); - Error registerInitInfo(JITDylib &JD, ExecutorAddress ObjCImageInfoAddr, + Error registerInitInfo(JITDylib &JD, ExecutorAddr ObjCImageInfoAddr, ArrayRef<jitlink::Section *> InitSections); - Error registerPerObjectSections(const MachOPerObjectSectionsToRegister &POSR); - Expected<uint64_t> createPThreadKey(); + enum PlatformState { BootstrapPhase1, BootstrapPhase2, Initialized }; + ExecutionSession &ES; ObjectLinkingLayer &ObjLinkingLayer; SymbolStringPtr MachOHeaderStartSymbol; - std::atomic<bool> RuntimeBootstrapped{false}; + std::atomic<PlatformState> State{BootstrapPhase1}; - ExecutorAddress orc_rt_macho_platform_bootstrap; - ExecutorAddress orc_rt_macho_platform_shutdown; - ExecutorAddress orc_rt_macho_register_object_sections; - ExecutorAddress orc_rt_macho_create_pthread_key; + ExecutorAddr orc_rt_macho_platform_bootstrap; + ExecutorAddr orc_rt_macho_platform_shutdown; + ExecutorAddr orc_rt_macho_register_ehframe_section; + ExecutorAddr orc_rt_macho_deregister_ehframe_section; + ExecutorAddr orc_rt_macho_register_thread_data_section; + ExecutorAddr orc_rt_macho_deregister_thread_data_section; + ExecutorAddr orc_rt_macho_create_pthread_key; DenseMap<JITDylib *, SymbolLookupSet> RegisteredInitSymbols; @@ -241,7 +238,6 @@ private: // aggregating data from the jitlink. 
std::mutex PlatformMutex; DenseMap<JITDylib *, MachOJITDylibInitializers> InitSeqs; - std::vector<MachOPerObjectSectionsToRegister> BootstrapPOSRs; DenseMap<JITTargetAddress, JITDylib *> HeaderAddrToJITDylib; DenseMap<JITDylib *, uint64_t> JITDylibToPThreadKey; @@ -249,38 +245,12 @@ private: namespace shared { -using SPSMachOPerObjectSectionsToRegister = - SPSTuple<SPSExecutorAddressRange, SPSExecutorAddressRange>; - -template <> -class SPSSerializationTraits<SPSMachOPerObjectSectionsToRegister, - MachOPerObjectSectionsToRegister> { - -public: - static size_t size(const MachOPerObjectSectionsToRegister &MOPOSR) { - return SPSMachOPerObjectSectionsToRegister::AsArgList::size( - MOPOSR.EHFrameSection, MOPOSR.ThreadDataSection); - } - - static bool serialize(SPSOutputBuffer &OB, - const MachOPerObjectSectionsToRegister &MOPOSR) { - return SPSMachOPerObjectSectionsToRegister::AsArgList::serialize( - OB, MOPOSR.EHFrameSection, MOPOSR.ThreadDataSection); - } - - static bool deserialize(SPSInputBuffer &IB, - MachOPerObjectSectionsToRegister &MOPOSR) { - return SPSMachOPerObjectSectionsToRegister::AsArgList::deserialize( - IB, MOPOSR.EHFrameSection, MOPOSR.ThreadDataSection); - } -}; - -using SPSNamedExecutorAddressRangeSequenceMap = - SPSSequence<SPSTuple<SPSString, SPSExecutorAddressRangeSequence>>; +using SPSNamedExecutorAddrRangeSequenceMap = + SPSSequence<SPSTuple<SPSString, SPSExecutorAddrRangeSequence>>; using SPSMachOJITDylibInitializers = - SPSTuple<SPSString, SPSExecutorAddress, SPSExecutorAddress, - SPSNamedExecutorAddressRangeSequenceMap>; + SPSTuple<SPSString, SPSExecutorAddr, SPSExecutorAddr, + SPSNamedExecutorAddrRangeSequenceMap>; using SPSMachOJITDylibInitializerSequence = SPSSequence<SPSMachOJITDylibInitializers>; diff --git a/llvm/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h b/llvm/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h index 5632118eee4e..109922a46e26 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h +++ 
b/llvm/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h @@ -184,13 +184,13 @@ public: } private: - using AllocPtr = std::unique_ptr<jitlink::JITLinkMemoryManager::Allocation>; + using FinalizedAlloc = jitlink::JITLinkMemoryManager::FinalizedAlloc; void modifyPassConfig(MaterializationResponsibility &MR, jitlink::LinkGraph &G, jitlink::PassConfiguration &PassConfig); void notifyLoaded(MaterializationResponsibility &MR); - Error notifyEmitted(MaterializationResponsibility &MR, AllocPtr Alloc); + Error notifyEmitted(MaterializationResponsibility &MR, FinalizedAlloc FA); Error handleRemoveResources(ResourceKey K) override; void handleTransferResources(ResourceKey DstKey, ResourceKey SrcKey) override; @@ -201,7 +201,7 @@ private: bool OverrideObjectFlags = false; bool AutoClaimObjectSymbols = false; ReturnObjectBufferFunction ReturnObjectBuffer; - DenseMap<ResourceKey, std::vector<AllocPtr>> Allocs; + DenseMap<ResourceKey, std::vector<FinalizedAlloc>> Allocs; std::vector<std::unique_ptr<Plugin>> Plugins; }; diff --git a/llvm/include/llvm/ExecutionEngine/Orc/OrcRPCExecutorProcessControl.h b/llvm/include/llvm/ExecutionEngine/Orc/OrcRPCExecutorProcessControl.h deleted file mode 100644 index 4310ba9ce9e0..000000000000 --- a/llvm/include/llvm/ExecutionEngine/Orc/OrcRPCExecutorProcessControl.h +++ /dev/null @@ -1,436 +0,0 @@ -//===-- OrcRPCExecutorProcessControl.h - Remote target control --*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// Executor control via ORC RPC. 
-// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_EXECUTIONENGINE_ORC_ORCRPCEXECUTORPROCESSCONTROL_H -#define LLVM_EXECUTIONENGINE_ORC_ORCRPCEXECUTORPROCESSCONTROL_H - -#include "llvm/ExecutionEngine/Orc/ExecutorProcessControl.h" -#include "llvm/ExecutionEngine/Orc/Shared/RPCUtils.h" -#include "llvm/ExecutionEngine/Orc/Shared/RawByteChannel.h" -#include "llvm/ExecutionEngine/Orc/TargetProcess/OrcRPCTPCServer.h" -#include "llvm/Support/MSVCErrorWorkarounds.h" - -namespace llvm { -namespace orc { - -/// JITLinkMemoryManager implementation for a process connected via an ORC RPC -/// endpoint. -template <typename OrcRPCEPCImplT> -class OrcRPCEPCJITLinkMemoryManager : public jitlink::JITLinkMemoryManager { -private: - struct HostAlloc { - std::unique_ptr<char[]> Mem; - uint64_t Size; - }; - - struct TargetAlloc { - JITTargetAddress Address = 0; - uint64_t AllocatedSize = 0; - }; - - using HostAllocMap = DenseMap<int, HostAlloc>; - using TargetAllocMap = DenseMap<int, TargetAlloc>; - -public: - class OrcRPCAllocation : public Allocation { - public: - OrcRPCAllocation(OrcRPCEPCJITLinkMemoryManager<OrcRPCEPCImplT> &Parent, - HostAllocMap HostAllocs, TargetAllocMap TargetAllocs) - : Parent(Parent), HostAllocs(std::move(HostAllocs)), - TargetAllocs(std::move(TargetAllocs)) { - assert(HostAllocs.size() == TargetAllocs.size() && - "HostAllocs size should match TargetAllocs"); - } - - ~OrcRPCAllocation() override { - assert(TargetAllocs.empty() && "failed to deallocate"); - } - - MutableArrayRef<char> getWorkingMemory(ProtectionFlags Seg) override { - auto I = HostAllocs.find(Seg); - assert(I != HostAllocs.end() && "No host allocation for segment"); - auto &HA = I->second; - return {HA.Mem.get(), static_cast<size_t>(HA.Size)}; - } - - JITTargetAddress getTargetMemory(ProtectionFlags Seg) override { - auto I = TargetAllocs.find(Seg); - assert(I != TargetAllocs.end() && "No target allocation for segment"); - return 
I->second.Address; - } - - void finalizeAsync(FinalizeContinuation OnFinalize) override { - - std::vector<tpctypes::BufferWrite> BufferWrites; - orcrpctpc::ReleaseOrFinalizeMemRequest FMR; - - for (auto &KV : HostAllocs) { - assert(TargetAllocs.count(KV.first) && - "No target allocation for buffer"); - auto &HA = KV.second; - auto &TA = TargetAllocs[KV.first]; - BufferWrites.push_back({TA.Address, StringRef(HA.Mem.get(), HA.Size)}); - FMR.push_back({orcrpctpc::toWireProtectionFlags( - static_cast<sys::Memory::ProtectionFlags>(KV.first)), - TA.Address, TA.AllocatedSize}); - } - - DEBUG_WITH_TYPE("orc", { - dbgs() << "finalizeAsync " << (void *)this << ":\n"; - auto FMRI = FMR.begin(); - for (auto &B : BufferWrites) { - auto Prot = FMRI->Prot; - ++FMRI; - dbgs() << " Writing " << formatv("{0:x16}", B.Buffer.size()) - << " bytes to " << ((Prot & orcrpctpc::WPF_Read) ? 'R' : '-') - << ((Prot & orcrpctpc::WPF_Write) ? 'W' : '-') - << ((Prot & orcrpctpc::WPF_Exec) ? 'X' : '-') - << " segment: local " << (const void *)B.Buffer.data() - << " -> target " << formatv("{0:x16}", B.Address) << "\n"; - } - }); - if (auto Err = - Parent.Parent.getMemoryAccess().writeBuffers(BufferWrites)) { - OnFinalize(std::move(Err)); - return; - } - - DEBUG_WITH_TYPE("orc", dbgs() << " Applying permissions...\n"); - if (auto Err = - Parent.getEndpoint().template callAsync<orcrpctpc::FinalizeMem>( - [OF = std::move(OnFinalize)](Error Err2) { - // FIXME: Dispatch to work queue. 
- std::thread([OF = std::move(OF), - Err3 = std::move(Err2)]() mutable { - DEBUG_WITH_TYPE( - "orc", { dbgs() << " finalizeAsync complete\n"; }); - OF(std::move(Err3)); - }).detach(); - return Error::success(); - }, - FMR)) { - DEBUG_WITH_TYPE("orc", dbgs() << " failed.\n"); - Parent.getEndpoint().abandonPendingResponses(); - Parent.reportError(std::move(Err)); - } - DEBUG_WITH_TYPE("orc", { - dbgs() << "Leaving finalizeAsync (finalization may continue in " - "background)\n"; - }); - } - - Error deallocate() override { - orcrpctpc::ReleaseOrFinalizeMemRequest RMR; - for (auto &KV : TargetAllocs) - RMR.push_back({orcrpctpc::toWireProtectionFlags( - static_cast<sys::Memory::ProtectionFlags>(KV.first)), - KV.second.Address, KV.second.AllocatedSize}); - TargetAllocs.clear(); - - return Parent.getEndpoint().template callB<orcrpctpc::ReleaseMem>(RMR); - } - - private: - OrcRPCEPCJITLinkMemoryManager<OrcRPCEPCImplT> &Parent; - HostAllocMap HostAllocs; - TargetAllocMap TargetAllocs; - }; - - OrcRPCEPCJITLinkMemoryManager(OrcRPCEPCImplT &Parent) : Parent(Parent) {} - - Expected<std::unique_ptr<Allocation>> - allocate(const jitlink::JITLinkDylib *JD, - const SegmentsRequestMap &Request) override { - orcrpctpc::ReserveMemRequest RMR; - HostAllocMap HostAllocs; - - for (auto &KV : Request) { - assert(KV.second.getContentSize() <= std::numeric_limits<size_t>::max() && - "Content size is out-of-range for host"); - - RMR.push_back({orcrpctpc::toWireProtectionFlags( - static_cast<sys::Memory::ProtectionFlags>(KV.first)), - KV.second.getContentSize() + KV.second.getZeroFillSize(), - KV.second.getAlignment()}); - HostAllocs[KV.first] = { - std::make_unique<char[]>(KV.second.getContentSize()), - KV.second.getContentSize()}; - } - - DEBUG_WITH_TYPE("orc", { - dbgs() << "Orc remote memmgr got request:\n"; - for (auto &KV : Request) - dbgs() << " permissions: " - << ((KV.first & sys::Memory::MF_READ) ? 'R' : '-') - << ((KV.first & sys::Memory::MF_WRITE) ? 
'W' : '-') - << ((KV.first & sys::Memory::MF_EXEC) ? 'X' : '-') - << ", content size: " - << formatv("{0:x16}", KV.second.getContentSize()) - << " + zero-fill-size: " - << formatv("{0:x16}", KV.second.getZeroFillSize()) - << ", align: " << KV.second.getAlignment() << "\n"; - }); - - // FIXME: LLVM RPC needs to be fixed to support alt - // serialization/deserialization on return types. For now just - // translate from std::map to DenseMap manually. - auto TmpTargetAllocs = - Parent.getEndpoint().template callB<orcrpctpc::ReserveMem>(RMR); - if (!TmpTargetAllocs) - return TmpTargetAllocs.takeError(); - - if (TmpTargetAllocs->size() != RMR.size()) - return make_error<StringError>( - "Number of target allocations does not match request", - inconvertibleErrorCode()); - - TargetAllocMap TargetAllocs; - for (auto &E : *TmpTargetAllocs) - TargetAllocs[orcrpctpc::fromWireProtectionFlags(E.Prot)] = { - E.Address, E.AllocatedSize}; - - DEBUG_WITH_TYPE("orc", { - auto HAI = HostAllocs.begin(); - for (auto &KV : TargetAllocs) - dbgs() << " permissions: " - << ((KV.first & sys::Memory::MF_READ) ? 'R' : '-') - << ((KV.first & sys::Memory::MF_WRITE) ? 'W' : '-') - << ((KV.first & sys::Memory::MF_EXEC) ? 'X' : '-') - << " assigned local " << (void *)HAI->second.Mem.get() - << ", target " << formatv("{0:x16}", KV.second.Address) << "\n"; - }); - - return std::make_unique<OrcRPCAllocation>(*this, std::move(HostAllocs), - std::move(TargetAllocs)); - } - -private: - void reportError(Error Err) { Parent.reportError(std::move(Err)); } - - decltype(std::declval<OrcRPCEPCImplT>().getEndpoint()) getEndpoint() { - return Parent.getEndpoint(); - } - - OrcRPCEPCImplT &Parent; -}; - -/// ExecutorProcessControl::MemoryAccess implementation for a process connected -/// via an ORC RPC endpoint. 
-template <typename OrcRPCEPCImplT> -class OrcRPCEPCMemoryAccess : public ExecutorProcessControl::MemoryAccess { -public: - OrcRPCEPCMemoryAccess(OrcRPCEPCImplT &Parent) : Parent(Parent) {} - - void writeUInt8s(ArrayRef<tpctypes::UInt8Write> Ws, - WriteResultFn OnWriteComplete) override { - writeViaRPC<orcrpctpc::WriteUInt8s>(Ws, std::move(OnWriteComplete)); - } - - void writeUInt16s(ArrayRef<tpctypes::UInt16Write> Ws, - WriteResultFn OnWriteComplete) override { - writeViaRPC<orcrpctpc::WriteUInt16s>(Ws, std::move(OnWriteComplete)); - } - - void writeUInt32s(ArrayRef<tpctypes::UInt32Write> Ws, - WriteResultFn OnWriteComplete) override { - writeViaRPC<orcrpctpc::WriteUInt32s>(Ws, std::move(OnWriteComplete)); - } - - void writeUInt64s(ArrayRef<tpctypes::UInt64Write> Ws, - WriteResultFn OnWriteComplete) override { - writeViaRPC<orcrpctpc::WriteUInt64s>(Ws, std::move(OnWriteComplete)); - } - - void writeBuffers(ArrayRef<tpctypes::BufferWrite> Ws, - WriteResultFn OnWriteComplete) override { - writeViaRPC<orcrpctpc::WriteBuffers>(Ws, std::move(OnWriteComplete)); - } - -private: - template <typename WriteRPCFunction, typename WriteElementT> - void writeViaRPC(ArrayRef<WriteElementT> Ws, WriteResultFn OnWriteComplete) { - if (auto Err = Parent.getEndpoint().template callAsync<WriteRPCFunction>( - [OWC = std::move(OnWriteComplete)](Error Err2) mutable -> Error { - OWC(std::move(Err2)); - return Error::success(); - }, - Ws)) { - Parent.reportError(std::move(Err)); - Parent.getEndpoint().abandonPendingResponses(); - } - } - - OrcRPCEPCImplT &Parent; -}; - -// ExecutorProcessControl for a process connected via an ORC RPC Endpoint. 
-template <typename RPCEndpointT> -class OrcRPCExecutorProcessControlBase : public ExecutorProcessControl { -public: - using ErrorReporter = unique_function<void(Error)>; - - using OnCloseConnectionFunction = unique_function<Error(Error)>; - - OrcRPCExecutorProcessControlBase(std::shared_ptr<SymbolStringPool> SSP, - RPCEndpointT &EP, ErrorReporter ReportError) - : ExecutorProcessControl(std::move(SSP)), - ReportError(std::move(ReportError)), EP(EP) { - using ThisT = OrcRPCExecutorProcessControlBase<RPCEndpointT>; - EP.template addAsyncHandler<orcrpctpc::RunWrapper>(*this, - &ThisT::runWrapperInJIT); - } - - void reportError(Error Err) { ReportError(std::move(Err)); } - - RPCEndpointT &getEndpoint() { return EP; } - - Expected<tpctypes::DylibHandle> loadDylib(const char *DylibPath) override { - DEBUG_WITH_TYPE("orc", { - dbgs() << "Loading dylib \"" << (DylibPath ? DylibPath : "") << "\" "; - if (!DylibPath) - dbgs() << "(process symbols)"; - dbgs() << "\n"; - }); - if (!DylibPath) - DylibPath = ""; - auto H = EP.template callB<orcrpctpc::LoadDylib>(DylibPath); - DEBUG_WITH_TYPE("orc", { - if (H) - dbgs() << " got handle " << formatv("{0:x16}", *H) << "\n"; - else - dbgs() << " error, unable to load\n"; - }); - return H; - } - - Expected<std::vector<tpctypes::LookupResult>> - lookupSymbols(ArrayRef<LookupRequest> Request) override { - std::vector<orcrpctpc::RemoteLookupRequest> RR; - for (auto &E : Request) { - RR.push_back({}); - RR.back().first = E.Handle; - for (auto &KV : E.Symbols) - RR.back().second.push_back( - {(*KV.first).str(), - KV.second == SymbolLookupFlags::WeaklyReferencedSymbol}); - } - DEBUG_WITH_TYPE("orc", { - dbgs() << "Compound lookup:\n"; - for (auto &R : Request) { - dbgs() << " In " << formatv("{0:x16}", R.Handle) << ": {"; - bool First = true; - for (auto &KV : R.Symbols) { - dbgs() << (First ? 
"" : ",") << " " << *KV.first; - First = false; - } - dbgs() << " }\n"; - } - }); - return EP.template callB<orcrpctpc::LookupSymbols>(RR); - } - - Expected<int32_t> runAsMain(JITTargetAddress MainFnAddr, - ArrayRef<std::string> Args) override { - DEBUG_WITH_TYPE("orc", { - dbgs() << "Running as main: " << formatv("{0:x16}", MainFnAddr) - << ", args = ["; - for (unsigned I = 0; I != Args.size(); ++I) - dbgs() << (I ? "," : "") << " \"" << Args[I] << "\""; - dbgs() << "]\n"; - }); - auto Result = EP.template callB<orcrpctpc::RunMain>(MainFnAddr, Args); - DEBUG_WITH_TYPE("orc", { - dbgs() << " call to " << formatv("{0:x16}", MainFnAddr); - if (Result) - dbgs() << " returned result " << *Result << "\n"; - else - dbgs() << " failed\n"; - }); - return Result; - } - - void callWrapperAsync(SendResultFunction OnComplete, - JITTargetAddress WrapperFnAddr, - ArrayRef<char> ArgBuffer) override { - DEBUG_WITH_TYPE("orc", { - dbgs() << "Running as wrapper function " - << formatv("{0:x16}", WrapperFnAddr) << " with " - << formatv("{0:x16}", ArgBuffer.size()) << " argument buffer\n"; - }); - auto Result = EP.template callB<orcrpctpc::RunWrapper>( - WrapperFnAddr, - ArrayRef<uint8_t>(reinterpret_cast<const uint8_t *>(ArgBuffer.data()), - ArgBuffer.size())); - - if (!Result) - OnComplete(shared::WrapperFunctionResult::createOutOfBandError( - toString(Result.takeError()))); - OnComplete(std::move(*Result)); - } - - Error closeConnection(OnCloseConnectionFunction OnCloseConnection) { - DEBUG_WITH_TYPE("orc", dbgs() << "Closing connection to remote\n"); - return EP.template callAsync<orcrpctpc::CloseConnection>( - std::move(OnCloseConnection)); - } - - Error closeConnectionAndWait() { - std::promise<MSVCPError> P; - auto F = P.get_future(); - if (auto Err = closeConnection([&](Error Err2) -> Error { - P.set_value(std::move(Err2)); - return Error::success(); - })) { - EP.abandonAllPendingResponses(); - return joinErrors(std::move(Err), F.get()); - } - return F.get(); - } - -protected: 
- /// Subclasses must call this during construction to initialize the - /// TargetTriple and PageSize members. - Error initializeORCRPCEPCBase() { - if (auto EPI = EP.template callB<orcrpctpc::GetExecutorProcessInfo>()) { - this->TargetTriple = Triple(EPI->Triple); - this->PageSize = PageSize; - this->JDI = {ExecutorAddress(EPI->DispatchFuncAddr), - ExecutorAddress(EPI->DispatchCtxAddr)}; - return Error::success(); - } else - return EPI.takeError(); - } - -private: - Error runWrapperInJIT( - std::function<Error(Expected<shared::WrapperFunctionResult>)> SendResult, - JITTargetAddress FunctionTag, std::vector<uint8_t> ArgBuffer) { - - getExecutionSession().runJITDispatchHandler( - [this, SendResult = std::move(SendResult)]( - Expected<shared::WrapperFunctionResult> R) { - if (auto Err = SendResult(std::move(R))) - ReportError(std::move(Err)); - }, - FunctionTag, - {reinterpret_cast<const char *>(ArgBuffer.data()), ArgBuffer.size()}); - return Error::success(); - } - - ErrorReporter ReportError; - RPCEndpointT &EP; -}; - -} // end namespace orc -} // end namespace llvm - -#endif // LLVM_EXECUTIONENGINE_ORC_ORCRPCEXECUTORPROCESSCONTROL_H diff --git a/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetClient.h b/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetClient.h deleted file mode 100644 index 3d139740d677..000000000000 --- a/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetClient.h +++ /dev/null @@ -1,925 +0,0 @@ -//===- OrcRemoteTargetClient.h - Orc Remote-target Client -------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file defines the OrcRemoteTargetClient class and helpers. 
This class -// can be used to communicate over an RawByteChannel with an -// OrcRemoteTargetServer instance to support remote-JITing. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_EXECUTIONENGINE_ORC_ORCREMOTETARGETCLIENT_H -#define LLVM_EXECUTIONENGINE_ORC_ORCREMOTETARGETCLIENT_H - -#include "llvm/ADT/Optional.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/StringMap.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/ExecutionEngine/JITSymbol.h" -#include "llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h" -#include "llvm/ExecutionEngine/Orc/IndirectionUtils.h" -#include "llvm/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.h" -#include "llvm/ExecutionEngine/RuntimeDyld.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/Error.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/Format.h" -#include "llvm/Support/MathExtras.h" -#include "llvm/Support/Memory.h" -#include "llvm/Support/raw_ostream.h" -#include <algorithm> -#include <cassert> -#include <cstdint> -#include <memory> -#include <string> -#include <tuple> -#include <utility> -#include <vector> - -#define DEBUG_TYPE "orc-remote" - -namespace llvm { -namespace orc { -namespace remote { - -/// This class provides utilities (including memory manager, indirect stubs -/// manager, and compile callback manager types) that support remote JITing -/// in ORC. -/// -/// Each of the utility classes talks to a JIT server (an instance of the -/// OrcRemoteTargetServer class) via an RPC system (see RPCUtils.h) to carry out -/// its actions. -class OrcRemoteTargetClient - : public shared::SingleThreadedRPCEndpoint<shared::RawByteChannel> { -public: - /// Remote-mapped RuntimeDyld-compatible memory manager. 
- class RemoteRTDyldMemoryManager : public RuntimeDyld::MemoryManager { - friend class OrcRemoteTargetClient; - - public: - ~RemoteRTDyldMemoryManager() { - Client.destroyRemoteAllocator(Id); - LLVM_DEBUG(dbgs() << "Destroyed remote allocator " << Id << "\n"); - } - - RemoteRTDyldMemoryManager(const RemoteRTDyldMemoryManager &) = delete; - RemoteRTDyldMemoryManager & - operator=(const RemoteRTDyldMemoryManager &) = delete; - RemoteRTDyldMemoryManager(RemoteRTDyldMemoryManager &&) = default; - RemoteRTDyldMemoryManager &operator=(RemoteRTDyldMemoryManager &&) = delete; - - uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment, - unsigned SectionID, - StringRef SectionName) override { - Unmapped.back().CodeAllocs.emplace_back(Size, Alignment); - uint8_t *Alloc = reinterpret_cast<uint8_t *>( - Unmapped.back().CodeAllocs.back().getLocalAddress()); - LLVM_DEBUG(dbgs() << "Allocator " << Id << " allocated code for " - << SectionName << ": " << Alloc << " (" << Size - << " bytes, alignment " << Alignment << ")\n"); - return Alloc; - } - - uint8_t *allocateDataSection(uintptr_t Size, unsigned Alignment, - unsigned SectionID, StringRef SectionName, - bool IsReadOnly) override { - if (IsReadOnly) { - Unmapped.back().RODataAllocs.emplace_back(Size, Alignment); - uint8_t *Alloc = reinterpret_cast<uint8_t *>( - Unmapped.back().RODataAllocs.back().getLocalAddress()); - LLVM_DEBUG(dbgs() << "Allocator " << Id << " allocated ro-data for " - << SectionName << ": " << Alloc << " (" << Size - << " bytes, alignment " << Alignment << ")\n"); - return Alloc; - } // else... 
- - Unmapped.back().RWDataAllocs.emplace_back(Size, Alignment); - uint8_t *Alloc = reinterpret_cast<uint8_t *>( - Unmapped.back().RWDataAllocs.back().getLocalAddress()); - LLVM_DEBUG(dbgs() << "Allocator " << Id << " allocated rw-data for " - << SectionName << ": " << Alloc << " (" << Size - << " bytes, alignment " << Alignment << ")\n"); - return Alloc; - } - - void reserveAllocationSpace(uintptr_t CodeSize, uint32_t CodeAlign, - uintptr_t RODataSize, uint32_t RODataAlign, - uintptr_t RWDataSize, - uint32_t RWDataAlign) override { - Unmapped.push_back(ObjectAllocs()); - - LLVM_DEBUG(dbgs() << "Allocator " << Id << " reserved:\n"); - - if (CodeSize != 0) { - Unmapped.back().RemoteCodeAddr = - Client.reserveMem(Id, CodeSize, CodeAlign); - - LLVM_DEBUG( - dbgs() << " code: " - << format("0x%016" PRIx64, Unmapped.back().RemoteCodeAddr) - << " (" << CodeSize << " bytes, alignment " << CodeAlign - << ")\n"); - } - - if (RODataSize != 0) { - Unmapped.back().RemoteRODataAddr = - Client.reserveMem(Id, RODataSize, RODataAlign); - - LLVM_DEBUG( - dbgs() << " ro-data: " - << format("0x%016" PRIx64, Unmapped.back().RemoteRODataAddr) - << " (" << RODataSize << " bytes, alignment " << RODataAlign - << ")\n"); - } - - if (RWDataSize != 0) { - Unmapped.back().RemoteRWDataAddr = - Client.reserveMem(Id, RWDataSize, RWDataAlign); - - LLVM_DEBUG( - dbgs() << " rw-data: " - << format("0x%016" PRIx64, Unmapped.back().RemoteRWDataAddr) - << " (" << RWDataSize << " bytes, alignment " << RWDataAlign - << ")\n"); - } - } - - bool needsToReserveAllocationSpace() override { return true; } - - void registerEHFrames(uint8_t *Addr, uint64_t LoadAddr, - size_t Size) override { - UnfinalizedEHFrames.push_back({LoadAddr, Size}); - } - - void deregisterEHFrames() override { - for (auto &Frame : RegisteredEHFrames) { - // FIXME: Add error poll. 
- Client.deregisterEHFrames(Frame.Addr, Frame.Size); - } - } - - void notifyObjectLoaded(RuntimeDyld &Dyld, - const object::ObjectFile &Obj) override { - LLVM_DEBUG(dbgs() << "Allocator " << Id << " applied mappings:\n"); - for (auto &ObjAllocs : Unmapped) { - mapAllocsToRemoteAddrs(Dyld, ObjAllocs.CodeAllocs, - ObjAllocs.RemoteCodeAddr); - mapAllocsToRemoteAddrs(Dyld, ObjAllocs.RODataAllocs, - ObjAllocs.RemoteRODataAddr); - mapAllocsToRemoteAddrs(Dyld, ObjAllocs.RWDataAllocs, - ObjAllocs.RemoteRWDataAddr); - Unfinalized.push_back(std::move(ObjAllocs)); - } - Unmapped.clear(); - } - - bool finalizeMemory(std::string *ErrMsg = nullptr) override { - LLVM_DEBUG(dbgs() << "Allocator " << Id << " finalizing:\n"); - - for (auto &ObjAllocs : Unfinalized) { - if (copyAndProtect(ObjAllocs.CodeAllocs, ObjAllocs.RemoteCodeAddr, - sys::Memory::MF_READ | sys::Memory::MF_EXEC)) - return true; - - if (copyAndProtect(ObjAllocs.RODataAllocs, ObjAllocs.RemoteRODataAddr, - sys::Memory::MF_READ)) - return true; - - if (copyAndProtect(ObjAllocs.RWDataAllocs, ObjAllocs.RemoteRWDataAddr, - sys::Memory::MF_READ | sys::Memory::MF_WRITE)) - return true; - } - Unfinalized.clear(); - - for (auto &EHFrame : UnfinalizedEHFrames) { - if (auto Err = Client.registerEHFrames(EHFrame.Addr, EHFrame.Size)) { - // FIXME: Replace this once finalizeMemory can return an Error. 
- handleAllErrors(std::move(Err), [&](ErrorInfoBase &EIB) { - if (ErrMsg) { - raw_string_ostream ErrOut(*ErrMsg); - EIB.log(ErrOut); - } - }); - return false; - } - } - RegisteredEHFrames = std::move(UnfinalizedEHFrames); - UnfinalizedEHFrames = {}; - - return false; - } - - private: - class Alloc { - public: - Alloc(uint64_t Size, unsigned Align) - : Size(Size), Align(Align), Contents(new char[Size + Align - 1]) {} - - Alloc(const Alloc &) = delete; - Alloc &operator=(const Alloc &) = delete; - Alloc(Alloc &&) = default; - Alloc &operator=(Alloc &&) = default; - - uint64_t getSize() const { return Size; } - - unsigned getAlign() const { return Align; } - - char *getLocalAddress() const { - uintptr_t LocalAddr = reinterpret_cast<uintptr_t>(Contents.get()); - LocalAddr = alignTo(LocalAddr, Align); - return reinterpret_cast<char *>(LocalAddr); - } - - void setRemoteAddress(JITTargetAddress RemoteAddr) { - this->RemoteAddr = RemoteAddr; - } - - JITTargetAddress getRemoteAddress() const { return RemoteAddr; } - - private: - uint64_t Size; - unsigned Align; - std::unique_ptr<char[]> Contents; - JITTargetAddress RemoteAddr = 0; - }; - - struct ObjectAllocs { - ObjectAllocs() = default; - ObjectAllocs(const ObjectAllocs &) = delete; - ObjectAllocs &operator=(const ObjectAllocs &) = delete; - ObjectAllocs(ObjectAllocs &&) = default; - ObjectAllocs &operator=(ObjectAllocs &&) = default; - - JITTargetAddress RemoteCodeAddr = 0; - JITTargetAddress RemoteRODataAddr = 0; - JITTargetAddress RemoteRWDataAddr = 0; - std::vector<Alloc> CodeAllocs, RODataAllocs, RWDataAllocs; - }; - - RemoteRTDyldMemoryManager(OrcRemoteTargetClient &Client, - ResourceIdMgr::ResourceId Id) - : Client(Client), Id(Id) { - LLVM_DEBUG(dbgs() << "Created remote allocator " << Id << "\n"); - } - - // Maps all allocations in Allocs to aligned blocks - void mapAllocsToRemoteAddrs(RuntimeDyld &Dyld, std::vector<Alloc> &Allocs, - JITTargetAddress NextAddr) { - for (auto &Alloc : Allocs) { - NextAddr = 
alignTo(NextAddr, Alloc.getAlign()); - Dyld.mapSectionAddress(Alloc.getLocalAddress(), NextAddr); - LLVM_DEBUG( - dbgs() << " " << static_cast<void *>(Alloc.getLocalAddress()) - << " -> " << format("0x%016" PRIx64, NextAddr) << "\n"); - Alloc.setRemoteAddress(NextAddr); - - // Only advance NextAddr if it was non-null to begin with, - // otherwise leave it as null. - if (NextAddr) - NextAddr += Alloc.getSize(); - } - } - - // Copies data for each alloc in the list, then set permissions on the - // segment. - bool copyAndProtect(const std::vector<Alloc> &Allocs, - JITTargetAddress RemoteSegmentAddr, - unsigned Permissions) { - if (RemoteSegmentAddr) { - assert(!Allocs.empty() && "No sections in allocated segment"); - - for (auto &Alloc : Allocs) { - LLVM_DEBUG(dbgs() << " copying section: " - << static_cast<void *>(Alloc.getLocalAddress()) - << " -> " - << format("0x%016" PRIx64, Alloc.getRemoteAddress()) - << " (" << Alloc.getSize() << " bytes)\n";); - - if (Client.writeMem(Alloc.getRemoteAddress(), Alloc.getLocalAddress(), - Alloc.getSize())) - return true; - } - - LLVM_DEBUG(dbgs() << " setting " - << (Permissions & sys::Memory::MF_READ ? 'R' : '-') - << (Permissions & sys::Memory::MF_WRITE ? 'W' : '-') - << (Permissions & sys::Memory::MF_EXEC ? 
'X' : '-') - << " permissions on block: " - << format("0x%016" PRIx64, RemoteSegmentAddr) - << "\n"); - if (Client.setProtections(Id, RemoteSegmentAddr, Permissions)) - return true; - } - return false; - } - - OrcRemoteTargetClient &Client; - ResourceIdMgr::ResourceId Id; - std::vector<ObjectAllocs> Unmapped; - std::vector<ObjectAllocs> Unfinalized; - - struct EHFrame { - JITTargetAddress Addr; - uint64_t Size; - }; - std::vector<EHFrame> UnfinalizedEHFrames; - std::vector<EHFrame> RegisteredEHFrames; - }; - - class RPCMMAlloc : public jitlink::JITLinkMemoryManager::Allocation { - using AllocationMap = DenseMap<unsigned, sys::MemoryBlock>; - using FinalizeContinuation = - jitlink::JITLinkMemoryManager::Allocation::FinalizeContinuation; - using ProtectionFlags = sys::Memory::ProtectionFlags; - using SegmentsRequestMap = - DenseMap<unsigned, jitlink::JITLinkMemoryManager::SegmentRequest>; - - RPCMMAlloc(OrcRemoteTargetClient &Client, ResourceIdMgr::ResourceId Id) - : Client(Client), Id(Id) {} - - public: - static Expected<std::unique_ptr<RPCMMAlloc>> - Create(OrcRemoteTargetClient &Client, ResourceIdMgr::ResourceId Id, - const SegmentsRequestMap &Request) { - auto *MM = new RPCMMAlloc(Client, Id); - - if (Error Err = MM->allocateHostBlocks(Request)) - return std::move(Err); - - if (Error Err = MM->allocateTargetBlocks()) - return std::move(Err); - - return std::unique_ptr<RPCMMAlloc>(MM); - } - - MutableArrayRef<char> getWorkingMemory(ProtectionFlags Seg) override { - assert(HostSegBlocks.count(Seg) && "No allocation for segment"); - return {static_cast<char *>(HostSegBlocks[Seg].base()), - HostSegBlocks[Seg].allocatedSize()}; - } - - JITTargetAddress getTargetMemory(ProtectionFlags Seg) override { - assert(TargetSegBlocks.count(Seg) && "No allocation for segment"); - return pointerToJITTargetAddress(TargetSegBlocks[Seg].base()); - } - - void finalizeAsync(FinalizeContinuation OnFinalize) override { - // Host allocations (working memory) remain ReadWrite. 
- OnFinalize(copyAndProtect()); - } - - Error deallocate() override { - // TODO: Cannot release target allocation. RPCAPI has no function - // symmetric to reserveMem(). Add RPC call like freeMem()? - return errorCodeToError(sys::Memory::releaseMappedMemory(HostAllocation)); - } - - private: - OrcRemoteTargetClient &Client; - ResourceIdMgr::ResourceId Id; - AllocationMap HostSegBlocks; - AllocationMap TargetSegBlocks; - JITTargetAddress TargetSegmentAddr; - sys::MemoryBlock HostAllocation; - - Error allocateHostBlocks(const SegmentsRequestMap &Request) { - unsigned TargetPageSize = Client.getPageSize(); - - if (!isPowerOf2_64(static_cast<uint64_t>(TargetPageSize))) - return make_error<StringError>("Host page size is not a power of 2", - inconvertibleErrorCode()); - - auto TotalSize = calcTotalAllocSize(Request, TargetPageSize); - if (!TotalSize) - return TotalSize.takeError(); - - // Allocate one slab to cover all the segments. - const sys::Memory::ProtectionFlags ReadWrite = - static_cast<sys::Memory::ProtectionFlags>(sys::Memory::MF_READ | - sys::Memory::MF_WRITE); - std::error_code EC; - HostAllocation = - sys::Memory::allocateMappedMemory(*TotalSize, nullptr, ReadWrite, EC); - if (EC) - return errorCodeToError(EC); - - char *SlabAddr = static_cast<char *>(HostAllocation.base()); -#ifndef NDEBUG - char *SlabAddrEnd = SlabAddr + HostAllocation.allocatedSize(); -#endif - - // Allocate segment memory from the slab. - for (auto &KV : Request) { - const auto &Seg = KV.second; - - uint64_t SegmentSize = Seg.getContentSize() + Seg.getZeroFillSize(); - uint64_t AlignedSegmentSize = alignTo(SegmentSize, TargetPageSize); - - // Zero out zero-fill memory. - char *ZeroFillBegin = SlabAddr + Seg.getContentSize(); - memset(ZeroFillBegin, 0, Seg.getZeroFillSize()); - - // Record the block for this segment. 
- HostSegBlocks[KV.first] = - sys::MemoryBlock(SlabAddr, AlignedSegmentSize); - - SlabAddr += AlignedSegmentSize; - assert(SlabAddr <= SlabAddrEnd && "Out of range"); - } - - return Error::success(); - } - - Error allocateTargetBlocks() { - // Reserve memory for all blocks on the target. We need as much space on - // the target as we allocated on the host. - TargetSegmentAddr = Client.reserveMem(Id, HostAllocation.allocatedSize(), - Client.getPageSize()); - if (!TargetSegmentAddr) - return make_error<StringError>("Failed to reserve memory on the target", - inconvertibleErrorCode()); - - // Map memory blocks into the allocation, that match the host allocation. - JITTargetAddress TargetAllocAddr = TargetSegmentAddr; - for (const auto &KV : HostSegBlocks) { - size_t TargetAllocSize = KV.second.allocatedSize(); - - TargetSegBlocks[KV.first] = - sys::MemoryBlock(jitTargetAddressToPointer<void *>(TargetAllocAddr), - TargetAllocSize); - - TargetAllocAddr += TargetAllocSize; - assert(TargetAllocAddr - TargetSegmentAddr <= - HostAllocation.allocatedSize() && - "Out of range on target"); - } - - return Error::success(); - } - - Error copyAndProtect() { - unsigned Permissions = 0u; - - // Copy segments one by one. - for (auto &KV : TargetSegBlocks) { - Permissions |= KV.first; - - const sys::MemoryBlock &TargetBlock = KV.second; - const sys::MemoryBlock &HostBlock = HostSegBlocks.lookup(KV.first); - - size_t TargetAllocSize = TargetBlock.allocatedSize(); - auto TargetAllocAddr = pointerToJITTargetAddress(TargetBlock.base()); - auto *HostAllocBegin = static_cast<const char *>(HostBlock.base()); - - bool CopyErr = - Client.writeMem(TargetAllocAddr, HostAllocBegin, TargetAllocSize); - if (CopyErr) - return createStringError(inconvertibleErrorCode(), - "Failed to copy %d segment to the target", - KV.first); - } - - // Set permission flags for all segments at once. 
- bool ProtectErr = - Client.setProtections(Id, TargetSegmentAddr, Permissions); - if (ProtectErr) - return createStringError(inconvertibleErrorCode(), - "Failed to apply permissions for %d segment " - "on the target", - Permissions); - return Error::success(); - } - - static Expected<size_t> - calcTotalAllocSize(const SegmentsRequestMap &Request, - unsigned TargetPageSize) { - size_t TotalSize = 0; - for (const auto &KV : Request) { - const auto &Seg = KV.second; - - if (Seg.getAlignment() > TargetPageSize) - return make_error<StringError>("Cannot request alignment higher than " - "page alignment on target", - inconvertibleErrorCode()); - - TotalSize = alignTo(TotalSize, TargetPageSize); - TotalSize += Seg.getContentSize(); - TotalSize += Seg.getZeroFillSize(); - } - - return TotalSize; - } - }; - - class RemoteJITLinkMemoryManager : public jitlink::JITLinkMemoryManager { - public: - RemoteJITLinkMemoryManager(OrcRemoteTargetClient &Client, - ResourceIdMgr::ResourceId Id) - : Client(Client), Id(Id) {} - - RemoteJITLinkMemoryManager(const RemoteJITLinkMemoryManager &) = delete; - RemoteJITLinkMemoryManager(RemoteJITLinkMemoryManager &&) = default; - - RemoteJITLinkMemoryManager & - operator=(const RemoteJITLinkMemoryManager &) = delete; - RemoteJITLinkMemoryManager & - operator=(RemoteJITLinkMemoryManager &&) = delete; - - ~RemoteJITLinkMemoryManager() { - Client.destroyRemoteAllocator(Id); - LLVM_DEBUG(dbgs() << "Destroyed remote allocator " << Id << "\n"); - } - - Expected<std::unique_ptr<Allocation>> - allocate(const jitlink::JITLinkDylib *JD, - const SegmentsRequestMap &Request) override { - return RPCMMAlloc::Create(Client, Id, Request); - } - - private: - OrcRemoteTargetClient &Client; - ResourceIdMgr::ResourceId Id; - }; - - /// Remote indirect stubs manager. 
- class RemoteIndirectStubsManager : public IndirectStubsManager { - public: - RemoteIndirectStubsManager(OrcRemoteTargetClient &Client, - ResourceIdMgr::ResourceId Id) - : Client(Client), Id(Id) {} - - ~RemoteIndirectStubsManager() override { - Client.destroyIndirectStubsManager(Id); - } - - Error createStub(StringRef StubName, JITTargetAddress StubAddr, - JITSymbolFlags StubFlags) override { - if (auto Err = reserveStubs(1)) - return Err; - - return createStubInternal(StubName, StubAddr, StubFlags); - } - - Error createStubs(const StubInitsMap &StubInits) override { - if (auto Err = reserveStubs(StubInits.size())) - return Err; - - for (auto &Entry : StubInits) - if (auto Err = createStubInternal(Entry.first(), Entry.second.first, - Entry.second.second)) - return Err; - - return Error::success(); - } - - JITEvaluatedSymbol findStub(StringRef Name, bool ExportedStubsOnly) override { - auto I = StubIndexes.find(Name); - if (I == StubIndexes.end()) - return nullptr; - auto Key = I->second.first; - auto Flags = I->second.second; - auto StubSymbol = JITEvaluatedSymbol(getStubAddr(Key), Flags); - if (ExportedStubsOnly && !StubSymbol.getFlags().isExported()) - return nullptr; - return StubSymbol; - } - - JITEvaluatedSymbol findPointer(StringRef Name) override { - auto I = StubIndexes.find(Name); - if (I == StubIndexes.end()) - return nullptr; - auto Key = I->second.first; - auto Flags = I->second.second; - return JITEvaluatedSymbol(getPtrAddr(Key), Flags); - } - - Error updatePointer(StringRef Name, JITTargetAddress NewAddr) override { - auto I = StubIndexes.find(Name); - assert(I != StubIndexes.end() && "No stub pointer for symbol"); - auto Key = I->second.first; - return Client.writePointer(getPtrAddr(Key), NewAddr); - } - - private: - struct RemoteIndirectStubsInfo { - JITTargetAddress StubBase; - JITTargetAddress PtrBase; - unsigned NumStubs; - }; - - using StubKey = std::pair<uint16_t, uint16_t>; - - Error reserveStubs(unsigned NumStubs) { - if (NumStubs <= 
FreeStubs.size()) - return Error::success(); - - unsigned NewStubsRequired = NumStubs - FreeStubs.size(); - JITTargetAddress StubBase; - JITTargetAddress PtrBase; - unsigned NumStubsEmitted; - - if (auto StubInfoOrErr = Client.emitIndirectStubs(Id, NewStubsRequired)) - std::tie(StubBase, PtrBase, NumStubsEmitted) = *StubInfoOrErr; - else - return StubInfoOrErr.takeError(); - - unsigned NewBlockId = RemoteIndirectStubsInfos.size(); - RemoteIndirectStubsInfos.push_back({StubBase, PtrBase, NumStubsEmitted}); - - for (unsigned I = 0; I < NumStubsEmitted; ++I) - FreeStubs.push_back(std::make_pair(NewBlockId, I)); - - return Error::success(); - } - - Error createStubInternal(StringRef StubName, JITTargetAddress InitAddr, - JITSymbolFlags StubFlags) { - auto Key = FreeStubs.back(); - FreeStubs.pop_back(); - StubIndexes[StubName] = std::make_pair(Key, StubFlags); - return Client.writePointer(getPtrAddr(Key), InitAddr); - } - - JITTargetAddress getStubAddr(StubKey K) { - assert(RemoteIndirectStubsInfos[K.first].StubBase != 0 && - "Missing stub address"); - return RemoteIndirectStubsInfos[K.first].StubBase + - K.second * Client.getIndirectStubSize(); - } - - JITTargetAddress getPtrAddr(StubKey K) { - assert(RemoteIndirectStubsInfos[K.first].PtrBase != 0 && - "Missing pointer address"); - return RemoteIndirectStubsInfos[K.first].PtrBase + - K.second * Client.getPointerSize(); - } - - OrcRemoteTargetClient &Client; - ResourceIdMgr::ResourceId Id; - std::vector<RemoteIndirectStubsInfo> RemoteIndirectStubsInfos; - std::vector<StubKey> FreeStubs; - StringMap<std::pair<StubKey, JITSymbolFlags>> StubIndexes; - }; - - class RemoteTrampolinePool : public TrampolinePool { - public: - RemoteTrampolinePool(OrcRemoteTargetClient &Client) : Client(Client) {} - - private: - Error grow() override { - JITTargetAddress BlockAddr = 0; - uint32_t NumTrampolines = 0; - if (auto TrampolineInfoOrErr = Client.emitTrampolineBlock()) - std::tie(BlockAddr, NumTrampolines) = *TrampolineInfoOrErr; - 
else - return TrampolineInfoOrErr.takeError(); - - uint32_t TrampolineSize = Client.getTrampolineSize(); - for (unsigned I = 0; I < NumTrampolines; ++I) - AvailableTrampolines.push_back(BlockAddr + (I * TrampolineSize)); - - return Error::success(); - } - - OrcRemoteTargetClient &Client; - }; - - /// Remote compile callback manager. - class RemoteCompileCallbackManager : public JITCompileCallbackManager { - public: - RemoteCompileCallbackManager(OrcRemoteTargetClient &Client, - ExecutionSession &ES, - JITTargetAddress ErrorHandlerAddress) - : JITCompileCallbackManager( - std::make_unique<RemoteTrampolinePool>(Client), ES, - ErrorHandlerAddress) {} - }; - - /// Create an OrcRemoteTargetClient. - /// Channel is the ChannelT instance to communicate on. It is assumed that - /// the channel is ready to be read from and written to. - static Expected<std::unique_ptr<OrcRemoteTargetClient>> - Create(shared::RawByteChannel &Channel, ExecutionSession &ES) { - Error Err = Error::success(); - auto Client = std::unique_ptr<OrcRemoteTargetClient>( - new OrcRemoteTargetClient(Channel, ES, Err)); - if (Err) - return std::move(Err); - return std::move(Client); - } - - /// Call the int(void) function at the given address in the target and return - /// its result. - Expected<int> callIntVoid(JITTargetAddress Addr) { - LLVM_DEBUG(dbgs() << "Calling int(*)(void) " - << format("0x%016" PRIx64, Addr) << "\n"); - return callB<exec::CallIntVoid>(Addr); - } - - /// Call the int(int) function at the given address in the target and return - /// its result. - Expected<int> callIntInt(JITTargetAddress Addr, int Arg) { - LLVM_DEBUG(dbgs() << "Calling int(*)(int) " << format("0x%016" PRIx64, Addr) - << "\n"); - return callB<exec::CallIntInt>(Addr, Arg); - } - - /// Call the int(int, char*[]) function at the given address in the target and - /// return its result. 
- Expected<int> callMain(JITTargetAddress Addr, - const std::vector<std::string> &Args) { - LLVM_DEBUG(dbgs() << "Calling int(*)(int, char*[]) " - << format("0x%016" PRIx64, Addr) << "\n"); - return callB<exec::CallMain>(Addr, Args); - } - - /// Call the void() function at the given address in the target and wait for - /// it to finish. - Error callVoidVoid(JITTargetAddress Addr) { - LLVM_DEBUG(dbgs() << "Calling void(*)(void) " - << format("0x%016" PRIx64, Addr) << "\n"); - return callB<exec::CallVoidVoid>(Addr); - } - - /// Create an RCMemoryManager which will allocate its memory on the remote - /// target. - Expected<std::unique_ptr<RemoteRTDyldMemoryManager>> - createRemoteMemoryManager() { - auto Id = AllocatorIds.getNext(); - if (auto Err = callB<mem::CreateRemoteAllocator>(Id)) - return std::move(Err); - return std::unique_ptr<RemoteRTDyldMemoryManager>( - new RemoteRTDyldMemoryManager(*this, Id)); - } - - /// Create a JITLink-compatible memory manager which will allocate working - /// memory on the host and target memory on the remote target. - Expected<std::unique_ptr<RemoteJITLinkMemoryManager>> - createRemoteJITLinkMemoryManager() { - auto Id = AllocatorIds.getNext(); - if (auto Err = callB<mem::CreateRemoteAllocator>(Id)) - return std::move(Err); - LLVM_DEBUG(dbgs() << "Created remote allocator " << Id << "\n"); - return std::unique_ptr<RemoteJITLinkMemoryManager>( - new RemoteJITLinkMemoryManager(*this, Id)); - } - - /// Create an RCIndirectStubsManager that will allocate stubs on the remote - /// target. 
- Expected<std::unique_ptr<RemoteIndirectStubsManager>> - createIndirectStubsManager() { - auto Id = IndirectStubOwnerIds.getNext(); - if (auto Err = callB<stubs::CreateIndirectStubsOwner>(Id)) - return std::move(Err); - return std::make_unique<RemoteIndirectStubsManager>(*this, Id); - } - - Expected<RemoteCompileCallbackManager &> - enableCompileCallbacks(JITTargetAddress ErrorHandlerAddress) { - assert(!CallbackManager && "CallbackManager already obtained"); - - // Emit the resolver block on the JIT server. - if (auto Err = callB<stubs::EmitResolverBlock>()) - return std::move(Err); - - // Create the callback manager. - CallbackManager.emplace(*this, ES, ErrorHandlerAddress); - RemoteCompileCallbackManager &Mgr = *CallbackManager; - return Mgr; - } - - /// Search for symbols in the remote process. Note: This should be used by - /// symbol resolvers *after* they've searched the local symbol table in the - /// JIT stack. - Expected<JITTargetAddress> getSymbolAddress(StringRef Name) { - return callB<utils::GetSymbolAddress>(Name); - } - - /// Get the triple for the remote target. 
- const std::string &getTargetTriple() const { return RemoteTargetTriple; } - - Error terminateSession() { return callB<utils::TerminateSession>(); } - -private: - OrcRemoteTargetClient(shared::RawByteChannel &Channel, ExecutionSession &ES, - Error &Err) - : shared::SingleThreadedRPCEndpoint<shared::RawByteChannel>(Channel, - true), - ES(ES) { - ErrorAsOutParameter EAO(&Err); - - addHandler<utils::RequestCompile>( - [this](JITTargetAddress Addr) -> JITTargetAddress { - if (CallbackManager) - return CallbackManager->executeCompileCallback(Addr); - return 0; - }); - - if (auto RIOrErr = callB<utils::GetRemoteInfo>()) { - std::tie(RemoteTargetTriple, RemotePointerSize, RemotePageSize, - RemoteTrampolineSize, RemoteIndirectStubSize) = *RIOrErr; - Err = Error::success(); - } else - Err = RIOrErr.takeError(); - } - - void deregisterEHFrames(JITTargetAddress Addr, uint32_t Size) { - if (auto Err = callB<eh::RegisterEHFrames>(Addr, Size)) - ES.reportError(std::move(Err)); - } - - void destroyRemoteAllocator(ResourceIdMgr::ResourceId Id) { - if (auto Err = callB<mem::DestroyRemoteAllocator>(Id)) { - // FIXME: This will be triggered by a removeModuleSet call: Propagate - // error return up through that. 
- llvm_unreachable("Failed to destroy remote allocator."); - AllocatorIds.release(Id); - } - } - - void destroyIndirectStubsManager(ResourceIdMgr::ResourceId Id) { - IndirectStubOwnerIds.release(Id); - if (auto Err = callB<stubs::DestroyIndirectStubsOwner>(Id)) - ES.reportError(std::move(Err)); - } - - Expected<std::tuple<JITTargetAddress, JITTargetAddress, uint32_t>> - emitIndirectStubs(ResourceIdMgr::ResourceId Id, uint32_t NumStubsRequired) { - return callB<stubs::EmitIndirectStubs>(Id, NumStubsRequired); - } - - Expected<std::tuple<JITTargetAddress, uint32_t>> emitTrampolineBlock() { - return callB<stubs::EmitTrampolineBlock>(); - } - - uint32_t getIndirectStubSize() const { return RemoteIndirectStubSize; } - uint32_t getPageSize() const { return RemotePageSize; } - uint32_t getPointerSize() const { return RemotePointerSize; } - - uint32_t getTrampolineSize() const { return RemoteTrampolineSize; } - - Expected<std::vector<uint8_t>> readMem(char *Dst, JITTargetAddress Src, - uint64_t Size) { - return callB<mem::ReadMem>(Src, Size); - } - - Error registerEHFrames(JITTargetAddress &RAddr, uint32_t Size) { - // FIXME: Duplicate error and report it via ReportError too? 
- return callB<eh::RegisterEHFrames>(RAddr, Size); - } - - JITTargetAddress reserveMem(ResourceIdMgr::ResourceId Id, uint64_t Size, - uint32_t Align) { - if (auto AddrOrErr = callB<mem::ReserveMem>(Id, Size, Align)) - return *AddrOrErr; - else { - ES.reportError(AddrOrErr.takeError()); - return 0; - } - } - - bool setProtections(ResourceIdMgr::ResourceId Id, - JITTargetAddress RemoteSegAddr, unsigned ProtFlags) { - if (auto Err = callB<mem::SetProtections>(Id, RemoteSegAddr, ProtFlags)) { - ES.reportError(std::move(Err)); - return true; - } else - return false; - } - - bool writeMem(JITTargetAddress Addr, const char *Src, uint64_t Size) { - if (auto Err = callB<mem::WriteMem>(DirectBufferWriter(Src, Addr, Size))) { - ES.reportError(std::move(Err)); - return true; - } else - return false; - } - - Error writePointer(JITTargetAddress Addr, JITTargetAddress PtrVal) { - return callB<mem::WritePtr>(Addr, PtrVal); - } - - static Error doNothing() { return Error::success(); } - - ExecutionSession &ES; - std::function<void(Error)> ReportError; - std::string RemoteTargetTriple; - uint32_t RemotePointerSize = 0; - uint32_t RemotePageSize = 0; - uint32_t RemoteTrampolineSize = 0; - uint32_t RemoteIndirectStubSize = 0; - ResourceIdMgr AllocatorIds, IndirectStubOwnerIds; - Optional<RemoteCompileCallbackManager> CallbackManager; -}; - -} // end namespace remote -} // end namespace orc -} // end namespace llvm - -#undef DEBUG_TYPE - -#endif // LLVM_EXECUTIONENGINE_ORC_ORCREMOTETARGETCLIENT_H diff --git a/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.h b/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.h deleted file mode 100644 index 367bfb369191..000000000000 --- a/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.h +++ /dev/null @@ -1,386 +0,0 @@ -//===- OrcRemoteTargetRPCAPI.h - Orc Remote-target RPC API ------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
-// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file defines the Orc remote-target RPC API. It should not be used -// directly, but is used by the RemoteTargetClient and RemoteTargetServer -// classes. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_EXECUTIONENGINE_ORC_ORCREMOTETARGETRPCAPI_H -#define LLVM_EXECUTIONENGINE_ORC_ORCREMOTETARGETRPCAPI_H - -#include "llvm/ExecutionEngine/JITSymbol.h" -#include "llvm/ExecutionEngine/Orc/Shared/RPCUtils.h" -#include "llvm/ExecutionEngine/Orc/Shared/RawByteChannel.h" - -namespace llvm { -namespace orc { - -namespace remote { - -/// Template error for missing resources. -template <typename ResourceIdT> -class ResourceNotFound - : public ErrorInfo<ResourceNotFound<ResourceIdT>> { -public: - static char ID; - - ResourceNotFound(ResourceIdT ResourceId, - std::string ResourceDescription = "") - : ResourceId(std::move(ResourceId)), - ResourceDescription(std::move(ResourceDescription)) {} - - std::error_code convertToErrorCode() const override { - return orcError(OrcErrorCode::UnknownResourceHandle); - } - - void log(raw_ostream &OS) const override { - OS << (ResourceDescription.empty() - ? 
"Remote resource with id " - : ResourceDescription) - << " " << ResourceId << " not found"; - } - -private: - ResourceIdT ResourceId; - std::string ResourceDescription; -}; - -template <typename ResourceIdT> -char ResourceNotFound<ResourceIdT>::ID = 0; - -class DirectBufferWriter { -public: - DirectBufferWriter() = default; - DirectBufferWriter(const char *Src, JITTargetAddress Dst, uint64_t Size) - : Src(Src), Dst(Dst), Size(Size) {} - - const char *getSrc() const { return Src; } - JITTargetAddress getDst() const { return Dst; } - uint64_t getSize() const { return Size; } - -private: - const char *Src; - JITTargetAddress Dst; - uint64_t Size; -}; - -} // end namespace remote - -namespace shared { - -template <> class SerializationTypeName<JITSymbolFlags> { -public: - static const char *getName() { return "JITSymbolFlags"; } -}; - -template <typename ChannelT> -class SerializationTraits<ChannelT, JITSymbolFlags> { -public: - - static Error serialize(ChannelT &C, const JITSymbolFlags &Flags) { - return serializeSeq(C, Flags.getRawFlagsValue(), Flags.getTargetFlags()); - } - - static Error deserialize(ChannelT &C, JITSymbolFlags &Flags) { - JITSymbolFlags::UnderlyingType JITFlags; - JITSymbolFlags::TargetFlagsType TargetFlags; - if (auto Err = deserializeSeq(C, JITFlags, TargetFlags)) - return Err; - Flags = JITSymbolFlags(static_cast<JITSymbolFlags::FlagNames>(JITFlags), - TargetFlags); - return Error::success(); - } -}; - -template <> class SerializationTypeName<remote::DirectBufferWriter> { -public: - static const char *getName() { return "DirectBufferWriter"; } -}; - -template <typename ChannelT> -class SerializationTraits< - ChannelT, remote::DirectBufferWriter, remote::DirectBufferWriter, - std::enable_if_t<std::is_base_of<RawByteChannel, ChannelT>::value>> { -public: - static Error serialize(ChannelT &C, const remote::DirectBufferWriter &DBW) { - if (auto EC = serializeSeq(C, DBW.getDst())) - return EC; - if (auto EC = serializeSeq(C, DBW.getSize())) - return 
EC; - return C.appendBytes(DBW.getSrc(), DBW.getSize()); - } - - static Error deserialize(ChannelT &C, remote::DirectBufferWriter &DBW) { - JITTargetAddress Dst; - if (auto EC = deserializeSeq(C, Dst)) - return EC; - uint64_t Size; - if (auto EC = deserializeSeq(C, Size)) - return EC; - char *Addr = reinterpret_cast<char *>(static_cast<uintptr_t>(Dst)); - - DBW = remote::DirectBufferWriter(nullptr, Dst, Size); - - return C.readBytes(Addr, Size); - } -}; - -} // end namespace shared - -namespace remote { - -class ResourceIdMgr { -public: - using ResourceId = uint64_t; - static const ResourceId InvalidId = ~0U; - - ResourceIdMgr() = default; - explicit ResourceIdMgr(ResourceId FirstValidId) - : NextId(std::move(FirstValidId)) {} - - ResourceId getNext() { - if (!FreeIds.empty()) { - ResourceId I = FreeIds.back(); - FreeIds.pop_back(); - return I; - } - assert(NextId + 1 != ~0ULL && "All ids allocated"); - return NextId++; - } - - void release(ResourceId I) { FreeIds.push_back(I); } - -private: - ResourceId NextId = 1; - std::vector<ResourceId> FreeIds; -}; - -/// Registers EH frames on the remote. -namespace eh { - - /// Registers EH frames on the remote. -class RegisterEHFrames - : public shared::RPCFunction<RegisterEHFrames, - void(JITTargetAddress Addr, uint32_t Size)> { -public: - static const char *getName() { return "RegisterEHFrames"; } -}; - - /// Deregisters EH frames on the remote. -class DeregisterEHFrames - : public shared::RPCFunction<DeregisterEHFrames, - void(JITTargetAddress Addr, uint32_t Size)> { -public: - static const char *getName() { return "DeregisterEHFrames"; } -}; - -} // end namespace eh - -/// RPC functions for executing remote code. -namespace exec { - - /// Call an 'int32_t()'-type function on the remote, returns the called - /// function's return value. 
-class CallIntVoid - : public shared::RPCFunction<CallIntVoid, int32_t(JITTargetAddress Addr)> { -public: - static const char *getName() { return "CallIntVoid"; } -}; - - /// Call an 'int32_t(int32_t)'-type function on the remote, returns the called - /// function's return value. -class CallIntInt - : public shared::RPCFunction<CallIntInt, - int32_t(JITTargetAddress Addr, int)> { -public: - static const char *getName() { return "CallIntInt"; } -}; - - /// Call an 'int32_t(int32_t, char**)'-type function on the remote, returns the - /// called function's return value. -class CallMain - : public shared::RPCFunction<CallMain, - int32_t(JITTargetAddress Addr, - std::vector<std::string> Args)> { -public: - static const char *getName() { return "CallMain"; } -}; - - /// Calls a 'void()'-type function on the remote, returns when the called - /// function completes. -class CallVoidVoid - : public shared::RPCFunction<CallVoidVoid, void(JITTargetAddress FnAddr)> { -public: - static const char *getName() { return "CallVoidVoid"; } -}; - -} // end namespace exec - -/// RPC functions for remote memory management / inspection / modification. -namespace mem { - - /// Creates a memory allocator on the remote. -class CreateRemoteAllocator - : public shared::RPCFunction<CreateRemoteAllocator, - void(ResourceIdMgr::ResourceId AllocatorID)> { -public: - static const char *getName() { return "CreateRemoteAllocator"; } -}; - - /// Destroys a remote allocator, freeing any memory allocated by it. -class DestroyRemoteAllocator - : public shared::RPCFunction<DestroyRemoteAllocator, - void(ResourceIdMgr::ResourceId AllocatorID)> { -public: - static const char *getName() { return "DestroyRemoteAllocator"; } -}; - - /// Read a remote memory block. 
-class ReadMem - : public shared::RPCFunction< - ReadMem, std::vector<uint8_t>(JITTargetAddress Src, uint64_t Size)> { -public: - static const char *getName() { return "ReadMem"; } -}; - - /// Reserve a block of memory on the remote via the given allocator. -class ReserveMem - : public shared::RPCFunction< - ReserveMem, JITTargetAddress(ResourceIdMgr::ResourceId AllocID, - uint64_t Size, uint32_t Align)> { -public: - static const char *getName() { return "ReserveMem"; } -}; - - /// Set the memory protection on a memory block. -class SetProtections - : public shared::RPCFunction< - SetProtections, void(ResourceIdMgr::ResourceId AllocID, - JITTargetAddress Dst, uint32_t ProtFlags)> { -public: - static const char *getName() { return "SetProtections"; } -}; - - /// Write to a remote memory block. -class WriteMem - : public shared::RPCFunction<WriteMem, - void(remote::DirectBufferWriter DB)> { -public: - static const char *getName() { return "WriteMem"; } -}; - - /// Write to a remote pointer. -class WritePtr - : public shared::RPCFunction<WritePtr, void(JITTargetAddress Dst, - JITTargetAddress Val)> { -public: - static const char *getName() { return "WritePtr"; } -}; - -} // end namespace mem - -/// RPC functions for remote stub and trampoline management. -namespace stubs { - - /// Creates an indirect stub owner on the remote. -class CreateIndirectStubsOwner - : public shared::RPCFunction<CreateIndirectStubsOwner, - void(ResourceIdMgr::ResourceId StubOwnerID)> { -public: - static const char *getName() { return "CreateIndirectStubsOwner"; } -}; - - /// RPC function for destroying an indirect stubs owner. -class DestroyIndirectStubsOwner - : public shared::RPCFunction<DestroyIndirectStubsOwner, - void(ResourceIdMgr::ResourceId StubsOwnerID)> { -public: - static const char *getName() { return "DestroyIndirectStubsOwner"; } -}; - - /// EmitIndirectStubs result is (StubsBase, PtrsBase, NumStubsEmitted). 
-class EmitIndirectStubs - : public shared::RPCFunction< - EmitIndirectStubs, - std::tuple<JITTargetAddress, JITTargetAddress, uint32_t>( - ResourceIdMgr::ResourceId StubsOwnerID, - uint32_t NumStubsRequired)> { -public: - static const char *getName() { return "EmitIndirectStubs"; } -}; - - /// RPC function to emit the resolver block and return its address. -class EmitResolverBlock - : public shared::RPCFunction<EmitResolverBlock, void()> { -public: - static const char *getName() { return "EmitResolverBlock"; } -}; - - /// EmitTrampolineBlock result is (BlockAddr, NumTrampolines). -class EmitTrampolineBlock - : public shared::RPCFunction<EmitTrampolineBlock, - std::tuple<JITTargetAddress, uint32_t>()> { -public: - static const char *getName() { return "EmitTrampolineBlock"; } -}; - -} // end namespace stubs - -/// Miscelaneous RPC functions for dealing with remotes. -namespace utils { - - /// GetRemoteInfo result is (Triple, PointerSize, PageSize, TrampolineSize, - /// IndirectStubsSize). -class GetRemoteInfo - : public shared::RPCFunction< - GetRemoteInfo, - std::tuple<std::string, uint32_t, uint32_t, uint32_t, uint32_t>()> { -public: - static const char *getName() { return "GetRemoteInfo"; } -}; - - /// Get the address of a remote symbol. -class GetSymbolAddress - : public shared::RPCFunction<GetSymbolAddress, - JITTargetAddress(std::string SymbolName)> { -public: - static const char *getName() { return "GetSymbolAddress"; } -}; - - /// Request that the host execute a compile callback. -class RequestCompile - : public shared::RPCFunction< - RequestCompile, JITTargetAddress(JITTargetAddress TrampolineAddr)> { -public: - static const char *getName() { return "RequestCompile"; } -}; - - /// Notify the remote and terminate the session. 
-class TerminateSession : public shared::RPCFunction<TerminateSession, void()> { -public: - static const char *getName() { return "TerminateSession"; } -}; - -} // namespace utils - -class OrcRemoteTargetRPCAPI - : public shared::SingleThreadedRPCEndpoint<shared::RawByteChannel> { -public: - // FIXME: Remove constructors once MSVC supports synthesizing move-ops. - OrcRemoteTargetRPCAPI(shared::RawByteChannel &C) - : shared::SingleThreadedRPCEndpoint<shared::RawByteChannel>(C, true) {} -}; - -} // end namespace remote - -} // end namespace orc -} // end namespace llvm - -#endif // LLVM_EXECUTIONENGINE_ORC_ORCREMOTETARGETRPCAPI_H diff --git a/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetServer.h b/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetServer.h deleted file mode 100644 index ce9bf064303d..000000000000 --- a/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetServer.h +++ /dev/null @@ -1,464 +0,0 @@ -//===- OrcRemoteTargetServer.h - Orc Remote-target Server -------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file defines the OrcRemoteTargetServer class. It can be used to build a -// JIT server that can execute code sent from an OrcRemoteTargetClient. 
-// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_EXECUTIONENGINE_ORC_ORCREMOTETARGETSERVER_H -#define LLVM_EXECUTIONENGINE_ORC_ORCREMOTETARGETSERVER_H - -#include "llvm/ExecutionEngine/JITSymbol.h" -#include "llvm/ExecutionEngine/Orc/IndirectionUtils.h" -#include "llvm/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.h" -#include "llvm/ExecutionEngine/Orc/Shared/OrcError.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/Error.h" -#include "llvm/Support/Format.h" -#include "llvm/Support/Host.h" -#include "llvm/Support/Memory.h" -#include "llvm/Support/Process.h" -#include "llvm/Support/raw_ostream.h" -#include <algorithm> -#include <cassert> -#include <cstddef> -#include <cstdint> -#include <functional> -#include <map> -#include <memory> -#include <string> -#include <system_error> -#include <tuple> -#include <type_traits> -#include <vector> - -#define DEBUG_TYPE "orc-remote" - -namespace llvm { -namespace orc { -namespace remote { - -template <typename ChannelT, typename TargetT> -class OrcRemoteTargetServer - : public shared::SingleThreadedRPCEndpoint<shared::RawByteChannel> { -public: - using SymbolLookupFtor = - std::function<JITTargetAddress(const std::string &Name)>; - - using EHFrameRegistrationFtor = - std::function<void(uint8_t *Addr, uint32_t Size)>; - - OrcRemoteTargetServer(ChannelT &Channel, SymbolLookupFtor SymbolLookup, - EHFrameRegistrationFtor EHFramesRegister, - EHFrameRegistrationFtor EHFramesDeregister) - : shared::SingleThreadedRPCEndpoint<shared::RawByteChannel>(Channel, - true), - SymbolLookup(std::move(SymbolLookup)), - EHFramesRegister(std::move(EHFramesRegister)), - EHFramesDeregister(std::move(EHFramesDeregister)) { - using ThisT = std::remove_reference_t<decltype(*this)>; - addHandler<exec::CallIntVoid>(*this, &ThisT::handleCallIntVoid); - addHandler<exec::CallIntInt>(*this, &ThisT::handleCallIntInt); - addHandler<exec::CallMain>(*this, &ThisT::handleCallMain); - 
addHandler<exec::CallVoidVoid>(*this, &ThisT::handleCallVoidVoid); - addHandler<mem::CreateRemoteAllocator>(*this, - &ThisT::handleCreateRemoteAllocator); - addHandler<mem::DestroyRemoteAllocator>( - *this, &ThisT::handleDestroyRemoteAllocator); - addHandler<mem::ReadMem>(*this, &ThisT::handleReadMem); - addHandler<mem::ReserveMem>(*this, &ThisT::handleReserveMem); - addHandler<mem::SetProtections>(*this, &ThisT::handleSetProtections); - addHandler<mem::WriteMem>(*this, &ThisT::handleWriteMem); - addHandler<mem::WritePtr>(*this, &ThisT::handleWritePtr); - addHandler<eh::RegisterEHFrames>(*this, &ThisT::handleRegisterEHFrames); - addHandler<eh::DeregisterEHFrames>(*this, &ThisT::handleDeregisterEHFrames); - addHandler<stubs::CreateIndirectStubsOwner>( - *this, &ThisT::handleCreateIndirectStubsOwner); - addHandler<stubs::DestroyIndirectStubsOwner>( - *this, &ThisT::handleDestroyIndirectStubsOwner); - addHandler<stubs::EmitIndirectStubs>(*this, - &ThisT::handleEmitIndirectStubs); - addHandler<stubs::EmitResolverBlock>(*this, - &ThisT::handleEmitResolverBlock); - addHandler<stubs::EmitTrampolineBlock>(*this, - &ThisT::handleEmitTrampolineBlock); - addHandler<utils::GetSymbolAddress>(*this, &ThisT::handleGetSymbolAddress); - addHandler<utils::GetRemoteInfo>(*this, &ThisT::handleGetRemoteInfo); - addHandler<utils::TerminateSession>(*this, &ThisT::handleTerminateSession); - } - - // FIXME: Remove move/copy ops once MSVC supports synthesizing move ops. 
- OrcRemoteTargetServer(const OrcRemoteTargetServer &) = delete; - OrcRemoteTargetServer &operator=(const OrcRemoteTargetServer &) = delete; - - OrcRemoteTargetServer(OrcRemoteTargetServer &&Other) = default; - OrcRemoteTargetServer &operator=(OrcRemoteTargetServer &&) = delete; - - Expected<JITTargetAddress> requestCompile(JITTargetAddress TrampolineAddr) { - return callB<utils::RequestCompile>(TrampolineAddr); - } - - bool receivedTerminate() const { return TerminateFlag; } - -private: - struct Allocator { - Allocator() = default; - Allocator(Allocator &&Other) : Allocs(std::move(Other.Allocs)) {} - - Allocator &operator=(Allocator &&Other) { - Allocs = std::move(Other.Allocs); - return *this; - } - - ~Allocator() { - for (auto &Alloc : Allocs) - sys::Memory::releaseMappedMemory(Alloc.second); - } - - Error allocate(void *&Addr, size_t Size, uint32_t Align) { - std::error_code EC; - sys::MemoryBlock MB = sys::Memory::allocateMappedMemory( - Size, nullptr, sys::Memory::MF_READ | sys::Memory::MF_WRITE, EC); - if (EC) - return errorCodeToError(EC); - - Addr = MB.base(); - assert(Allocs.find(MB.base()) == Allocs.end() && "Duplicate alloc"); - Allocs[MB.base()] = std::move(MB); - return Error::success(); - } - - Error setProtections(void *block, unsigned Flags) { - auto I = Allocs.find(block); - if (I == Allocs.end()) - return errorCodeToError(orcError(OrcErrorCode::RemoteMProtectAddrUnrecognized)); - return errorCodeToError( - sys::Memory::protectMappedMemory(I->second, Flags)); - } - - private: - std::map<void *, sys::MemoryBlock> Allocs; - }; - - static Error doNothing() { return Error::success(); } - - static JITTargetAddress reenter(void *JITTargetAddr, void *TrampolineAddr) { - auto T = static_cast<OrcRemoteTargetServer *>(JITTargetAddr); - auto AddrOrErr = T->requestCompile(static_cast<JITTargetAddress>( - reinterpret_cast<uintptr_t>(TrampolineAddr))); - // FIXME: Allow customizable failure substitution functions. 
- assert(AddrOrErr && "Compile request failed"); - return *AddrOrErr; - } - - Expected<int32_t> handleCallIntVoid(JITTargetAddress Addr) { - using IntVoidFnTy = int (*)(); - - IntVoidFnTy Fn = - reinterpret_cast<IntVoidFnTy>(static_cast<uintptr_t>(Addr)); - - LLVM_DEBUG(dbgs() << " Calling " << format("0x%016x", Addr) << "\n"); - int Result = Fn(); - LLVM_DEBUG(dbgs() << " Result = " << Result << "\n"); - - return Result; - } - - Expected<int32_t> handleCallIntInt(JITTargetAddress Addr, int Arg) { - using IntIntFnTy = int (*)(int); - - IntIntFnTy Fn = reinterpret_cast<IntIntFnTy>(static_cast<uintptr_t>(Addr)); - - LLVM_DEBUG(dbgs() << " Calling " << format("0x%016x", Addr) - << " with argument " << Arg << "\n"); - int Result = Fn(Arg); - LLVM_DEBUG(dbgs() << " Result = " << Result << "\n"); - - return Result; - } - - Expected<int32_t> handleCallMain(JITTargetAddress Addr, - std::vector<std::string> Args) { - using MainFnTy = int (*)(int, const char *[]); - - MainFnTy Fn = reinterpret_cast<MainFnTy>(static_cast<uintptr_t>(Addr)); - int ArgC = Args.size() + 1; - int Idx = 1; - std::unique_ptr<const char *[]> ArgV(new const char *[ArgC + 1]); - ArgV[0] = "<jit process>"; - for (auto &Arg : Args) - ArgV[Idx++] = Arg.c_str(); - ArgV[ArgC] = 0; - LLVM_DEBUG(for (int Idx = 0; Idx < ArgC; ++Idx) { - llvm::dbgs() << "Arg " << Idx << ": " << ArgV[Idx] << "\n"; - }); - - LLVM_DEBUG(dbgs() << " Calling " << format("0x%016x", Addr) << "\n"); - int Result = Fn(ArgC, ArgV.get()); - LLVM_DEBUG(dbgs() << " Result = " << Result << "\n"); - - return Result; - } - - Error handleCallVoidVoid(JITTargetAddress Addr) { - using VoidVoidFnTy = void (*)(); - - VoidVoidFnTy Fn = - reinterpret_cast<VoidVoidFnTy>(static_cast<uintptr_t>(Addr)); - - LLVM_DEBUG(dbgs() << " Calling " << format("0x%016x", Addr) << "\n"); - Fn(); - LLVM_DEBUG(dbgs() << " Complete.\n"); - - return Error::success(); - } - - Error handleCreateRemoteAllocator(ResourceIdMgr::ResourceId Id) { - auto I = 
Allocators.find(Id); - if (I != Allocators.end()) - return errorCodeToError( - orcError(OrcErrorCode::RemoteAllocatorIdAlreadyInUse)); - LLVM_DEBUG(dbgs() << " Created allocator " << Id << "\n"); - Allocators[Id] = Allocator(); - return Error::success(); - } - - Error handleCreateIndirectStubsOwner(ResourceIdMgr::ResourceId Id) { - auto I = IndirectStubsOwners.find(Id); - if (I != IndirectStubsOwners.end()) - return errorCodeToError( - orcError(OrcErrorCode::RemoteIndirectStubsOwnerIdAlreadyInUse)); - LLVM_DEBUG(dbgs() << " Create indirect stubs owner " << Id << "\n"); - IndirectStubsOwners[Id] = ISBlockOwnerList(); - return Error::success(); - } - - Error handleDeregisterEHFrames(JITTargetAddress TAddr, uint32_t Size) { - uint8_t *Addr = reinterpret_cast<uint8_t *>(static_cast<uintptr_t>(TAddr)); - LLVM_DEBUG(dbgs() << " Registering EH frames at " - << format("0x%016x", TAddr) << ", Size = " << Size - << " bytes\n"); - EHFramesDeregister(Addr, Size); - return Error::success(); - } - - Error handleDestroyRemoteAllocator(ResourceIdMgr::ResourceId Id) { - auto I = Allocators.find(Id); - if (I == Allocators.end()) - return errorCodeToError( - orcError(OrcErrorCode::RemoteAllocatorDoesNotExist)); - Allocators.erase(I); - LLVM_DEBUG(dbgs() << " Destroyed allocator " << Id << "\n"); - return Error::success(); - } - - Error handleDestroyIndirectStubsOwner(ResourceIdMgr::ResourceId Id) { - auto I = IndirectStubsOwners.find(Id); - if (I == IndirectStubsOwners.end()) - return errorCodeToError( - orcError(OrcErrorCode::RemoteIndirectStubsOwnerDoesNotExist)); - IndirectStubsOwners.erase(I); - return Error::success(); - } - - Expected<std::tuple<JITTargetAddress, JITTargetAddress, uint32_t>> - handleEmitIndirectStubs(ResourceIdMgr::ResourceId Id, - uint32_t NumStubsRequired) { - LLVM_DEBUG(dbgs() << " ISMgr " << Id << " request " << NumStubsRequired - << " stubs.\n"); - - auto StubOwnerItr = IndirectStubsOwners.find(Id); - if (StubOwnerItr == IndirectStubsOwners.end()) - return 
errorCodeToError( - orcError(OrcErrorCode::RemoteIndirectStubsOwnerDoesNotExist)); - - auto IS = LocalIndirectStubsInfo<TargetT>::create( - NumStubsRequired, sys::Process::getPageSizeEstimate()); - if (!IS) - return IS.takeError(); - - JITTargetAddress StubsBase = pointerToJITTargetAddress(IS->getStub(0)); - JITTargetAddress PtrsBase = pointerToJITTargetAddress(IS->getPtr(0)); - uint32_t NumStubsEmitted = IS->getNumStubs(); - - auto &BlockList = StubOwnerItr->second; - BlockList.push_back(std::move(*IS)); - - return std::make_tuple(StubsBase, PtrsBase, NumStubsEmitted); - } - - Error handleEmitResolverBlock() { - std::error_code EC; - ResolverBlock = sys::OwningMemoryBlock(sys::Memory::allocateMappedMemory( - TargetT::ResolverCodeSize, nullptr, - sys::Memory::MF_READ | sys::Memory::MF_WRITE, EC)); - if (EC) - return errorCodeToError(EC); - - TargetT::writeResolverCode(static_cast<char *>(ResolverBlock.base()), - pointerToJITTargetAddress(ResolverBlock.base()), - pointerToJITTargetAddress(&reenter), - pointerToJITTargetAddress(this)); - - return errorCodeToError(sys::Memory::protectMappedMemory( - ResolverBlock.getMemoryBlock(), - sys::Memory::MF_READ | sys::Memory::MF_EXEC)); - } - - Expected<std::tuple<JITTargetAddress, uint32_t>> handleEmitTrampolineBlock() { - std::error_code EC; - auto TrampolineBlock = - sys::OwningMemoryBlock(sys::Memory::allocateMappedMemory( - sys::Process::getPageSizeEstimate(), nullptr, - sys::Memory::MF_READ | sys::Memory::MF_WRITE, EC)); - if (EC) - return errorCodeToError(EC); - - uint32_t NumTrampolines = - (sys::Process::getPageSizeEstimate() - TargetT::PointerSize) / - TargetT::TrampolineSize; - - char *TrampolineMem = static_cast<char *>(TrampolineBlock.base()); - TargetT::writeTrampolines( - TrampolineMem, pointerToJITTargetAddress(TrampolineMem), - pointerToJITTargetAddress(ResolverBlock.base()), NumTrampolines); - - EC = sys::Memory::protectMappedMemory(TrampolineBlock.getMemoryBlock(), - sys::Memory::MF_READ | - 
sys::Memory::MF_EXEC); - - TrampolineBlocks.push_back(std::move(TrampolineBlock)); - - return std::make_tuple(pointerToJITTargetAddress(TrampolineMem), - NumTrampolines); - } - - Expected<JITTargetAddress> handleGetSymbolAddress(const std::string &Name) { - JITTargetAddress Addr = SymbolLookup(Name); - LLVM_DEBUG(dbgs() << " Symbol '" << Name - << "' = " << format("0x%016x", Addr) << "\n"); - return Addr; - } - - Expected<std::tuple<std::string, uint32_t, uint32_t, uint32_t, uint32_t>> - handleGetRemoteInfo() { - std::string ProcessTriple = sys::getProcessTriple(); - uint32_t PointerSize = TargetT::PointerSize; - uint32_t PageSize = sys::Process::getPageSizeEstimate(); - uint32_t TrampolineSize = TargetT::TrampolineSize; - uint32_t IndirectStubSize = TargetT::StubSize; - LLVM_DEBUG(dbgs() << " Remote info:\n" - << " triple = '" << ProcessTriple << "'\n" - << " pointer size = " << PointerSize << "\n" - << " page size = " << PageSize << "\n" - << " trampoline size = " << TrampolineSize << "\n" - << " indirect stub size = " << IndirectStubSize - << "\n"); - return std::make_tuple(ProcessTriple, PointerSize, PageSize, TrampolineSize, - IndirectStubSize); - } - - Expected<std::vector<uint8_t>> handleReadMem(JITTargetAddress RSrc, - uint64_t Size) { - uint8_t *Src = reinterpret_cast<uint8_t *>(static_cast<uintptr_t>(RSrc)); - - LLVM_DEBUG(dbgs() << " Reading " << Size << " bytes from " - << format("0x%016x", RSrc) << "\n"); - - std::vector<uint8_t> Buffer; - Buffer.resize(Size); - for (uint8_t *P = Src; Size != 0; --Size) - Buffer.push_back(*P++); - - return Buffer; - } - - Error handleRegisterEHFrames(JITTargetAddress TAddr, uint32_t Size) { - uint8_t *Addr = reinterpret_cast<uint8_t *>(static_cast<uintptr_t>(TAddr)); - LLVM_DEBUG(dbgs() << " Registering EH frames at " - << format("0x%016x", TAddr) << ", Size = " << Size - << " bytes\n"); - EHFramesRegister(Addr, Size); - return Error::success(); - } - - Expected<JITTargetAddress> 
handleReserveMem(ResourceIdMgr::ResourceId Id, - uint64_t Size, uint32_t Align) { - auto I = Allocators.find(Id); - if (I == Allocators.end()) - return errorCodeToError( - orcError(OrcErrorCode::RemoteAllocatorDoesNotExist)); - auto &Allocator = I->second; - void *LocalAllocAddr = nullptr; - if (auto Err = Allocator.allocate(LocalAllocAddr, Size, Align)) - return std::move(Err); - - LLVM_DEBUG(dbgs() << " Allocator " << Id << " reserved " << LocalAllocAddr - << " (" << Size << " bytes, alignment " << Align - << ")\n"); - - JITTargetAddress AllocAddr = static_cast<JITTargetAddress>( - reinterpret_cast<uintptr_t>(LocalAllocAddr)); - - return AllocAddr; - } - - Error handleSetProtections(ResourceIdMgr::ResourceId Id, - JITTargetAddress Addr, uint32_t Flags) { - auto I = Allocators.find(Id); - if (I == Allocators.end()) - return errorCodeToError( - orcError(OrcErrorCode::RemoteAllocatorDoesNotExist)); - auto &Allocator = I->second; - void *LocalAddr = reinterpret_cast<void *>(static_cast<uintptr_t>(Addr)); - LLVM_DEBUG(dbgs() << " Allocator " << Id << " set permissions on " - << LocalAddr << " to " - << (Flags & sys::Memory::MF_READ ? 'R' : '-') - << (Flags & sys::Memory::MF_WRITE ? 'W' : '-') - << (Flags & sys::Memory::MF_EXEC ? 
'X' : '-') << "\n"); - return Allocator.setProtections(LocalAddr, Flags); - } - - Error handleTerminateSession() { - TerminateFlag = true; - return Error::success(); - } - - Error handleWriteMem(DirectBufferWriter DBW) { - LLVM_DEBUG(dbgs() << " Writing " << DBW.getSize() << " bytes to " - << format("0x%016x", DBW.getDst()) << "\n"); - return Error::success(); - } - - Error handleWritePtr(JITTargetAddress Addr, JITTargetAddress PtrVal) { - LLVM_DEBUG(dbgs() << " Writing pointer *" << format("0x%016x", Addr) - << " = " << format("0x%016x", PtrVal) << "\n"); - uintptr_t *Ptr = - reinterpret_cast<uintptr_t *>(static_cast<uintptr_t>(Addr)); - *Ptr = static_cast<uintptr_t>(PtrVal); - return Error::success(); - } - - SymbolLookupFtor SymbolLookup; - EHFrameRegistrationFtor EHFramesRegister, EHFramesDeregister; - std::map<ResourceIdMgr::ResourceId, Allocator> Allocators; - using ISBlockOwnerList = std::vector<LocalIndirectStubsInfo<TargetT>>; - std::map<ResourceIdMgr::ResourceId, ISBlockOwnerList> IndirectStubsOwners; - sys::OwningMemoryBlock ResolverBlock; - std::vector<sys::OwningMemoryBlock> TrampolineBlocks; - bool TerminateFlag = false; -}; - -} // end namespace remote -} // end namespace orc -} // end namespace llvm - -#undef DEBUG_TYPE - -#endif // LLVM_EXECUTIONENGINE_ORC_ORCREMOTETARGETSERVER_H diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h b/llvm/include/llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h index 78a6623d7594..3c0b2b9edd52 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h @@ -34,25 +34,26 @@ private: }; /// Represents an address in the executor process. -class ExecutorAddress { +class ExecutorAddr { public: - ExecutorAddress() = default; - explicit ExecutorAddress(uint64_t Addr) : Addr(Addr) {} + ExecutorAddr() = default; - /// Create an ExecutorAddress from the given pointer. + /// Create an ExecutorAddr from the given value. 
+ explicit ExecutorAddr(uint64_t Addr) : Addr(Addr) {} + + /// Create an ExecutorAddr from the given pointer. /// Warning: This should only be used when JITing in-process. - template <typename T> static ExecutorAddress fromPtr(T *Value) { - return ExecutorAddress( + template <typename T> static ExecutorAddr fromPtr(T *Value) { + return ExecutorAddr( static_cast<uint64_t>(reinterpret_cast<uintptr_t>(Value))); } - /// Cast this ExecutorAddress to a pointer of the given type. - /// Warning: This should only be esude when JITing in-process. + /// Cast this ExecutorAddr to a pointer of the given type. + /// Warning: This should only be used when JITing in-process. template <typename T> T toPtr() const { static_assert(std::is_pointer<T>::value, "T must be a pointer type"); uintptr_t IntPtr = static_cast<uintptr_t>(Addr); - assert(IntPtr == Addr && - "JITTargetAddress value out of range for uintptr_t"); + assert(IntPtr == Addr && "ExecutorAddr value out of range for uintptr_t"); return reinterpret_cast<T>(IntPtr); } @@ -62,53 +63,47 @@ public: explicit operator bool() const { return Addr != 0; } - friend bool operator==(const ExecutorAddress &LHS, - const ExecutorAddress &RHS) { + friend bool operator==(const ExecutorAddr &LHS, const ExecutorAddr &RHS) { return LHS.Addr == RHS.Addr; } - friend bool operator!=(const ExecutorAddress &LHS, - const ExecutorAddress &RHS) { + friend bool operator!=(const ExecutorAddr &LHS, const ExecutorAddr &RHS) { return LHS.Addr != RHS.Addr; } - friend bool operator<(const ExecutorAddress &LHS, - const ExecutorAddress &RHS) { + friend bool operator<(const ExecutorAddr &LHS, const ExecutorAddr &RHS) { return LHS.Addr < RHS.Addr; } - friend bool operator<=(const ExecutorAddress &LHS, - const ExecutorAddress &RHS) { + friend bool operator<=(const ExecutorAddr &LHS, const ExecutorAddr &RHS) { return LHS.Addr <= RHS.Addr; } - friend bool operator>(const ExecutorAddress &LHS, - const ExecutorAddress &RHS) { + friend bool operator>(const 
ExecutorAddr &LHS, const ExecutorAddr &RHS) { return LHS.Addr > RHS.Addr; } - friend bool operator>=(const ExecutorAddress &LHS, - const ExecutorAddress &RHS) { + friend bool operator>=(const ExecutorAddr &LHS, const ExecutorAddr &RHS) { return LHS.Addr >= RHS.Addr; } - ExecutorAddress &operator++() { + ExecutorAddr &operator++() { ++Addr; return *this; } - ExecutorAddress &operator--() { + ExecutorAddr &operator--() { --Addr; return *this; } - ExecutorAddress operator++(int) { return ExecutorAddress(Addr++); } - ExecutorAddress operator--(int) { return ExecutorAddress(Addr++); } + ExecutorAddr operator++(int) { return ExecutorAddr(Addr++); } + ExecutorAddr operator--(int) { return ExecutorAddr(Addr--); } - ExecutorAddress &operator+=(const ExecutorAddrDiff Delta) { + ExecutorAddr &operator+=(const ExecutorAddrDiff Delta) { Addr += Delta.getValue(); return *this; } - ExecutorAddress &operator-=(const ExecutorAddrDiff Delta) { + ExecutorAddr &operator-=(const ExecutorAddrDiff Delta) { Addr -= Delta.getValue(); return *this; } @@ -118,83 +113,98 @@ private: }; /// Subtracting two addresses yields an offset. -inline ExecutorAddrDiff operator-(const ExecutorAddress &LHS, - const ExecutorAddress &RHS) { +inline ExecutorAddrDiff operator-(const ExecutorAddr &LHS, + const ExecutorAddr &RHS) { return ExecutorAddrDiff(LHS.getValue() - RHS.getValue()); } /// Adding an offset and an address yields an address. -inline ExecutorAddress operator+(const ExecutorAddress &LHS, - const ExecutorAddrDiff &RHS) { - return ExecutorAddress(LHS.getValue() + RHS.getValue()); +inline ExecutorAddr operator+(const ExecutorAddr &LHS, + const ExecutorAddrDiff &RHS) { + return ExecutorAddr(LHS.getValue() + RHS.getValue()); } /// Adding an address and an offset yields an address. 
-inline ExecutorAddress operator+(const ExecutorAddrDiff &LHS, - const ExecutorAddress &RHS) { - return ExecutorAddress(LHS.getValue() + RHS.getValue()); +inline ExecutorAddr operator+(const ExecutorAddrDiff &LHS, + const ExecutorAddr &RHS) { + return ExecutorAddr(LHS.getValue() + RHS.getValue()); } /// Represents an address range in the exceutor process. -struct ExecutorAddressRange { - ExecutorAddressRange() = default; - ExecutorAddressRange(ExecutorAddress StartAddress, ExecutorAddress EndAddress) - : StartAddress(StartAddress), EndAddress(EndAddress) {} +struct ExecutorAddrRange { + ExecutorAddrRange() = default; + ExecutorAddrRange(ExecutorAddr Start, ExecutorAddr End) + : Start(Start), End(End) {} + ExecutorAddrRange(ExecutorAddr Start, ExecutorAddrDiff Size) + : Start(Start), End(Start + Size) {} - bool empty() const { return StartAddress == EndAddress; } - ExecutorAddrDiff size() const { return EndAddress - StartAddress; } + bool empty() const { return Start == End; } + ExecutorAddrDiff size() const { return End - Start; } - ExecutorAddress StartAddress; - ExecutorAddress EndAddress; + friend bool operator==(const ExecutorAddrRange &LHS, + const ExecutorAddrRange &RHS) { + return LHS.Start == RHS.Start && LHS.End == RHS.End; + } + friend bool operator!=(const ExecutorAddrRange &LHS, + const ExecutorAddrRange &RHS) { + return !(LHS == RHS); + } + bool contains(ExecutorAddr Addr) const { return Start <= Addr && Addr < End; } + bool overlaps(const ExecutorAddrRange &Other) { + return !(Other.End <= Start || End <= Other.Start); + } + + ExecutorAddr Start; + ExecutorAddr End; }; namespace shared { -/// SPS serializatior for ExecutorAddress. -template <> class SPSSerializationTraits<SPSExecutorAddress, ExecutorAddress> { +class SPSExecutorAddr {}; + +/// SPS serializatior for ExecutorAddr. 
+template <> class SPSSerializationTraits<SPSExecutorAddr, ExecutorAddr> { public: - static size_t size(const ExecutorAddress &EA) { + static size_t size(const ExecutorAddr &EA) { return SPSArgList<uint64_t>::size(EA.getValue()); } - static bool serialize(SPSOutputBuffer &BOB, const ExecutorAddress &EA) { + static bool serialize(SPSOutputBuffer &BOB, const ExecutorAddr &EA) { return SPSArgList<uint64_t>::serialize(BOB, EA.getValue()); } - static bool deserialize(SPSInputBuffer &BIB, ExecutorAddress &EA) { + static bool deserialize(SPSInputBuffer &BIB, ExecutorAddr &EA) { uint64_t Tmp; if (!SPSArgList<uint64_t>::deserialize(BIB, Tmp)) return false; - EA = ExecutorAddress(Tmp); + EA = ExecutorAddr(Tmp); return true; } }; -using SPSExecutorAddressRange = - SPSTuple<SPSExecutorAddress, SPSExecutorAddress>; +using SPSExecutorAddrRange = SPSTuple<SPSExecutorAddr, SPSExecutorAddr>; /// Serialization traits for address ranges. template <> -class SPSSerializationTraits<SPSExecutorAddressRange, ExecutorAddressRange> { +class SPSSerializationTraits<SPSExecutorAddrRange, ExecutorAddrRange> { public: - static size_t size(const ExecutorAddressRange &Value) { - return SPSArgList<SPSExecutorAddress, SPSExecutorAddress>::size( - Value.StartAddress, Value.EndAddress); + static size_t size(const ExecutorAddrRange &Value) { + return SPSArgList<SPSExecutorAddr, SPSExecutorAddr>::size(Value.Start, + Value.End); } - static bool serialize(SPSOutputBuffer &BOB, - const ExecutorAddressRange &Value) { - return SPSArgList<SPSExecutorAddress, SPSExecutorAddress>::serialize( - BOB, Value.StartAddress, Value.EndAddress); + static bool serialize(SPSOutputBuffer &BOB, const ExecutorAddrRange &Value) { + return SPSArgList<SPSExecutorAddr, SPSExecutorAddr>::serialize( + BOB, Value.Start, Value.End); } - static bool deserialize(SPSInputBuffer &BIB, ExecutorAddressRange &Value) { - return SPSArgList<SPSExecutorAddress, SPSExecutorAddress>::deserialize( - BIB, Value.StartAddress, Value.EndAddress); + 
static bool deserialize(SPSInputBuffer &BIB, ExecutorAddrRange &Value) { + return SPSArgList<SPSExecutorAddr, SPSExecutorAddr>::deserialize( + BIB, Value.Start, Value.End); } }; -using SPSExecutorAddressRangeSequence = SPSSequence<SPSExecutorAddressRange>; +using SPSExecutorAddrRangeSequence = SPSSequence<SPSExecutorAddrRange>; } // End namespace shared. } // End namespace orc. diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Shared/FDRawByteChannel.h b/llvm/include/llvm/ExecutionEngine/Orc/Shared/FDRawByteChannel.h deleted file mode 100644 index 3f96fe3da49d..000000000000 --- a/llvm/include/llvm/ExecutionEngine/Orc/Shared/FDRawByteChannel.h +++ /dev/null @@ -1,79 +0,0 @@ -//===- FDRawByteChannel.h - File descriptor based byte-channel -*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// File descriptor based RawByteChannel. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_EXECUTIONENGINE_ORC_SHARED_FDRAWBYTECHANNEL_H -#define LLVM_EXECUTIONENGINE_ORC_SHARED_FDRAWBYTECHANNEL_H - -#include "llvm/ExecutionEngine/Orc/Shared/RawByteChannel.h" - -#if !defined(_MSC_VER) && !defined(__MINGW32__) -#include <unistd.h> -#else -#include <io.h> -#endif - -namespace llvm { -namespace orc { -namespace shared { - -/// Serialization channel that reads from and writes from file descriptors. 
-class FDRawByteChannel final : public RawByteChannel { -public: - FDRawByteChannel(int InFD, int OutFD) : InFD(InFD), OutFD(OutFD) {} - - llvm::Error readBytes(char *Dst, unsigned Size) override { - assert(Dst && "Attempt to read into null."); - ssize_t Completed = 0; - while (Completed < static_cast<ssize_t>(Size)) { - ssize_t Read = ::read(InFD, Dst + Completed, Size - Completed); - if (Read <= 0) { - auto ErrNo = errno; - if (ErrNo == EAGAIN || ErrNo == EINTR) - continue; - else - return llvm::errorCodeToError( - std::error_code(errno, std::generic_category())); - } - Completed += Read; - } - return llvm::Error::success(); - } - - llvm::Error appendBytes(const char *Src, unsigned Size) override { - assert(Src && "Attempt to append from null."); - ssize_t Completed = 0; - while (Completed < static_cast<ssize_t>(Size)) { - ssize_t Written = ::write(OutFD, Src + Completed, Size - Completed); - if (Written < 0) { - auto ErrNo = errno; - if (ErrNo == EAGAIN || ErrNo == EINTR) - continue; - else - return llvm::errorCodeToError( - std::error_code(errno, std::generic_category())); - } - Completed += Written; - } - return llvm::Error::success(); - } - - llvm::Error send() override { return llvm::Error::success(); } - -private: - int InFD, OutFD; -}; - -} // namespace shared -} // namespace orc -} // namespace llvm - -#endif // LLVM_EXECUTIONENGINE_ORC_SHARED_FDRAWBYTECHANNEL_H diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Shared/OrcRTBridge.h b/llvm/include/llvm/ExecutionEngine/Orc/Shared/OrcRTBridge.h new file mode 100644 index 000000000000..3ef43f33d84c --- /dev/null +++ b/llvm/include/llvm/ExecutionEngine/Orc/Shared/OrcRTBridge.h @@ -0,0 +1,68 @@ +//===---- OrcRTBridge.h -- Utils for interacting with orc-rt ----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Declares types and symbol names provided by the ORC runtime. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EXECUTIONENGINE_ORC_SHARED_ORCRTBRIDGE_H +#define LLVM_EXECUTIONENGINE_ORC_SHARED_ORCRTBRIDGE_H + +#include "llvm/ADT/StringMap.h" +#include "llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h" +#include "llvm/ExecutionEngine/Orc/Shared/SimpleRemoteEPCUtils.h" +#include "llvm/ExecutionEngine/Orc/Shared/TargetProcessControlTypes.h" + +namespace llvm { +namespace orc { +namespace rt { + +extern const char *SimpleExecutorDylibManagerInstanceName; +extern const char *SimpleExecutorDylibManagerOpenWrapperName; +extern const char *SimpleExecutorDylibManagerLookupWrapperName; + +extern const char *SimpleExecutorMemoryManagerInstanceName; +extern const char *SimpleExecutorMemoryManagerReserveWrapperName; +extern const char *SimpleExecutorMemoryManagerFinalizeWrapperName; +extern const char *SimpleExecutorMemoryManagerDeallocateWrapperName; + +extern const char *MemoryWriteUInt8sWrapperName; +extern const char *MemoryWriteUInt16sWrapperName; +extern const char *MemoryWriteUInt32sWrapperName; +extern const char *MemoryWriteUInt64sWrapperName; +extern const char *MemoryWriteBuffersWrapperName; + +extern const char *RegisterEHFrameSectionCustomDirectWrapperName; +extern const char *DeregisterEHFrameSectionCustomDirectWrapperName; + +extern const char *RunAsMainWrapperName; + +using SPSSimpleExecutorDylibManagerOpenSignature = + shared::SPSExpected<uint64_t>(shared::SPSExecutorAddr, shared::SPSString, + uint64_t); + +using SPSSimpleExecutorDylibManagerLookupSignature = + shared::SPSExpected<shared::SPSSequence<shared::SPSExecutorAddr>>( + shared::SPSExecutorAddr, uint64_t, shared::SPSRemoteSymbolLookupSet); + +using SPSSimpleExecutorMemoryManagerReserveSignature = + 
shared::SPSExpected<shared::SPSExecutorAddr>(shared::SPSExecutorAddr, + uint64_t); +using SPSSimpleExecutorMemoryManagerFinalizeSignature = + shared::SPSError(shared::SPSExecutorAddr, shared::SPSFinalizeRequest); +using SPSSimpleExecutorMemoryManagerDeallocateSignature = shared::SPSError( + shared::SPSExecutorAddr, shared::SPSSequence<shared::SPSExecutorAddr>); + +using SPSRunAsMainSignature = int64_t(shared::SPSExecutorAddr, + shared::SPSSequence<shared::SPSString>); + +} // end namespace rt +} // end namespace orc +} // end namespace llvm + +#endif // LLVM_EXECUTIONENGINE_ORC_SHARED_ORCRTBRIDGE_H diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Shared/RPCUtils.h b/llvm/include/llvm/ExecutionEngine/Orc/Shared/RPCUtils.h deleted file mode 100644 index 1ff47ce42758..000000000000 --- a/llvm/include/llvm/ExecutionEngine/Orc/Shared/RPCUtils.h +++ /dev/null @@ -1,1659 +0,0 @@ -//===- RPCUtils.h - Utilities for building RPC APIs -------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// Utilities to support construction of simple RPC APIs. -// -// The RPC utilities aim for ease of use (minimal conceptual overhead) for C++ -// programmers, high performance, low memory overhead, and efficient use of the -// communications channel. 
-// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_EXECUTIONENGINE_ORC_SHARED_RPCUTILS_H -#define LLVM_EXECUTIONENGINE_ORC_SHARED_RPCUTILS_H - -#include <map> -#include <thread> -#include <vector> - -#include "llvm/ADT/STLExtras.h" -#include "llvm/ExecutionEngine/Orc/Shared/OrcError.h" -#include "llvm/ExecutionEngine/Orc/Shared/Serialization.h" -#include "llvm/Support/MSVCErrorWorkarounds.h" - -#include <future> - -namespace llvm { -namespace orc { -namespace shared { - -/// Base class of all fatal RPC errors (those that necessarily result in the -/// termination of the RPC session). -class RPCFatalError : public ErrorInfo<RPCFatalError> { -public: - static char ID; -}; - -/// RPCConnectionClosed is returned from RPC operations if the RPC connection -/// has already been closed due to either an error or graceful disconnection. -class ConnectionClosed : public ErrorInfo<ConnectionClosed> { -public: - static char ID; - std::error_code convertToErrorCode() const override; - void log(raw_ostream &OS) const override; -}; - -/// BadFunctionCall is returned from handleOne when the remote makes a call with -/// an unrecognized function id. -/// -/// This error is fatal because Orc RPC needs to know how to parse a function -/// call to know where the next call starts, and if it doesn't recognize the -/// function id it cannot parse the call. 
-template <typename FnIdT, typename SeqNoT> -class BadFunctionCall - : public ErrorInfo<BadFunctionCall<FnIdT, SeqNoT>, RPCFatalError> { -public: - static char ID; - - BadFunctionCall(FnIdT FnId, SeqNoT SeqNo) - : FnId(std::move(FnId)), SeqNo(std::move(SeqNo)) {} - - std::error_code convertToErrorCode() const override { - return orcError(OrcErrorCode::UnexpectedRPCCall); - } - - void log(raw_ostream &OS) const override { - OS << "Call to invalid RPC function id '" << FnId - << "' with " - "sequence number " - << SeqNo; - } - -private: - FnIdT FnId; - SeqNoT SeqNo; -}; - -template <typename FnIdT, typename SeqNoT> -char BadFunctionCall<FnIdT, SeqNoT>::ID = 0; - -/// InvalidSequenceNumberForResponse is returned from handleOne when a response -/// call arrives with a sequence number that doesn't correspond to any in-flight -/// function call. -/// -/// This error is fatal because Orc RPC needs to know how to parse the rest of -/// the response call to know where the next call starts, and if it doesn't have -/// a result parser for this sequence number it can't do that. -template <typename SeqNoT> -class InvalidSequenceNumberForResponse - : public ErrorInfo<InvalidSequenceNumberForResponse<SeqNoT>, - RPCFatalError> { -public: - static char ID; - - InvalidSequenceNumberForResponse(SeqNoT SeqNo) : SeqNo(std::move(SeqNo)) {} - - std::error_code convertToErrorCode() const override { - return orcError(OrcErrorCode::UnexpectedRPCCall); - }; - - void log(raw_ostream &OS) const override { - OS << "Response has unknown sequence number " << SeqNo; - } - -private: - SeqNoT SeqNo; -}; - -template <typename SeqNoT> -char InvalidSequenceNumberForResponse<SeqNoT>::ID = 0; - -/// This non-fatal error will be passed to asynchronous result handlers in place -/// of a result if the connection goes down before a result returns, or if the -/// function to be called cannot be negotiated with the remote. 
-class ResponseAbandoned : public ErrorInfo<ResponseAbandoned> { -public: - static char ID; - - std::error_code convertToErrorCode() const override; - void log(raw_ostream &OS) const override; -}; - -/// This error is returned if the remote does not have a handler installed for -/// the given RPC function. -class CouldNotNegotiate : public ErrorInfo<CouldNotNegotiate> { -public: - static char ID; - - CouldNotNegotiate(std::string Signature); - std::error_code convertToErrorCode() const override; - void log(raw_ostream &OS) const override; - const std::string &getSignature() const { return Signature; } - -private: - std::string Signature; -}; - -template <typename DerivedFunc, typename FnT> class RPCFunction; - -// RPC Function class. -// DerivedFunc should be a user defined class with a static 'getName()' method -// returning a const char* representing the function's name. -template <typename DerivedFunc, typename RetT, typename... ArgTs> -class RPCFunction<DerivedFunc, RetT(ArgTs...)> { -public: - /// User defined function type. - using Type = RetT(ArgTs...); - - /// Return type. - using ReturnType = RetT; - - /// Returns the full function prototype as a string. - static const char *getPrototype() { - static std::string Name = [] { - std::string Name; - raw_string_ostream(Name) - << SerializationTypeName<RetT>::getName() << " " - << DerivedFunc::getName() << "(" - << SerializationTypeNameSequence<ArgTs...>() << ")"; - return Name; - }(); - return Name.data(); - } -}; - -/// Allocates RPC function ids during autonegotiation. -/// Specializations of this class must provide four members: -/// -/// static T getInvalidId(): -/// Should return a reserved id that will be used to represent missing -/// functions during autonegotiation. -/// -/// static T getResponseId(): -/// Should return a reserved id that will be used to send function responses -/// (return values). 
-/// -/// static T getNegotiateId(): -/// Should return a reserved id for the negotiate function, which will be used -/// to negotiate ids for user defined functions. -/// -/// template <typename Func> T allocate(): -/// Allocate a unique id for function Func. -template <typename T, typename = void> class RPCFunctionIdAllocator; - -/// This specialization of RPCFunctionIdAllocator provides a default -/// implementation for integral types. -template <typename T> -class RPCFunctionIdAllocator<T, std::enable_if_t<std::is_integral<T>::value>> { -public: - static T getInvalidId() { return T(0); } - static T getResponseId() { return T(1); } - static T getNegotiateId() { return T(2); } - - template <typename Func> T allocate() { return NextId++; } - -private: - T NextId = 3; -}; - -namespace detail { - -/// Provides a typedef for a tuple containing the decayed argument types. -template <typename T> class RPCFunctionArgsTuple; - -template <typename RetT, typename... ArgTs> -class RPCFunctionArgsTuple<RetT(ArgTs...)> { -public: - using Type = std::tuple<std::decay_t<std::remove_reference_t<ArgTs>>...>; -}; - -// ResultTraits provides typedefs and utilities specific to the return type -// of functions. -template <typename RetT> class ResultTraits { -public: - // The return type wrapped in llvm::Expected. - using ErrorReturnType = Expected<RetT>; - -#ifdef _MSC_VER - // The ErrorReturnType wrapped in a std::promise. - using ReturnPromiseType = std::promise<MSVCPExpected<RetT>>; - - // The ErrorReturnType wrapped in a std::future. - using ReturnFutureType = std::future<MSVCPExpected<RetT>>; -#else - // The ErrorReturnType wrapped in a std::promise. - using ReturnPromiseType = std::promise<ErrorReturnType>; - - // The ErrorReturnType wrapped in a std::future. - using ReturnFutureType = std::future<ErrorReturnType>; -#endif - - // Create a 'blank' value of the ErrorReturnType, ready and safe to - // overwrite. 
- static ErrorReturnType createBlankErrorReturnValue() { - return ErrorReturnType(RetT()); - } - - // Consume an abandoned ErrorReturnType. - static void consumeAbandoned(ErrorReturnType RetOrErr) { - consumeError(RetOrErr.takeError()); - } - - static ErrorReturnType returnError(Error Err) { return std::move(Err); } -}; - -// ResultTraits specialization for void functions. -template <> class ResultTraits<void> { -public: - // For void functions, ErrorReturnType is llvm::Error. - using ErrorReturnType = Error; - -#ifdef _MSC_VER - // The ErrorReturnType wrapped in a std::promise. - using ReturnPromiseType = std::promise<MSVCPError>; - - // The ErrorReturnType wrapped in a std::future. - using ReturnFutureType = std::future<MSVCPError>; -#else - // The ErrorReturnType wrapped in a std::promise. - using ReturnPromiseType = std::promise<ErrorReturnType>; - - // The ErrorReturnType wrapped in a std::future. - using ReturnFutureType = std::future<ErrorReturnType>; -#endif - - // Create a 'blank' value of the ErrorReturnType, ready and safe to - // overwrite. - static ErrorReturnType createBlankErrorReturnValue() { - return ErrorReturnType::success(); - } - - // Consume an abandoned ErrorReturnType. - static void consumeAbandoned(ErrorReturnType Err) { - consumeError(std::move(Err)); - } - - static ErrorReturnType returnError(Error Err) { return Err; } -}; - -// ResultTraits<Error> is equivalent to ResultTraits<void>. This allows -// handlers for void RPC functions to return either void (in which case they -// implicitly succeed) or Error (in which case their error return is -// propagated). See usage in HandlerTraits::runHandlerHelper. -template <> class ResultTraits<Error> : public ResultTraits<void> {}; - -// ResultTraits<Expected<T>> is equivalent to ResultTraits<T>. This allows -// handlers for RPC functions returning a T to return either a T (in which -// case they implicitly succeed) or Expected<T> (in which case their error -// return is propagated). 
See usage in HandlerTraits::runHandlerHelper. -template <typename RetT> -class ResultTraits<Expected<RetT>> : public ResultTraits<RetT> {}; - -// Determines whether an RPC function's defined error return type supports -// error return value. -template <typename T> class SupportsErrorReturn { -public: - static const bool value = false; -}; - -template <> class SupportsErrorReturn<Error> { -public: - static const bool value = true; -}; - -template <typename T> class SupportsErrorReturn<Expected<T>> { -public: - static const bool value = true; -}; - -// RespondHelper packages return values based on whether or not the declared -// RPC function return type supports error returns. -template <bool FuncSupportsErrorReturn> class RespondHelper; - -// RespondHelper specialization for functions that support error returns. -template <> class RespondHelper<true> { -public: - // Send Expected<T>. - template <typename WireRetT, typename HandlerRetT, typename ChannelT, - typename FunctionIdT, typename SequenceNumberT> - static Error sendResult(ChannelT &C, const FunctionIdT &ResponseId, - SequenceNumberT SeqNo, - Expected<HandlerRetT> ResultOrErr) { - if (!ResultOrErr && ResultOrErr.template errorIsA<RPCFatalError>()) - return ResultOrErr.takeError(); - - // Open the response message. - if (auto Err = C.startSendMessage(ResponseId, SeqNo)) - return Err; - - // Serialize the result. - if (auto Err = - SerializationTraits<ChannelT, WireRetT, Expected<HandlerRetT>>:: - serialize(C, std::move(ResultOrErr))) - return Err; - - // Close the response message. 
- if (auto Err = C.endSendMessage()) - return Err; - return C.send(); - } - - template <typename ChannelT, typename FunctionIdT, typename SequenceNumberT> - static Error sendResult(ChannelT &C, const FunctionIdT &ResponseId, - SequenceNumberT SeqNo, Error Err) { - if (Err && Err.isA<RPCFatalError>()) - return Err; - if (auto Err2 = C.startSendMessage(ResponseId, SeqNo)) - return Err2; - if (auto Err2 = serializeSeq(C, std::move(Err))) - return Err2; - if (auto Err2 = C.endSendMessage()) - return Err2; - return C.send(); - } -}; - -// RespondHelper specialization for functions that do not support error returns. -template <> class RespondHelper<false> { -public: - template <typename WireRetT, typename HandlerRetT, typename ChannelT, - typename FunctionIdT, typename SequenceNumberT> - static Error sendResult(ChannelT &C, const FunctionIdT &ResponseId, - SequenceNumberT SeqNo, - Expected<HandlerRetT> ResultOrErr) { - if (auto Err = ResultOrErr.takeError()) - return Err; - - // Open the response message. - if (auto Err = C.startSendMessage(ResponseId, SeqNo)) - return Err; - - // Serialize the result. - if (auto Err = - SerializationTraits<ChannelT, WireRetT, HandlerRetT>::serialize( - C, *ResultOrErr)) - return Err; - - // End the response message. - if (auto Err = C.endSendMessage()) - return Err; - - return C.send(); - } - - template <typename ChannelT, typename FunctionIdT, typename SequenceNumberT> - static Error sendResult(ChannelT &C, const FunctionIdT &ResponseId, - SequenceNumberT SeqNo, Error Err) { - if (Err) - return Err; - if (auto Err2 = C.startSendMessage(ResponseId, SeqNo)) - return Err2; - if (auto Err2 = C.endSendMessage()) - return Err2; - return C.send(); - } -}; - -// Send a response of the given wire return type (WireRetT) over the -// channel, with the given sequence number. 
-template <typename WireRetT, typename HandlerRetT, typename ChannelT, - typename FunctionIdT, typename SequenceNumberT> -Error respond(ChannelT &C, const FunctionIdT &ResponseId, SequenceNumberT SeqNo, - Expected<HandlerRetT> ResultOrErr) { - return RespondHelper<SupportsErrorReturn<WireRetT>::value>:: - template sendResult<WireRetT>(C, ResponseId, SeqNo, - std::move(ResultOrErr)); -} - -// Send an empty response message on the given channel to indicate that -// the handler ran. -template <typename WireRetT, typename ChannelT, typename FunctionIdT, - typename SequenceNumberT> -Error respond(ChannelT &C, const FunctionIdT &ResponseId, SequenceNumberT SeqNo, - Error Err) { - return RespondHelper<SupportsErrorReturn<WireRetT>::value>::sendResult( - C, ResponseId, SeqNo, std::move(Err)); -} - -// Converts a given type to the equivalent error return type. -template <typename T> class WrappedHandlerReturn { -public: - using Type = Expected<T>; -}; - -template <typename T> class WrappedHandlerReturn<Expected<T>> { -public: - using Type = Expected<T>; -}; - -template <> class WrappedHandlerReturn<void> { -public: - using Type = Error; -}; - -template <> class WrappedHandlerReturn<Error> { -public: - using Type = Error; -}; - -template <> class WrappedHandlerReturn<ErrorSuccess> { -public: - using Type = Error; -}; - -// Traits class that strips the response function from the list of handler -// arguments. -template <typename FnT> class AsyncHandlerTraits; - -template <typename ResultT, typename... ArgTs> -class AsyncHandlerTraits<Error(std::function<Error(Expected<ResultT>)>, - ArgTs...)> { -public: - using Type = Error(ArgTs...); - using ResultType = Expected<ResultT>; -}; - -template <typename... ArgTs> -class AsyncHandlerTraits<Error(std::function<Error(Error)>, ArgTs...)> { -public: - using Type = Error(ArgTs...); - using ResultType = Error; -}; - -template <typename... 
ArgTs> -class AsyncHandlerTraits<ErrorSuccess(std::function<Error(Error)>, ArgTs...)> { -public: - using Type = Error(ArgTs...); - using ResultType = Error; -}; - -template <typename... ArgTs> -class AsyncHandlerTraits<void(std::function<Error(Error)>, ArgTs...)> { -public: - using Type = Error(ArgTs...); - using ResultType = Error; -}; - -template <typename ResponseHandlerT, typename... ArgTs> -class AsyncHandlerTraits<Error(ResponseHandlerT, ArgTs...)> - : public AsyncHandlerTraits<Error(std::decay_t<ResponseHandlerT>, - ArgTs...)> {}; - -// This template class provides utilities related to RPC function handlers. -// The base case applies to non-function types (the template class is -// specialized for function types) and inherits from the appropriate -// speciilization for the given non-function type's call operator. -template <typename HandlerT> -class HandlerTraits - : public HandlerTraits< - decltype(&std::remove_reference<HandlerT>::type::operator())> {}; - -// Traits for handlers with a given function type. -template <typename RetT, typename... ArgTs> -class HandlerTraits<RetT(ArgTs...)> { -public: - // Function type of the handler. - using Type = RetT(ArgTs...); - - // Return type of the handler. - using ReturnType = RetT; - - // Call the given handler with the given arguments. - template <typename HandlerT, typename... TArgTs> - static typename WrappedHandlerReturn<RetT>::Type - unpackAndRun(HandlerT &Handler, std::tuple<TArgTs...> &Args) { - return unpackAndRunHelper(Handler, Args, - std::index_sequence_for<TArgTs...>()); - } - - // Call the given handler with the given arguments. - template <typename HandlerT, typename ResponderT, typename... TArgTs> - static Error unpackAndRunAsync(HandlerT &Handler, ResponderT &Responder, - std::tuple<TArgTs...> &Args) { - return unpackAndRunAsyncHelper(Handler, Responder, Args, - std::index_sequence_for<TArgTs...>()); - } - - // Call the given handler with the given arguments. 
- template <typename HandlerT> - static std::enable_if_t< - std::is_void<typename HandlerTraits<HandlerT>::ReturnType>::value, Error> - run(HandlerT &Handler, ArgTs &&...Args) { - Handler(std::move(Args)...); - return Error::success(); - } - - template <typename HandlerT, typename... TArgTs> - static std::enable_if_t< - !std::is_void<typename HandlerTraits<HandlerT>::ReturnType>::value, - typename HandlerTraits<HandlerT>::ReturnType> - run(HandlerT &Handler, TArgTs... Args) { - return Handler(std::move(Args)...); - } - - // Serialize arguments to the channel. - template <typename ChannelT, typename... CArgTs> - static Error serializeArgs(ChannelT &C, const CArgTs... CArgs) { - return SequenceSerialization<ChannelT, ArgTs...>::serialize(C, CArgs...); - } - - // Deserialize arguments from the channel. - template <typename ChannelT, typename... CArgTs> - static Error deserializeArgs(ChannelT &C, std::tuple<CArgTs...> &Args) { - return deserializeArgsHelper(C, Args, std::index_sequence_for<CArgTs...>()); - } - -private: - template <typename ChannelT, typename... CArgTs, size_t... Indexes> - static Error deserializeArgsHelper(ChannelT &C, std::tuple<CArgTs...> &Args, - std::index_sequence<Indexes...> _) { - return SequenceSerialization<ChannelT, ArgTs...>::deserialize( - C, std::get<Indexes>(Args)...); - } - - template <typename HandlerT, typename ArgTuple, size_t... Indexes> - static typename WrappedHandlerReturn< - typename HandlerTraits<HandlerT>::ReturnType>::Type - unpackAndRunHelper(HandlerT &Handler, ArgTuple &Args, - std::index_sequence<Indexes...>) { - return run(Handler, std::move(std::get<Indexes>(Args))...); - } - - template <typename HandlerT, typename ResponderT, typename ArgTuple, - size_t... 
Indexes> - static typename WrappedHandlerReturn< - typename HandlerTraits<HandlerT>::ReturnType>::Type - unpackAndRunAsyncHelper(HandlerT &Handler, ResponderT &Responder, - ArgTuple &Args, std::index_sequence<Indexes...>) { - return run(Handler, Responder, std::move(std::get<Indexes>(Args))...); - } -}; - -// Handler traits for free functions. -template <typename RetT, typename... ArgTs> -class HandlerTraits<RetT (*)(ArgTs...)> : public HandlerTraits<RetT(ArgTs...)> { -}; - -// Handler traits for class methods (especially call operators for lambdas). -template <typename Class, typename RetT, typename... ArgTs> -class HandlerTraits<RetT (Class::*)(ArgTs...)> - : public HandlerTraits<RetT(ArgTs...)> {}; - -// Handler traits for const class methods (especially call operators for -// lambdas). -template <typename Class, typename RetT, typename... ArgTs> -class HandlerTraits<RetT (Class::*)(ArgTs...) const> - : public HandlerTraits<RetT(ArgTs...)> {}; - -// Utility to peel the Expected wrapper off a response handler error type. -template <typename HandlerT> class ResponseHandlerArg; - -template <typename ArgT> class ResponseHandlerArg<Error(Expected<ArgT>)> { -public: - using ArgType = Expected<ArgT>; - using UnwrappedArgType = ArgT; -}; - -template <typename ArgT> -class ResponseHandlerArg<ErrorSuccess(Expected<ArgT>)> { -public: - using ArgType = Expected<ArgT>; - using UnwrappedArgType = ArgT; -}; - -template <> class ResponseHandlerArg<Error(Error)> { -public: - using ArgType = Error; -}; - -template <> class ResponseHandlerArg<ErrorSuccess(Error)> { -public: - using ArgType = Error; -}; - -// ResponseHandler represents a handler for a not-yet-received function call -// result. -template <typename ChannelT> class ResponseHandler { -public: - virtual ~ResponseHandler() {} - - // Reads the function result off the wire and acts on it. 
The meaning of - // "act" will depend on how this method is implemented in any given - // ResponseHandler subclass but could, for example, mean running a - // user-specified handler or setting a promise value. - virtual Error handleResponse(ChannelT &C) = 0; - - // Abandons this outstanding result. - virtual void abandon() = 0; - - // Create an error instance representing an abandoned response. - static Error createAbandonedResponseError() { - return make_error<ResponseAbandoned>(); - } -}; - -// ResponseHandler subclass for RPC functions with non-void returns. -template <typename ChannelT, typename FuncRetT, typename HandlerT> -class ResponseHandlerImpl : public ResponseHandler<ChannelT> { -public: - ResponseHandlerImpl(HandlerT Handler) : Handler(std::move(Handler)) {} - - // Handle the result by deserializing it from the channel then passing it - // to the user defined handler. - Error handleResponse(ChannelT &C) override { - using UnwrappedArgType = typename ResponseHandlerArg< - typename HandlerTraits<HandlerT>::Type>::UnwrappedArgType; - UnwrappedArgType Result; - if (auto Err = - SerializationTraits<ChannelT, FuncRetT, - UnwrappedArgType>::deserialize(C, Result)) - return Err; - if (auto Err = C.endReceiveMessage()) - return Err; - return Handler(std::move(Result)); - } - - // Abandon this response by calling the handler with an 'abandoned response' - // error. - void abandon() override { - if (auto Err = Handler(this->createAbandonedResponseError())) { - // Handlers should not fail when passed an abandoned response error. - report_fatal_error(std::move(Err)); - } - } - -private: - HandlerT Handler; -}; - -// ResponseHandler subclass for RPC functions with void returns. 
-template <typename ChannelT, typename HandlerT> -class ResponseHandlerImpl<ChannelT, void, HandlerT> - : public ResponseHandler<ChannelT> { -public: - ResponseHandlerImpl(HandlerT Handler) : Handler(std::move(Handler)) {} - - // Handle the result (no actual value, just a notification that the function - // has completed on the remote end) by calling the user-defined handler with - // Error::success(). - Error handleResponse(ChannelT &C) override { - if (auto Err = C.endReceiveMessage()) - return Err; - return Handler(Error::success()); - } - - // Abandon this response by calling the handler with an 'abandoned response' - // error. - void abandon() override { - if (auto Err = Handler(this->createAbandonedResponseError())) { - // Handlers should not fail when passed an abandoned response error. - report_fatal_error(std::move(Err)); - } - } - -private: - HandlerT Handler; -}; - -template <typename ChannelT, typename FuncRetT, typename HandlerT> -class ResponseHandlerImpl<ChannelT, Expected<FuncRetT>, HandlerT> - : public ResponseHandler<ChannelT> { -public: - ResponseHandlerImpl(HandlerT Handler) : Handler(std::move(Handler)) {} - - // Handle the result by deserializing it from the channel then passing it - // to the user defined handler. - Error handleResponse(ChannelT &C) override { - using HandlerArgType = typename ResponseHandlerArg< - typename HandlerTraits<HandlerT>::Type>::ArgType; - HandlerArgType Result((typename HandlerArgType::value_type())); - - if (auto Err = SerializationTraits<ChannelT, Expected<FuncRetT>, - HandlerArgType>::deserialize(C, Result)) - return Err; - if (auto Err = C.endReceiveMessage()) - return Err; - return Handler(std::move(Result)); - } - - // Abandon this response by calling the handler with an 'abandoned response' - // error. - void abandon() override { - if (auto Err = Handler(this->createAbandonedResponseError())) { - // Handlers should not fail when passed an abandoned response error. 
- report_fatal_error(std::move(Err)); - } - } - -private: - HandlerT Handler; -}; - -template <typename ChannelT, typename HandlerT> -class ResponseHandlerImpl<ChannelT, Error, HandlerT> - : public ResponseHandler<ChannelT> { -public: - ResponseHandlerImpl(HandlerT Handler) : Handler(std::move(Handler)) {} - - // Handle the result by deserializing it from the channel then passing it - // to the user defined handler. - Error handleResponse(ChannelT &C) override { - Error Result = Error::success(); - if (auto Err = SerializationTraits<ChannelT, Error, Error>::deserialize( - C, Result)) { - consumeError(std::move(Result)); - return Err; - } - if (auto Err = C.endReceiveMessage()) { - consumeError(std::move(Result)); - return Err; - } - return Handler(std::move(Result)); - } - - // Abandon this response by calling the handler with an 'abandoned response' - // error. - void abandon() override { - if (auto Err = Handler(this->createAbandonedResponseError())) { - // Handlers should not fail when passed an abandoned response error. - report_fatal_error(std::move(Err)); - } - } - -private: - HandlerT Handler; -}; - -// Create a ResponseHandler from a given user handler. -template <typename ChannelT, typename FuncRetT, typename HandlerT> -std::unique_ptr<ResponseHandler<ChannelT>> createResponseHandler(HandlerT H) { - return std::make_unique<ResponseHandlerImpl<ChannelT, FuncRetT, HandlerT>>( - std::move(H)); -} - -// Helper for wrapping member functions up as functors. This is useful for -// installing methods as result handlers. -template <typename ClassT, typename RetT, typename... ArgTs> -class MemberFnWrapper { -public: - using MethodT = RetT (ClassT::*)(ArgTs...); - MemberFnWrapper(ClassT &Instance, MethodT Method) - : Instance(Instance), Method(Method) {} - RetT operator()(ArgTs &&...Args) { - return (Instance.*Method)(std::move(Args)...); - } - -private: - ClassT &Instance; - MethodT Method; -}; - -// Helper that provides a Functor for deserializing arguments. 
-template <typename... ArgTs> class ReadArgs { -public: - Error operator()() { return Error::success(); } -}; - -template <typename ArgT, typename... ArgTs> -class ReadArgs<ArgT, ArgTs...> : public ReadArgs<ArgTs...> { -public: - ReadArgs(ArgT &Arg, ArgTs &...Args) : ReadArgs<ArgTs...>(Args...), Arg(Arg) {} - - Error operator()(ArgT &ArgVal, ArgTs &...ArgVals) { - this->Arg = std::move(ArgVal); - return ReadArgs<ArgTs...>::operator()(ArgVals...); - } - -private: - ArgT &Arg; -}; - -// Manage sequence numbers. -template <typename SequenceNumberT> class SequenceNumberManager { -public: - // Reset, making all sequence numbers available. - void reset() { - std::lock_guard<std::mutex> Lock(SeqNoLock); - NextSequenceNumber = 0; - FreeSequenceNumbers.clear(); - } - - // Get the next available sequence number. Will re-use numbers that have - // been released. - SequenceNumberT getSequenceNumber() { - std::lock_guard<std::mutex> Lock(SeqNoLock); - if (FreeSequenceNumbers.empty()) - return NextSequenceNumber++; - auto SequenceNumber = FreeSequenceNumbers.back(); - FreeSequenceNumbers.pop_back(); - return SequenceNumber; - } - - // Release a sequence number, making it available for re-use. - void releaseSequenceNumber(SequenceNumberT SequenceNumber) { - std::lock_guard<std::mutex> Lock(SeqNoLock); - FreeSequenceNumbers.push_back(SequenceNumber); - } - -private: - std::mutex SeqNoLock; - SequenceNumberT NextSequenceNumber = 0; - std::vector<SequenceNumberT> FreeSequenceNumbers; -}; - -// Checks that predicate P holds for each corresponding pair of type arguments -// from T1 and T2 tuple. -template <template <class, class> class P, typename T1Tuple, typename T2Tuple> -class RPCArgTypeCheckHelper; - -template <template <class, class> class P> -class RPCArgTypeCheckHelper<P, std::tuple<>, std::tuple<>> { -public: - static const bool value = true; -}; - -template <template <class, class> class P, typename T, typename... Ts, - typename U, typename... 
Us> -class RPCArgTypeCheckHelper<P, std::tuple<T, Ts...>, std::tuple<U, Us...>> { -public: - static const bool value = - P<T, U>::value && - RPCArgTypeCheckHelper<P, std::tuple<Ts...>, std::tuple<Us...>>::value; -}; - -template <template <class, class> class P, typename T1Sig, typename T2Sig> -class RPCArgTypeCheck { -public: - using T1Tuple = typename RPCFunctionArgsTuple<T1Sig>::Type; - using T2Tuple = typename RPCFunctionArgsTuple<T2Sig>::Type; - - static_assert(std::tuple_size<T1Tuple>::value >= - std::tuple_size<T2Tuple>::value, - "Too many arguments to RPC call"); - static_assert(std::tuple_size<T1Tuple>::value <= - std::tuple_size<T2Tuple>::value, - "Too few arguments to RPC call"); - - static const bool value = RPCArgTypeCheckHelper<P, T1Tuple, T2Tuple>::value; -}; - -template <typename ChannelT, typename WireT, typename ConcreteT> -class CanSerialize { -private: - using S = SerializationTraits<ChannelT, WireT, ConcreteT>; - - template <typename T> - static std::true_type check( - std::enable_if_t<std::is_same<decltype(T::serialize( - std::declval<ChannelT &>(), - std::declval<const ConcreteT &>())), - Error>::value, - void *>); - - template <typename> static std::false_type check(...); - -public: - static const bool value = decltype(check<S>(0))::value; -}; - -template <typename ChannelT, typename WireT, typename ConcreteT> -class CanDeserialize { -private: - using S = SerializationTraits<ChannelT, WireT, ConcreteT>; - - template <typename T> - static std::true_type - check(std::enable_if_t< - std::is_same<decltype(T::deserialize(std::declval<ChannelT &>(), - std::declval<ConcreteT &>())), - Error>::value, - void *>); - - template <typename> static std::false_type check(...); - -public: - static const bool value = decltype(check<S>(0))::value; -}; - -/// Contains primitive utilities for defining, calling and handling calls to -/// remote procedures. 
ChannelT is a bidirectional stream conforming to the -/// RPCChannel interface (see RPCChannel.h), FunctionIdT is a procedure -/// identifier type that must be serializable on ChannelT, and SequenceNumberT -/// is an integral type that will be used to number in-flight function calls. -/// -/// These utilities support the construction of very primitive RPC utilities. -/// Their intent is to ensure correct serialization and deserialization of -/// procedure arguments, and to keep the client and server's view of the API in -/// sync. -template <typename ImplT, typename ChannelT, typename FunctionIdT, - typename SequenceNumberT> -class RPCEndpointBase { -protected: - class OrcRPCInvalid : public RPCFunction<OrcRPCInvalid, void()> { - public: - static const char *getName() { return "__orc_rpc$invalid"; } - }; - - class OrcRPCResponse : public RPCFunction<OrcRPCResponse, void()> { - public: - static const char *getName() { return "__orc_rpc$response"; } - }; - - class OrcRPCNegotiate - : public RPCFunction<OrcRPCNegotiate, FunctionIdT(std::string)> { - public: - static const char *getName() { return "__orc_rpc$negotiate"; } - }; - - // Helper predicate for testing for the presence of SerializeTraits - // serializers. - template <typename WireT, typename ConcreteT> - class CanSerializeCheck : detail::CanSerialize<ChannelT, WireT, ConcreteT> { - public: - using detail::CanSerialize<ChannelT, WireT, ConcreteT>::value; - - static_assert(value, "Missing serializer for argument (Can't serialize the " - "first template type argument of CanSerializeCheck " - "from the second)"); - }; - - // Helper predicate for testing for the presence of SerializeTraits - // deserializers. 
- template <typename WireT, typename ConcreteT> - class CanDeserializeCheck - : detail::CanDeserialize<ChannelT, WireT, ConcreteT> { - public: - using detail::CanDeserialize<ChannelT, WireT, ConcreteT>::value; - - static_assert(value, "Missing deserializer for argument (Can't deserialize " - "the second template type argument of " - "CanDeserializeCheck from the first)"); - }; - -public: - /// Construct an RPC instance on a channel. - RPCEndpointBase(ChannelT &C, bool LazyAutoNegotiation) - : C(C), LazyAutoNegotiation(LazyAutoNegotiation) { - // Hold ResponseId in a special variable, since we expect Response to be - // called relatively frequently, and want to avoid the map lookup. - ResponseId = FnIdAllocator.getResponseId(); - RemoteFunctionIds[OrcRPCResponse::getPrototype()] = ResponseId; - - // Register the negotiate function id and handler. - auto NegotiateId = FnIdAllocator.getNegotiateId(); - RemoteFunctionIds[OrcRPCNegotiate::getPrototype()] = NegotiateId; - Handlers[NegotiateId] = wrapHandler<OrcRPCNegotiate>( - [this](const std::string &Name) { return handleNegotiate(Name); }); - } - - /// Negotiate a function id for Func with the other end of the channel. - template <typename Func> Error negotiateFunction(bool Retry = false) { - return getRemoteFunctionId<Func>(true, Retry).takeError(); - } - - /// Append a call Func, does not call send on the channel. - /// The first argument specifies a user-defined handler to be run when the - /// function returns. The handler should take an Expected<Func::ReturnType>, - /// or an Error (if Func::ReturnType is void). The handler will be called - /// with an error if the return value is abandoned due to a channel error. - template <typename Func, typename HandlerT, typename... ArgTs> - Error appendCallAsync(HandlerT Handler, const ArgTs &...Args) { - - static_assert( - detail::RPCArgTypeCheck<CanSerializeCheck, typename Func::Type, - void(ArgTs...)>::value, - ""); - - // Look up the function ID. 
- FunctionIdT FnId; - if (auto FnIdOrErr = getRemoteFunctionId<Func>(LazyAutoNegotiation, false)) - FnId = *FnIdOrErr; - else { - // Negotiation failed. Notify the handler then return the negotiate-failed - // error. - cantFail(Handler(make_error<ResponseAbandoned>())); - return FnIdOrErr.takeError(); - } - - SequenceNumberT SeqNo; // initialized in locked scope below. - { - // Lock the pending responses map and sequence number manager. - std::lock_guard<std::mutex> Lock(ResponsesMutex); - - // Allocate a sequence number. - SeqNo = SequenceNumberMgr.getSequenceNumber(); - assert(!PendingResponses.count(SeqNo) && - "Sequence number already allocated"); - - // Install the user handler. - PendingResponses[SeqNo] = - detail::createResponseHandler<ChannelT, typename Func::ReturnType>( - std::move(Handler)); - } - - // Open the function call message. - if (auto Err = C.startSendMessage(FnId, SeqNo)) { - abandonPendingResponses(); - return Err; - } - - // Serialize the call arguments. - if (auto Err = detail::HandlerTraits<typename Func::Type>::serializeArgs( - C, Args...)) { - abandonPendingResponses(); - return Err; - } - - // Close the function call messagee. - if (auto Err = C.endSendMessage()) { - abandonPendingResponses(); - return Err; - } - - return Error::success(); - } - - Error sendAppendedCalls() { return C.send(); }; - - template <typename Func, typename HandlerT, typename... ArgTs> - Error callAsync(HandlerT Handler, const ArgTs &...Args) { - if (auto Err = appendCallAsync<Func>(std::move(Handler), Args...)) - return Err; - return C.send(); - } - - /// Handle one incoming call. - Error handleOne() { - FunctionIdT FnId; - SequenceNumberT SeqNo; - if (auto Err = C.startReceiveMessage(FnId, SeqNo)) { - abandonPendingResponses(); - return Err; - } - if (FnId == ResponseId) - return handleResponse(SeqNo); - auto I = Handlers.find(FnId); - if (I != Handlers.end()) - return I->second(C, SeqNo); - - // else: No handler found. Report error to client? 
- return make_error<BadFunctionCall<FunctionIdT, SequenceNumberT>>(FnId, - SeqNo); - } - - /// Helper for handling setter procedures - this method returns a functor that - /// sets the variables referred to by Args... to values deserialized from the - /// channel. - /// E.g. - /// - /// typedef Function<0, bool, int> Func1; - /// - /// ... - /// bool B; - /// int I; - /// if (auto Err = expect<Func1>(Channel, readArgs(B, I))) - /// /* Handle Args */ ; - /// - template <typename... ArgTs> - static detail::ReadArgs<ArgTs...> readArgs(ArgTs &...Args) { - return detail::ReadArgs<ArgTs...>(Args...); - } - - /// Abandon all outstanding result handlers. - /// - /// This will call all currently registered result handlers to receive an - /// "abandoned" error as their argument. This is used internally by the RPC - /// in error situations, but can also be called directly by clients who are - /// disconnecting from the remote and don't or can't expect responses to their - /// outstanding calls. (Especially for outstanding blocking calls, calling - /// this function may be necessary to avoid dead threads). - void abandonPendingResponses() { - // Lock the pending responses map and sequence number manager. - std::lock_guard<std::mutex> Lock(ResponsesMutex); - - for (auto &KV : PendingResponses) - KV.second->abandon(); - PendingResponses.clear(); - SequenceNumberMgr.reset(); - } - - /// Remove the handler for the given function. - /// A handler must currently be registered for this function. - template <typename Func> void removeHandler() { - auto IdItr = LocalFunctionIds.find(Func::getPrototype()); - assert(IdItr != LocalFunctionIds.end() && - "Function does not have a registered handler"); - auto HandlerItr = Handlers.find(IdItr->second); - assert(HandlerItr != Handlers.end() && - "Function does not have a registered handler"); - Handlers.erase(HandlerItr); - } - - /// Clear all handlers. 
- void clearHandlers() { Handlers.clear(); } - -protected: - FunctionIdT getInvalidFunctionId() const { - return FnIdAllocator.getInvalidId(); - } - - /// Add the given handler to the handler map and make it available for - /// autonegotiation and execution. - template <typename Func, typename HandlerT> - void addHandlerImpl(HandlerT Handler) { - - static_assert(detail::RPCArgTypeCheck< - CanDeserializeCheck, typename Func::Type, - typename detail::HandlerTraits<HandlerT>::Type>::value, - ""); - - FunctionIdT NewFnId = FnIdAllocator.template allocate<Func>(); - LocalFunctionIds[Func::getPrototype()] = NewFnId; - Handlers[NewFnId] = wrapHandler<Func>(std::move(Handler)); - } - - template <typename Func, typename HandlerT> - void addAsyncHandlerImpl(HandlerT Handler) { - - static_assert( - detail::RPCArgTypeCheck< - CanDeserializeCheck, typename Func::Type, - typename detail::AsyncHandlerTraits< - typename detail::HandlerTraits<HandlerT>::Type>::Type>::value, - ""); - - FunctionIdT NewFnId = FnIdAllocator.template allocate<Func>(); - LocalFunctionIds[Func::getPrototype()] = NewFnId; - Handlers[NewFnId] = wrapAsyncHandler<Func>(std::move(Handler)); - } - - Error handleResponse(SequenceNumberT SeqNo) { - using Handler = typename decltype(PendingResponses)::mapped_type; - Handler PRHandler; - - { - // Lock the pending responses map and sequence number manager. - std::unique_lock<std::mutex> Lock(ResponsesMutex); - auto I = PendingResponses.find(SeqNo); - - if (I != PendingResponses.end()) { - PRHandler = std::move(I->second); - PendingResponses.erase(I); - SequenceNumberMgr.releaseSequenceNumber(SeqNo); - } else { - // Unlock the pending results map to prevent recursive lock. 
- Lock.unlock(); - abandonPendingResponses(); - return make_error<InvalidSequenceNumberForResponse<SequenceNumberT>>( - SeqNo); - } - } - - assert(PRHandler && - "If we didn't find a response handler we should have bailed out"); - - if (auto Err = PRHandler->handleResponse(C)) { - abandonPendingResponses(); - return Err; - } - - return Error::success(); - } - - FunctionIdT handleNegotiate(const std::string &Name) { - auto I = LocalFunctionIds.find(Name); - if (I == LocalFunctionIds.end()) - return getInvalidFunctionId(); - return I->second; - } - - // Find the remote FunctionId for the given function. - template <typename Func> - Expected<FunctionIdT> getRemoteFunctionId(bool NegotiateIfNotInMap, - bool NegotiateIfInvalid) { - bool DoNegotiate; - - // Check if we already have a function id... - auto I = RemoteFunctionIds.find(Func::getPrototype()); - if (I != RemoteFunctionIds.end()) { - // If it's valid there's nothing left to do. - if (I->second != getInvalidFunctionId()) - return I->second; - DoNegotiate = NegotiateIfInvalid; - } else - DoNegotiate = NegotiateIfNotInMap; - - // We don't have a function id for Func yet, but we're allowed to try to - // negotiate one. - if (DoNegotiate) { - auto &Impl = static_cast<ImplT &>(*this); - if (auto RemoteIdOrErr = - Impl.template callB<OrcRPCNegotiate>(Func::getPrototype())) { - RemoteFunctionIds[Func::getPrototype()] = *RemoteIdOrErr; - if (*RemoteIdOrErr == getInvalidFunctionId()) - return make_error<CouldNotNegotiate>(Func::getPrototype()); - return *RemoteIdOrErr; - } else - return RemoteIdOrErr.takeError(); - } - - // No key was available in the map and we weren't allowed to try to - // negotiate one, so return an unknown function error. 
- return make_error<CouldNotNegotiate>(Func::getPrototype()); - } - - using WrappedHandlerFn = std::function<Error(ChannelT &, SequenceNumberT)>; - - // Wrap the given user handler in the necessary argument-deserialization code, - // result-serialization code, and call to the launch policy (if present). - template <typename Func, typename HandlerT> - WrappedHandlerFn wrapHandler(HandlerT Handler) { - return [this, Handler](ChannelT &Channel, - SequenceNumberT SeqNo) mutable -> Error { - // Start by deserializing the arguments. - using ArgsTuple = typename detail::RPCFunctionArgsTuple< - typename detail::HandlerTraits<HandlerT>::Type>::Type; - auto Args = std::make_shared<ArgsTuple>(); - - if (auto Err = - detail::HandlerTraits<typename Func::Type>::deserializeArgs( - Channel, *Args)) - return Err; - - // GCC 4.7 and 4.8 incorrectly issue a -Wunused-but-set-variable warning - // for RPCArgs. Void cast RPCArgs to work around this for now. - // FIXME: Remove this workaround once we can assume a working GCC version. - (void)Args; - - // End receieve message, unlocking the channel for reading. - if (auto Err = Channel.endReceiveMessage()) - return Err; - - using HTraits = detail::HandlerTraits<HandlerT>; - using FuncReturn = typename Func::ReturnType; - return detail::respond<FuncReturn>(Channel, ResponseId, SeqNo, - HTraits::unpackAndRun(Handler, *Args)); - }; - } - - // Wrap the given user handler in the necessary argument-deserialization code, - // result-serialization code, and call to the launch policy (if present). - template <typename Func, typename HandlerT> - WrappedHandlerFn wrapAsyncHandler(HandlerT Handler) { - return [this, Handler](ChannelT &Channel, - SequenceNumberT SeqNo) mutable -> Error { - // Start by deserializing the arguments. 
- using AHTraits = detail::AsyncHandlerTraits< - typename detail::HandlerTraits<HandlerT>::Type>; - using ArgsTuple = - typename detail::RPCFunctionArgsTuple<typename AHTraits::Type>::Type; - auto Args = std::make_shared<ArgsTuple>(); - - if (auto Err = - detail::HandlerTraits<typename Func::Type>::deserializeArgs( - Channel, *Args)) - return Err; - - // GCC 4.7 and 4.8 incorrectly issue a -Wunused-but-set-variable warning - // for RPCArgs. Void cast RPCArgs to work around this for now. - // FIXME: Remove this workaround once we can assume a working GCC version. - (void)Args; - - // End receieve message, unlocking the channel for reading. - if (auto Err = Channel.endReceiveMessage()) - return Err; - - using HTraits = detail::HandlerTraits<HandlerT>; - using FuncReturn = typename Func::ReturnType; - auto Responder = [this, - SeqNo](typename AHTraits::ResultType RetVal) -> Error { - return detail::respond<FuncReturn>(C, ResponseId, SeqNo, - std::move(RetVal)); - }; - - return HTraits::unpackAndRunAsync(Handler, Responder, *Args); - }; - } - - ChannelT &C; - - bool LazyAutoNegotiation; - - RPCFunctionIdAllocator<FunctionIdT> FnIdAllocator; - - FunctionIdT ResponseId; - std::map<std::string, FunctionIdT> LocalFunctionIds; - std::map<const char *, FunctionIdT> RemoteFunctionIds; - - std::map<FunctionIdT, WrappedHandlerFn> Handlers; - - std::mutex ResponsesMutex; - detail::SequenceNumberManager<SequenceNumberT> SequenceNumberMgr; - std::map<SequenceNumberT, std::unique_ptr<detail::ResponseHandler<ChannelT>>> - PendingResponses; -}; - -} // end namespace detail - -template <typename ChannelT, typename FunctionIdT = uint32_t, - typename SequenceNumberT = uint32_t> -class MultiThreadedRPCEndpoint - : public detail::RPCEndpointBase< - MultiThreadedRPCEndpoint<ChannelT, FunctionIdT, SequenceNumberT>, - ChannelT, FunctionIdT, SequenceNumberT> { -private: - using BaseClass = detail::RPCEndpointBase< - MultiThreadedRPCEndpoint<ChannelT, FunctionIdT, SequenceNumberT>, - ChannelT, 
FunctionIdT, SequenceNumberT>; - -public: - MultiThreadedRPCEndpoint(ChannelT &C, bool LazyAutoNegotiation) - : BaseClass(C, LazyAutoNegotiation) {} - - /// Add a handler for the given RPC function. - /// This installs the given handler functor for the given RPCFunction, and - /// makes the RPC function available for negotiation/calling from the remote. - template <typename Func, typename HandlerT> - void addHandler(HandlerT Handler) { - return this->template addHandlerImpl<Func>(std::move(Handler)); - } - - /// Add a class-method as a handler. - template <typename Func, typename ClassT, typename RetT, typename... ArgTs> - void addHandler(ClassT &Object, RetT (ClassT::*Method)(ArgTs...)) { - addHandler<Func>( - detail::MemberFnWrapper<ClassT, RetT, ArgTs...>(Object, Method)); - } - - template <typename Func, typename HandlerT> - void addAsyncHandler(HandlerT Handler) { - return this->template addAsyncHandlerImpl<Func>(std::move(Handler)); - } - - /// Add a class-method as a handler. - template <typename Func, typename ClassT, typename RetT, typename... ArgTs> - void addAsyncHandler(ClassT &Object, RetT (ClassT::*Method)(ArgTs...)) { - addAsyncHandler<Func>( - detail::MemberFnWrapper<ClassT, RetT, ArgTs...>(Object, Method)); - } - - /// Return type for non-blocking call primitives. - template <typename Func> - using NonBlockingCallResult = typename detail::ResultTraits< - typename Func::ReturnType>::ReturnFutureType; - - /// Call Func on Channel C. Does not block, does not call send. Returns a pair - /// of a future result and the sequence number assigned to the result. - /// - /// This utility function is primarily used for single-threaded mode support, - /// where the sequence number can be used to wait for the corresponding - /// result. In multi-threaded mode the appendCallNB method, which does not - /// return the sequence numeber, should be preferred. - template <typename Func, typename... 
ArgTs> - Expected<NonBlockingCallResult<Func>> appendCallNB(const ArgTs &...Args) { - using RTraits = detail::ResultTraits<typename Func::ReturnType>; - using ErrorReturn = typename RTraits::ErrorReturnType; - using ErrorReturnPromise = typename RTraits::ReturnPromiseType; - - ErrorReturnPromise Promise; - auto FutureResult = Promise.get_future(); - - if (auto Err = this->template appendCallAsync<Func>( - [Promise = std::move(Promise)](ErrorReturn RetOrErr) mutable { - Promise.set_value(std::move(RetOrErr)); - return Error::success(); - }, - Args...)) { - RTraits::consumeAbandoned(FutureResult.get()); - return std::move(Err); - } - return std::move(FutureResult); - } - - /// The same as appendCallNBWithSeq, except that it calls C.send() to - /// flush the channel after serializing the call. - template <typename Func, typename... ArgTs> - Expected<NonBlockingCallResult<Func>> callNB(const ArgTs &...Args) { - auto Result = appendCallNB<Func>(Args...); - if (!Result) - return Result; - if (auto Err = this->C.send()) { - this->abandonPendingResponses(); - detail::ResultTraits<typename Func::ReturnType>::consumeAbandoned( - std::move(Result->get())); - return std::move(Err); - } - return Result; - } - - /// Call Func on Channel C. Blocks waiting for a result. Returns an Error - /// for void functions or an Expected<T> for functions returning a T. - /// - /// This function is for use in threaded code where another thread is - /// handling responses and incoming calls. - template <typename Func, typename... ArgTs, - typename AltRetT = typename Func::ReturnType> - typename detail::ResultTraits<AltRetT>::ErrorReturnType - callB(const ArgTs &...Args) { - if (auto FutureResOrErr = callNB<Func>(Args...)) - return FutureResOrErr->get(); - else - return FutureResOrErr.takeError(); - } - - /// Handle incoming RPC calls. 
- Error handlerLoop() { - while (true) - if (auto Err = this->handleOne()) - return Err; - return Error::success(); - } -}; - -template <typename ChannelT, typename FunctionIdT = uint32_t, - typename SequenceNumberT = uint32_t> -class SingleThreadedRPCEndpoint - : public detail::RPCEndpointBase< - SingleThreadedRPCEndpoint<ChannelT, FunctionIdT, SequenceNumberT>, - ChannelT, FunctionIdT, SequenceNumberT> { -private: - using BaseClass = detail::RPCEndpointBase< - SingleThreadedRPCEndpoint<ChannelT, FunctionIdT, SequenceNumberT>, - ChannelT, FunctionIdT, SequenceNumberT>; - -public: - SingleThreadedRPCEndpoint(ChannelT &C, bool LazyAutoNegotiation) - : BaseClass(C, LazyAutoNegotiation) {} - - template <typename Func, typename HandlerT> - void addHandler(HandlerT Handler) { - return this->template addHandlerImpl<Func>(std::move(Handler)); - } - - template <typename Func, typename ClassT, typename RetT, typename... ArgTs> - void addHandler(ClassT &Object, RetT (ClassT::*Method)(ArgTs...)) { - addHandler<Func>( - detail::MemberFnWrapper<ClassT, RetT, ArgTs...>(Object, Method)); - } - - template <typename Func, typename HandlerT> - void addAsyncHandler(HandlerT Handler) { - return this->template addAsyncHandlerImpl<Func>(std::move(Handler)); - } - - /// Add a class-method as a handler. - template <typename Func, typename ClassT, typename RetT, typename... ArgTs> - void addAsyncHandler(ClassT &Object, RetT (ClassT::*Method)(ArgTs...)) { - addAsyncHandler<Func>( - detail::MemberFnWrapper<ClassT, RetT, ArgTs...>(Object, Method)); - } - - template <typename Func, typename... 
ArgTs, - typename AltRetT = typename Func::ReturnType> - typename detail::ResultTraits<AltRetT>::ErrorReturnType - callB(const ArgTs &...Args) { - bool ReceivedResponse = false; - using AltRetTraits = detail::ResultTraits<AltRetT>; - using ResultType = typename AltRetTraits::ErrorReturnType; - ResultType Result = AltRetTraits::createBlankErrorReturnValue(); - - // We have to 'Check' result (which we know is in a success state at this - // point) so that it can be overwritten in the async handler. - (void)!!Result; - - if (Error Err = this->template appendCallAsync<Func>( - [&](ResultType R) { - Result = std::move(R); - ReceivedResponse = true; - return Error::success(); - }, - Args...)) { - AltRetTraits::consumeAbandoned(std::move(Result)); - return AltRetTraits::returnError(std::move(Err)); - } - - if (Error Err = this->C.send()) { - AltRetTraits::consumeAbandoned(std::move(Result)); - return AltRetTraits::returnError(std::move(Err)); - } - - while (!ReceivedResponse) { - if (Error Err = this->handleOne()) { - AltRetTraits::consumeAbandoned(std::move(Result)); - return AltRetTraits::returnError(std::move(Err)); - } - } - - return Result; - } -}; - -/// Asynchronous dispatch for a function on an RPC endpoint. -template <typename RPCClass, typename Func> class RPCAsyncDispatch { -public: - RPCAsyncDispatch(RPCClass &Endpoint) : Endpoint(Endpoint) {} - - template <typename HandlerT, typename... ArgTs> - Error operator()(HandlerT Handler, const ArgTs &...Args) const { - return Endpoint.template appendCallAsync<Func>(std::move(Handler), Args...); - } - -private: - RPCClass &Endpoint; -}; - -/// Construct an asynchronous dispatcher from an RPC endpoint and a Func. -template <typename Func, typename RPCEndpointT> -RPCAsyncDispatch<RPCEndpointT, Func> rpcAsyncDispatch(RPCEndpointT &Endpoint) { - return RPCAsyncDispatch<RPCEndpointT, Func>(Endpoint); -} - -/// Allows a set of asynchrounous calls to be dispatched, and then -/// waited on as a group. 
-class ParallelCallGroup { -public: - ParallelCallGroup() = default; - ParallelCallGroup(const ParallelCallGroup &) = delete; - ParallelCallGroup &operator=(const ParallelCallGroup &) = delete; - - /// Make as asynchronous call. - template <typename AsyncDispatcher, typename HandlerT, typename... ArgTs> - Error call(const AsyncDispatcher &AsyncDispatch, HandlerT Handler, - const ArgTs &...Args) { - // Increment the count of outstanding calls. This has to happen before - // we invoke the call, as the handler may (depending on scheduling) - // be run immediately on another thread, and we don't want the decrement - // in the wrapped handler below to run before the increment. - { - std::unique_lock<std::mutex> Lock(M); - ++NumOutstandingCalls; - } - - // Wrap the user handler in a lambda that will decrement the - // outstanding calls count, then poke the condition variable. - using ArgType = typename detail::ResponseHandlerArg< - typename detail::HandlerTraits<HandlerT>::Type>::ArgType; - auto WrappedHandler = [this, Handler = std::move(Handler)](ArgType Arg) { - auto Err = Handler(std::move(Arg)); - std::unique_lock<std::mutex> Lock(M); - --NumOutstandingCalls; - CV.notify_all(); - return Err; - }; - - return AsyncDispatch(std::move(WrappedHandler), Args...); - } - - /// Blocks until all calls have been completed and their return value - /// handlers run. - void wait() { - std::unique_lock<std::mutex> Lock(M); - while (NumOutstandingCalls > 0) - CV.wait(Lock); - } - -private: - std::mutex M; - std::condition_variable CV; - uint32_t NumOutstandingCalls = 0; -}; - -/// Convenience class for grouping RPCFunctions into APIs that can be -/// negotiated as a block. -/// -template <typename... Funcs> class APICalls { -public: - /// Test whether this API contains Function F. - template <typename F> class Contains { - public: - static const bool value = false; - }; - - /// Negotiate all functions in this API. 
- template <typename RPCEndpoint> static Error negotiate(RPCEndpoint &R) { - return Error::success(); - } -}; - -template <typename Func, typename... Funcs> class APICalls<Func, Funcs...> { -public: - template <typename F> class Contains { - public: - static const bool value = std::is_same<F, Func>::value | - APICalls<Funcs...>::template Contains<F>::value; - }; - - template <typename RPCEndpoint> static Error negotiate(RPCEndpoint &R) { - if (auto Err = R.template negotiateFunction<Func>()) - return Err; - return APICalls<Funcs...>::negotiate(R); - } -}; - -template <typename... InnerFuncs, typename... Funcs> -class APICalls<APICalls<InnerFuncs...>, Funcs...> { -public: - template <typename F> class Contains { - public: - static const bool value = - APICalls<InnerFuncs...>::template Contains<F>::value | - APICalls<Funcs...>::template Contains<F>::value; - }; - - template <typename RPCEndpoint> static Error negotiate(RPCEndpoint &R) { - if (auto Err = APICalls<InnerFuncs...>::negotiate(R)) - return Err; - return APICalls<Funcs...>::negotiate(R); - } -}; - -} // end namespace shared -} // end namespace orc -} // end namespace llvm - -#endif // LLVM_EXECUTIONENGINE_ORC_SHARED_RPCUTILS_H diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Shared/RawByteChannel.h b/llvm/include/llvm/ExecutionEngine/Orc/Shared/RawByteChannel.h deleted file mode 100644 index 2ee471939251..000000000000 --- a/llvm/include/llvm/ExecutionEngine/Orc/Shared/RawByteChannel.h +++ /dev/null @@ -1,183 +0,0 @@ -//===- RawByteChannel.h -----------------------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_EXECUTIONENGINE_ORC_SHARED_RAWBYTECHANNEL_H -#define LLVM_EXECUTIONENGINE_ORC_SHARED_RAWBYTECHANNEL_H - -#include "llvm/ADT/StringRef.h" -#include "llvm/ExecutionEngine/Orc/Shared/Serialization.h" -#include "llvm/Support/Endian.h" -#include "llvm/Support/Error.h" -#include <cstdint> -#include <mutex> -#include <string> -#include <type_traits> - -namespace llvm { -namespace orc { -namespace shared { - -/// Interface for byte-streams to be used with ORC Serialization. -class RawByteChannel { -public: - virtual ~RawByteChannel() = default; - - /// Read Size bytes from the stream into *Dst. - virtual Error readBytes(char *Dst, unsigned Size) = 0; - - /// Read size bytes from *Src and append them to the stream. - virtual Error appendBytes(const char *Src, unsigned Size) = 0; - - /// Flush the stream if possible. - virtual Error send() = 0; - - /// Notify the channel that we're starting a message send. - /// Locks the channel for writing. - template <typename FunctionIdT, typename SequenceIdT> - Error startSendMessage(const FunctionIdT &FnId, const SequenceIdT &SeqNo) { - writeLock.lock(); - if (auto Err = serializeSeq(*this, FnId, SeqNo)) { - writeLock.unlock(); - return Err; - } - return Error::success(); - } - - /// Notify the channel that we're ending a message send. - /// Unlocks the channel for writing. - Error endSendMessage() { - writeLock.unlock(); - return Error::success(); - } - - /// Notify the channel that we're starting a message receive. - /// Locks the channel for reading. 
- template <typename FunctionIdT, typename SequenceNumberT> - Error startReceiveMessage(FunctionIdT &FnId, SequenceNumberT &SeqNo) { - readLock.lock(); - if (auto Err = deserializeSeq(*this, FnId, SeqNo)) { - readLock.unlock(); - return Err; - } - return Error::success(); - } - - /// Notify the channel that we're ending a message receive. - /// Unlocks the channel for reading. - Error endReceiveMessage() { - readLock.unlock(); - return Error::success(); - } - - /// Get the lock for stream reading. - std::mutex &getReadLock() { return readLock; } - - /// Get the lock for stream writing. - std::mutex &getWriteLock() { return writeLock; } - -private: - std::mutex readLock, writeLock; -}; - -template <typename ChannelT, typename T> -class SerializationTraits< - ChannelT, T, T, - std::enable_if_t< - std::is_base_of<RawByteChannel, ChannelT>::value && - (std::is_same<T, uint8_t>::value || std::is_same<T, int8_t>::value || - std::is_same<T, uint16_t>::value || std::is_same<T, int16_t>::value || - std::is_same<T, uint32_t>::value || std::is_same<T, int32_t>::value || - std::is_same<T, uint64_t>::value || std::is_same<T, int64_t>::value || - std::is_same<T, char>::value)>> { -public: - static Error serialize(ChannelT &C, T V) { - support::endian::byte_swap<T, support::big>(V); - return C.appendBytes(reinterpret_cast<const char *>(&V), sizeof(T)); - }; - - static Error deserialize(ChannelT &C, T &V) { - if (auto Err = C.readBytes(reinterpret_cast<char *>(&V), sizeof(T))) - return Err; - support::endian::byte_swap<T, support::big>(V); - return Error::success(); - }; -}; - -template <typename ChannelT> -class SerializationTraits< - ChannelT, bool, bool, - std::enable_if_t<std::is_base_of<RawByteChannel, ChannelT>::value>> { -public: - static Error serialize(ChannelT &C, bool V) { - uint8_t Tmp = V ? 
1 : 0; - if (auto Err = C.appendBytes(reinterpret_cast<const char *>(&Tmp), 1)) - return Err; - return Error::success(); - } - - static Error deserialize(ChannelT &C, bool &V) { - uint8_t Tmp = 0; - if (auto Err = C.readBytes(reinterpret_cast<char *>(&Tmp), 1)) - return Err; - V = Tmp != 0; - return Error::success(); - } -}; - -template <typename ChannelT> -class SerializationTraits< - ChannelT, std::string, StringRef, - std::enable_if_t<std::is_base_of<RawByteChannel, ChannelT>::value>> { -public: - /// Serialization channel serialization for std::strings. - static Error serialize(RawByteChannel &C, StringRef S) { - if (auto Err = serializeSeq(C, static_cast<uint64_t>(S.size()))) - return Err; - return C.appendBytes((const char *)S.data(), S.size()); - } -}; - -template <typename ChannelT, typename T> -class SerializationTraits< - ChannelT, std::string, T, - std::enable_if_t<std::is_base_of<RawByteChannel, ChannelT>::value && - (std::is_same<T, const char *>::value || - std::is_same<T, char *>::value)>> { -public: - static Error serialize(RawByteChannel &C, const char *S) { - return SerializationTraits<ChannelT, std::string, StringRef>::serialize(C, - S); - } -}; - -template <typename ChannelT> -class SerializationTraits< - ChannelT, std::string, std::string, - std::enable_if_t<std::is_base_of<RawByteChannel, ChannelT>::value>> { -public: - /// Serialization channel serialization for std::strings. - static Error serialize(RawByteChannel &C, const std::string &S) { - return SerializationTraits<ChannelT, std::string, StringRef>::serialize(C, - S); - } - - /// Serialization channel deserialization for std::strings. 
- static Error deserialize(RawByteChannel &C, std::string &S) { - uint64_t Count = 0; - if (auto Err = deserializeSeq(C, Count)) - return Err; - S.resize(Count); - return C.readBytes(&S[0], Count); - } -}; - -} // end namespace shared -} // end namespace orc -} // end namespace llvm - -#endif // LLVM_EXECUTIONENGINE_ORC_SHARED_RAWBYTECHANNEL_H diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Shared/Serialization.h b/llvm/include/llvm/ExecutionEngine/Orc/Shared/Serialization.h deleted file mode 100644 index 0ea483ba2abb..000000000000 --- a/llvm/include/llvm/ExecutionEngine/Orc/Shared/Serialization.h +++ /dev/null @@ -1,769 +0,0 @@ -//===- Serialization.h ------------------------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_EXECUTIONENGINE_ORC_SHARED_SERIALIZATION_H -#define LLVM_EXECUTIONENGINE_ORC_SHARED_SERIALIZATION_H - -#include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ExecutionEngine/Orc/Shared/OrcError.h" -#include "llvm/Support/thread.h" -#include <map> -#include <mutex> -#include <set> -#include <sstream> -#include <string> -#include <vector> - -namespace llvm { -namespace orc { -namespace shared { - -template <typename T> class SerializationTypeName; - -/// TypeNameSequence is a utility for rendering sequences of types to a string -/// by rendering each type, separated by ", ". -template <typename... ArgTs> class SerializationTypeNameSequence {}; - -/// Render an empty TypeNameSequence to an ostream. -template <typename OStream> -OStream &operator<<(OStream &OS, const SerializationTypeNameSequence<> &V) { - return OS; -} - -/// Render a TypeNameSequence of a single type to an ostream. 
-template <typename OStream, typename ArgT> -OStream &operator<<(OStream &OS, const SerializationTypeNameSequence<ArgT> &V) { - OS << SerializationTypeName<ArgT>::getName(); - return OS; -} - -/// Render a TypeNameSequence of more than one type to an ostream. -template <typename OStream, typename ArgT1, typename ArgT2, typename... ArgTs> -OStream & -operator<<(OStream &OS, - const SerializationTypeNameSequence<ArgT1, ArgT2, ArgTs...> &V) { - OS << SerializationTypeName<ArgT1>::getName() << ", " - << SerializationTypeNameSequence<ArgT2, ArgTs...>(); - return OS; -} - -template <> class SerializationTypeName<void> { -public: - static const char *getName() { return "void"; } -}; - -template <> class SerializationTypeName<int8_t> { -public: - static const char *getName() { return "int8_t"; } -}; - -template <> class SerializationTypeName<uint8_t> { -public: - static const char *getName() { return "uint8_t"; } -}; - -template <> class SerializationTypeName<int16_t> { -public: - static const char *getName() { return "int16_t"; } -}; - -template <> class SerializationTypeName<uint16_t> { -public: - static const char *getName() { return "uint16_t"; } -}; - -template <> class SerializationTypeName<int32_t> { -public: - static const char *getName() { return "int32_t"; } -}; - -template <> class SerializationTypeName<uint32_t> { -public: - static const char *getName() { return "uint32_t"; } -}; - -template <> class SerializationTypeName<int64_t> { -public: - static const char *getName() { return "int64_t"; } -}; - -template <> class SerializationTypeName<uint64_t> { -public: - static const char *getName() { return "uint64_t"; } -}; - -template <> class SerializationTypeName<bool> { -public: - static const char *getName() { return "bool"; } -}; - -template <> class SerializationTypeName<std::string> { -public: - static const char *getName() { return "std::string"; } -}; - -template <> class SerializationTypeName<Error> { -public: - static const char *getName() { return 
"Error"; } -}; - -template <typename T> class SerializationTypeName<Expected<T>> { -public: - static const char *getName() { - static std::string Name = [] { - std::string Name; - raw_string_ostream(Name) - << "Expected<" << SerializationTypeNameSequence<T>() << ">"; - return Name; - }(); - return Name.data(); - } -}; - -template <typename T1, typename T2> -class SerializationTypeName<std::pair<T1, T2>> { -public: - static const char *getName() { - static std::string Name = [] { - std::string Name; - raw_string_ostream(Name) - << "std::pair<" << SerializationTypeNameSequence<T1, T2>() << ">"; - return Name; - }(); - return Name.data(); - } -}; - -template <typename... ArgTs> class SerializationTypeName<std::tuple<ArgTs...>> { -public: - static const char *getName() { - static std::string Name = [] { - std::string Name; - raw_string_ostream(Name) - << "std::tuple<" << SerializationTypeNameSequence<ArgTs...>() << ">"; - return Name; - }(); - return Name.data(); - } -}; - -template <typename T> class SerializationTypeName<Optional<T>> { -public: - static const char *getName() { - static std::string Name = [] { - std::string Name; - raw_string_ostream(Name) - << "Optional<" << SerializationTypeName<T>::getName() << ">"; - return Name; - }(); - return Name.data(); - } -}; - -template <typename T> class SerializationTypeName<std::vector<T>> { -public: - static const char *getName() { - static std::string Name = [] { - std::string Name; - raw_string_ostream(Name) - << "std::vector<" << SerializationTypeName<T>::getName() << ">"; - return Name; - }(); - return Name.data(); - } -}; - -template <typename T> class SerializationTypeName<std::set<T>> { -public: - static const char *getName() { - static std::string Name = [] { - std::string Name; - raw_string_ostream(Name) - << "std::set<" << SerializationTypeName<T>::getName() << ">"; - return Name; - }(); - return Name.data(); - } -}; - -template <typename K, typename V> class SerializationTypeName<std::map<K, V>> { -public: - 
static const char *getName() { - static std::string Name = [] { - std::string Name; - raw_string_ostream(Name) - << "std::map<" << SerializationTypeNameSequence<K, V>() << ">"; - return Name; - }(); - return Name.data(); - } -}; - -/// The SerializationTraits<ChannelT, T> class describes how to serialize and -/// deserialize an instance of type T to/from an abstract channel of type -/// ChannelT. It also provides a representation of the type's name via the -/// getName method. -/// -/// Specializations of this class should provide the following functions: -/// -/// @code{.cpp} -/// -/// static const char* getName(); -/// static Error serialize(ChannelT&, const T&); -/// static Error deserialize(ChannelT&, T&); -/// -/// @endcode -/// -/// The third argument of SerializationTraits is intended to support SFINAE. -/// E.g.: -/// -/// @code{.cpp} -/// -/// class MyVirtualChannel { ... }; -/// -/// template <DerivedChannelT> -/// class SerializationTraits<DerivedChannelT, bool, -/// std::enable_if_t< -/// std::is_base_of<VirtChannel, DerivedChannel>::value -/// >> { -/// public: -/// static const char* getName() { ... }; -/// } -/// -/// @endcode -template <typename ChannelT, typename WireType, - typename ConcreteType = WireType, typename = void> -class SerializationTraits; - -template <typename ChannelT> class SequenceTraits { -public: - static Error emitSeparator(ChannelT &C) { return Error::success(); } - static Error consumeSeparator(ChannelT &C) { return Error::success(); } -}; - -/// Utility class for serializing sequences of values of varying types. -/// Specializations of this class contain 'serialize' and 'deserialize' methods -/// for the given channel. The ArgTs... list will determine the "over-the-wire" -/// types to be serialized. The serialize and deserialize methods take a list -/// CArgTs... 
("caller arg types") which must be the same length as ArgTs..., -/// but may be different types from ArgTs, provided that for each CArgT there -/// is a SerializationTraits specialization -/// SerializeTraits<ChannelT, ArgT, CArgT> with methods that can serialize the -/// caller argument to over-the-wire value. -template <typename ChannelT, typename... ArgTs> class SequenceSerialization; - -template <typename ChannelT> class SequenceSerialization<ChannelT> { -public: - static Error serialize(ChannelT &C) { return Error::success(); } - static Error deserialize(ChannelT &C) { return Error::success(); } -}; - -template <typename ChannelT, typename ArgT> -class SequenceSerialization<ChannelT, ArgT> { -public: - template <typename CArgT> static Error serialize(ChannelT &C, CArgT &&CArg) { - return SerializationTraits<ChannelT, ArgT, std::decay_t<CArgT>>::serialize( - C, std::forward<CArgT>(CArg)); - } - - template <typename CArgT> static Error deserialize(ChannelT &C, CArgT &CArg) { - return SerializationTraits<ChannelT, ArgT, CArgT>::deserialize(C, CArg); - } -}; - -template <typename ChannelT, typename ArgT, typename... ArgTs> -class SequenceSerialization<ChannelT, ArgT, ArgTs...> { -public: - template <typename CArgT, typename... CArgTs> - static Error serialize(ChannelT &C, CArgT &&CArg, CArgTs &&...CArgs) { - if (auto Err = - SerializationTraits<ChannelT, ArgT, std::decay_t<CArgT>>::serialize( - C, std::forward<CArgT>(CArg))) - return Err; - if (auto Err = SequenceTraits<ChannelT>::emitSeparator(C)) - return Err; - return SequenceSerialization<ChannelT, ArgTs...>::serialize( - C, std::forward<CArgTs>(CArgs)...); - } - - template <typename CArgT, typename... 
CArgTs> - static Error deserialize(ChannelT &C, CArgT &CArg, CArgTs &...CArgs) { - if (auto Err = - SerializationTraits<ChannelT, ArgT, CArgT>::deserialize(C, CArg)) - return Err; - if (auto Err = SequenceTraits<ChannelT>::consumeSeparator(C)) - return Err; - return SequenceSerialization<ChannelT, ArgTs...>::deserialize(C, CArgs...); - } -}; - -template <typename ChannelT, typename... ArgTs> -Error serializeSeq(ChannelT &C, ArgTs &&...Args) { - return SequenceSerialization<ChannelT, std::decay_t<ArgTs>...>::serialize( - C, std::forward<ArgTs>(Args)...); -} - -template <typename ChannelT, typename... ArgTs> -Error deserializeSeq(ChannelT &C, ArgTs &...Args) { - return SequenceSerialization<ChannelT, ArgTs...>::deserialize(C, Args...); -} - -template <typename ChannelT> class SerializationTraits<ChannelT, Error> { -public: - using WrappedErrorSerializer = - std::function<Error(ChannelT &C, const ErrorInfoBase &)>; - - using WrappedErrorDeserializer = - std::function<Error(ChannelT &C, Error &Err)>; - - template <typename ErrorInfoT, typename SerializeFtor, - typename DeserializeFtor> - static void registerErrorType(std::string Name, SerializeFtor Serialize, - DeserializeFtor Deserialize) { - assert(!Name.empty() && - "The empty string is reserved for the Success value"); - - const std::string *KeyName = nullptr; - { - // We're abusing the stability of std::map here: We take a reference to - // the key of the deserializers map to save us from duplicating the string - // in the serializer. This should be changed to use a stringpool if we - // switch to a map type that may move keys in memory. 
- std::lock_guard<std::recursive_mutex> Lock(DeserializersMutex); - auto I = Deserializers.insert( - Deserializers.begin(), - std::make_pair(std::move(Name), std::move(Deserialize))); - KeyName = &I->first; - } - - { - assert(KeyName != nullptr && "No keyname pointer"); - std::lock_guard<std::recursive_mutex> Lock(SerializersMutex); - Serializers[ErrorInfoT::classID()] = - [KeyName, Serialize = std::move(Serialize)]( - ChannelT &C, const ErrorInfoBase &EIB) -> Error { - assert(EIB.dynamicClassID() == ErrorInfoT::classID() && - "Serializer called for wrong error type"); - if (auto Err = serializeSeq(C, *KeyName)) - return Err; - return Serialize(C, static_cast<const ErrorInfoT &>(EIB)); - }; - } - } - - static Error serialize(ChannelT &C, Error &&Err) { - std::lock_guard<std::recursive_mutex> Lock(SerializersMutex); - - if (!Err) - return serializeSeq(C, std::string()); - - return handleErrors(std::move(Err), [&C](const ErrorInfoBase &EIB) { - auto SI = Serializers.find(EIB.dynamicClassID()); - if (SI == Serializers.end()) - return serializeAsStringError(C, EIB); - return (SI->second)(C, EIB); - }); - } - - static Error deserialize(ChannelT &C, Error &Err) { - std::lock_guard<std::recursive_mutex> Lock(DeserializersMutex); - - std::string Key; - if (auto Err = deserializeSeq(C, Key)) - return Err; - - if (Key.empty()) { - ErrorAsOutParameter EAO(&Err); - Err = Error::success(); - return Error::success(); - } - - auto DI = Deserializers.find(Key); - assert(DI != Deserializers.end() && "No deserializer for error type"); - return (DI->second)(C, Err); - } - -private: - static Error serializeAsStringError(ChannelT &C, const ErrorInfoBase &EIB) { - std::string ErrMsg; - { - raw_string_ostream ErrMsgStream(ErrMsg); - EIB.log(ErrMsgStream); - } - return serialize(C, make_error<StringError>(std::move(ErrMsg), - inconvertibleErrorCode())); - } - - static std::recursive_mutex SerializersMutex; - static std::recursive_mutex DeserializersMutex; - static std::map<const void *, 
WrappedErrorSerializer> Serializers; - static std::map<std::string, WrappedErrorDeserializer> Deserializers; -}; - -template <typename ChannelT> -std::recursive_mutex SerializationTraits<ChannelT, Error>::SerializersMutex; - -template <typename ChannelT> -std::recursive_mutex SerializationTraits<ChannelT, Error>::DeserializersMutex; - -template <typename ChannelT> -std::map<const void *, - typename SerializationTraits<ChannelT, Error>::WrappedErrorSerializer> - SerializationTraits<ChannelT, Error>::Serializers; - -template <typename ChannelT> -std::map<std::string, typename SerializationTraits< - ChannelT, Error>::WrappedErrorDeserializer> - SerializationTraits<ChannelT, Error>::Deserializers; - -/// Registers a serializer and deserializer for the given error type on the -/// given channel type. -template <typename ChannelT, typename ErrorInfoT, typename SerializeFtor, - typename DeserializeFtor> -void registerErrorSerialization(std::string Name, SerializeFtor &&Serialize, - DeserializeFtor &&Deserialize) { - SerializationTraits<ChannelT, Error>::template registerErrorType<ErrorInfoT>( - std::move(Name), std::forward<SerializeFtor>(Serialize), - std::forward<DeserializeFtor>(Deserialize)); -} - -/// Registers serialization/deserialization for StringError. -template <typename ChannelT> void registerStringError() { - static bool AlreadyRegistered = false; - if (!AlreadyRegistered) { - registerErrorSerialization<ChannelT, StringError>( - "StringError", - [](ChannelT &C, const StringError &SE) { - return serializeSeq(C, SE.getMessage()); - }, - [](ChannelT &C, Error &Err) -> Error { - ErrorAsOutParameter EAO(&Err); - std::string Msg; - if (auto E2 = deserializeSeq(C, Msg)) - return E2; - Err = make_error<StringError>( - std::move(Msg), - orcError(OrcErrorCode::UnknownErrorCodeFromRemote)); - return Error::success(); - }); - AlreadyRegistered = true; - } -} - -/// SerializationTraits for Expected<T1> from an Expected<T2>. 
-template <typename ChannelT, typename T1, typename T2> -class SerializationTraits<ChannelT, Expected<T1>, Expected<T2>> { -public: - static Error serialize(ChannelT &C, Expected<T2> &&ValOrErr) { - if (ValOrErr) { - if (auto Err = serializeSeq(C, true)) - return Err; - return SerializationTraits<ChannelT, T1, T2>::serialize(C, *ValOrErr); - } - if (auto Err = serializeSeq(C, false)) - return Err; - return serializeSeq(C, ValOrErr.takeError()); - } - - static Error deserialize(ChannelT &C, Expected<T2> &ValOrErr) { - ExpectedAsOutParameter<T2> EAO(&ValOrErr); - bool HasValue; - if (auto Err = deserializeSeq(C, HasValue)) - return Err; - if (HasValue) - return SerializationTraits<ChannelT, T1, T2>::deserialize(C, *ValOrErr); - Error Err = Error::success(); - if (auto E2 = deserializeSeq(C, Err)) - return E2; - ValOrErr = std::move(Err); - return Error::success(); - } -}; - -/// SerializationTraits for Expected<T1> from a T2. -template <typename ChannelT, typename T1, typename T2> -class SerializationTraits<ChannelT, Expected<T1>, T2> { -public: - static Error serialize(ChannelT &C, T2 &&Val) { - return serializeSeq(C, Expected<T2>(std::forward<T2>(Val))); - } -}; - -/// SerializationTraits for Expected<T1> from an Error. -template <typename ChannelT, typename T> -class SerializationTraits<ChannelT, Expected<T>, Error> { -public: - static Error serialize(ChannelT &C, Error &&Err) { - return serializeSeq(C, Expected<T>(std::move(Err))); - } -}; - -/// SerializationTraits default specialization for std::pair. 
-template <typename ChannelT, typename T1, typename T2, typename T3, typename T4> -class SerializationTraits<ChannelT, std::pair<T1, T2>, std::pair<T3, T4>> { -public: - static Error serialize(ChannelT &C, const std::pair<T3, T4> &V) { - if (auto Err = SerializationTraits<ChannelT, T1, T3>::serialize(C, V.first)) - return Err; - return SerializationTraits<ChannelT, T2, T4>::serialize(C, V.second); - } - - static Error deserialize(ChannelT &C, std::pair<T3, T4> &V) { - if (auto Err = - SerializationTraits<ChannelT, T1, T3>::deserialize(C, V.first)) - return Err; - return SerializationTraits<ChannelT, T2, T4>::deserialize(C, V.second); - } -}; - -/// SerializationTraits default specialization for std::tuple. -template <typename ChannelT, typename... ArgTs> -class SerializationTraits<ChannelT, std::tuple<ArgTs...>> { -public: - /// RPC channel serialization for std::tuple. - static Error serialize(ChannelT &C, const std::tuple<ArgTs...> &V) { - return serializeTupleHelper(C, V, std::index_sequence_for<ArgTs...>()); - } - - /// RPC channel deserialization for std::tuple. - static Error deserialize(ChannelT &C, std::tuple<ArgTs...> &V) { - return deserializeTupleHelper(C, V, std::index_sequence_for<ArgTs...>()); - } - -private: - // Serialization helper for std::tuple. - template <size_t... Is> - static Error serializeTupleHelper(ChannelT &C, const std::tuple<ArgTs...> &V, - std::index_sequence<Is...> _) { - return serializeSeq(C, std::get<Is>(V)...); - } - - // Serialization helper for std::tuple. - template <size_t... Is> - static Error deserializeTupleHelper(ChannelT &C, std::tuple<ArgTs...> &V, - std::index_sequence<Is...> _) { - return deserializeSeq(C, std::get<Is>(V)...); - } -}; - -template <typename ChannelT, typename T> -class SerializationTraits<ChannelT, Optional<T>> { -public: - /// Serialize an Optional<T>. 
- static Error serialize(ChannelT &C, const Optional<T> &O) { - if (auto Err = serializeSeq(C, O != None)) - return Err; - if (O) - if (auto Err = serializeSeq(C, *O)) - return Err; - return Error::success(); - } - - /// Deserialize an Optional<T>. - static Error deserialize(ChannelT &C, Optional<T> &O) { - bool HasValue = false; - if (auto Err = deserializeSeq(C, HasValue)) - return Err; - if (HasValue) - if (auto Err = deserializeSeq(C, *O)) - return Err; - return Error::success(); - }; -}; - -/// SerializationTraits default specialization for std::vector. -template <typename ChannelT, typename T> -class SerializationTraits<ChannelT, std::vector<T>> { -public: - /// Serialize a std::vector<T> from std::vector<T>. - static Error serialize(ChannelT &C, const std::vector<T> &V) { - if (auto Err = serializeSeq(C, static_cast<uint64_t>(V.size()))) - return Err; - - for (const auto &E : V) - if (auto Err = serializeSeq(C, E)) - return Err; - - return Error::success(); - } - - /// Deserialize a std::vector<T> to a std::vector<T>. - static Error deserialize(ChannelT &C, std::vector<T> &V) { - assert(V.empty() && - "Expected default-constructed vector to deserialize into"); - - uint64_t Count = 0; - if (auto Err = deserializeSeq(C, Count)) - return Err; - - V.resize(Count); - for (auto &E : V) - if (auto Err = deserializeSeq(C, E)) - return Err; - - return Error::success(); - } -}; - -/// Enable vector serialization from an ArrayRef. 
-template <typename ChannelT, typename T> -class SerializationTraits<ChannelT, std::vector<T>, ArrayRef<T>> { -public: - static Error serialize(ChannelT &C, ArrayRef<T> V) { - if (auto Err = serializeSeq(C, static_cast<uint64_t>(V.size()))) - return Err; - - for (const auto &E : V) - if (auto Err = serializeSeq(C, E)) - return Err; - - return Error::success(); - } -}; - -template <typename ChannelT, typename T, typename T2> -class SerializationTraits<ChannelT, std::set<T>, std::set<T2>> { -public: - /// Serialize a std::set<T> from std::set<T2>. - static Error serialize(ChannelT &C, const std::set<T2> &S) { - if (auto Err = serializeSeq(C, static_cast<uint64_t>(S.size()))) - return Err; - - for (const auto &E : S) - if (auto Err = SerializationTraits<ChannelT, T, T2>::serialize(C, E)) - return Err; - - return Error::success(); - } - - /// Deserialize a std::set<T> to a std::set<T>. - static Error deserialize(ChannelT &C, std::set<T2> &S) { - assert(S.empty() && "Expected default-constructed set to deserialize into"); - - uint64_t Count = 0; - if (auto Err = deserializeSeq(C, Count)) - return Err; - - while (Count-- != 0) { - T2 Val; - if (auto Err = SerializationTraits<ChannelT, T, T2>::deserialize(C, Val)) - return Err; - - auto Added = S.insert(Val).second; - if (!Added) - return make_error<StringError>("Duplicate element in deserialized set", - orcError(OrcErrorCode::UnknownORCError)); - } - - return Error::success(); - } -}; - -template <typename ChannelT, typename K, typename V, typename K2, typename V2> -class SerializationTraits<ChannelT, std::map<K, V>, std::map<K2, V2>> { -public: - /// Serialize a std::map<K, V> from std::map<K2, V2>. 
- static Error serialize(ChannelT &C, const std::map<K2, V2> &M) { - if (auto Err = serializeSeq(C, static_cast<uint64_t>(M.size()))) - return Err; - - for (const auto &E : M) { - if (auto Err = - SerializationTraits<ChannelT, K, K2>::serialize(C, E.first)) - return Err; - if (auto Err = - SerializationTraits<ChannelT, V, V2>::serialize(C, E.second)) - return Err; - } - - return Error::success(); - } - - /// Deserialize a std::map<K, V> to a std::map<K, V>. - static Error deserialize(ChannelT &C, std::map<K2, V2> &M) { - assert(M.empty() && "Expected default-constructed map to deserialize into"); - - uint64_t Count = 0; - if (auto Err = deserializeSeq(C, Count)) - return Err; - - while (Count-- != 0) { - std::pair<K2, V2> Val; - if (auto Err = - SerializationTraits<ChannelT, K, K2>::deserialize(C, Val.first)) - return Err; - - if (auto Err = - SerializationTraits<ChannelT, V, V2>::deserialize(C, Val.second)) - return Err; - - auto Added = M.insert(Val).second; - if (!Added) - return make_error<StringError>("Duplicate element in deserialized map", - orcError(OrcErrorCode::UnknownORCError)); - } - - return Error::success(); - } -}; - -template <typename ChannelT, typename K, typename V, typename K2, typename V2> -class SerializationTraits<ChannelT, std::map<K, V>, DenseMap<K2, V2>> { -public: - /// Serialize a std::map<K, V> from DenseMap<K2, V2>. - static Error serialize(ChannelT &C, const DenseMap<K2, V2> &M) { - if (auto Err = serializeSeq(C, static_cast<uint64_t>(M.size()))) - return Err; - - for (auto &E : M) { - if (auto Err = - SerializationTraits<ChannelT, K, K2>::serialize(C, E.first)) - return Err; - - if (auto Err = - SerializationTraits<ChannelT, V, V2>::serialize(C, E.second)) - return Err; - } - - return Error::success(); - } - - /// Serialize a std::map<K, V> from DenseMap<K2, V2>. 
- static Error deserialize(ChannelT &C, DenseMap<K2, V2> &M) { - assert(M.empty() && "Expected default-constructed map to deserialize into"); - - uint64_t Count = 0; - if (auto Err = deserializeSeq(C, Count)) - return Err; - - while (Count-- != 0) { - std::pair<K2, V2> Val; - if (auto Err = - SerializationTraits<ChannelT, K, K2>::deserialize(C, Val.first)) - return Err; - - if (auto Err = - SerializationTraits<ChannelT, V, V2>::deserialize(C, Val.second)) - return Err; - - auto Added = M.insert(Val).second; - if (!Added) - return make_error<StringError>("Duplicate element in deserialized map", - orcError(OrcErrorCode::UnknownORCError)); - } - - return Error::success(); - } -}; - -} // namespace shared -} // end namespace orc -} // end namespace llvm - -#endif // LLVM_EXECUTIONENGINE_ORC_SHARED_SERIALIZATION_H diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Shared/SimplePackedSerialization.h b/llvm/include/llvm/ExecutionEngine/Orc/Shared/SimplePackedSerialization.h index 854f1098d5af..9ac13a493e9d 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/Shared/SimplePackedSerialization.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/Shared/SimplePackedSerialization.h @@ -33,10 +33,12 @@ #define LLVM_EXECUTIONENGINE_ORC_SHARED_SIMPLEPACKEDSERIALIZATION_H #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/Error.h" #include "llvm/Support/SwapByteOrder.h" +#include <limits> #include <string> #include <tuple> #include <type_traits> @@ -193,13 +195,6 @@ template <typename SPSElementTagT> class SPSSequence; /// SPS tag type for strings, which are equivalent to sequences of chars. using SPSString = SPSSequence<char>; -/// SPS tag type for executor addresseses. -class SPSExecutorAddress {}; - -template <> -class SPSSerializationTraits<SPSExecutorAddress, uint64_t> - : public SPSSerializationTraits<uint64_t, uint64_t> {}; - /// SPS tag type for maps. /// /// SPS maps are just sequences of (Key, Value) tuples. 
@@ -289,6 +284,40 @@ public: } }; +/// Trivial ArrayRef<T> -> SPSSequence<SPSElementTagT> serialization. +template <typename SPSElementTagT, typename T> +class TrivialSPSSequenceSerialization<SPSElementTagT, ArrayRef<T>> { +public: + static constexpr bool available = true; +}; + +/// Specialized SPSSequence<char> -> ArrayRef<char> serialization. +/// +/// On deserialize, points directly into the input buffer. +template <> class SPSSerializationTraits<SPSSequence<char>, ArrayRef<char>> { +public: + static size_t size(const ArrayRef<char> &A) { + return SPSArgList<uint64_t>::size(static_cast<uint64_t>(A.size())) + + A.size(); + } + + static bool serialize(SPSOutputBuffer &OB, const ArrayRef<char> &A) { + if (!SPSArgList<uint64_t>::serialize(OB, static_cast<uint64_t>(A.size()))) + return false; + return OB.write(A.data(), A.size()); + } + + static bool deserialize(SPSInputBuffer &IB, ArrayRef<char> &A) { + uint64_t Size; + if (!SPSArgList<uint64_t>::deserialize(IB, Size)) + return false; + if (Size > std::numeric_limits<size_t>::max()) + return false; + A = {IB.data(), static_cast<size_t>(Size)}; + return IB.skip(Size); + } +}; + /// 'Trivial' sequence serialization: Sequence is serialized as a uint64_t size /// followed by a for-earch loop over the elements of the sequence to serialize /// each of them. @@ -330,6 +359,44 @@ public: } }; +/// SPSTuple serialization for std::tuple. +template <typename... SPSTagTs, typename... Ts> +class SPSSerializationTraits<SPSTuple<SPSTagTs...>, std::tuple<Ts...>> { +private: + using TupleArgList = typename SPSTuple<SPSTagTs...>::AsArgList; + using ArgIndices = std::make_index_sequence<sizeof...(Ts)>; + + template <std::size_t... I> + static size_t size(const std::tuple<Ts...> &T, std::index_sequence<I...>) { + return TupleArgList::size(std::get<I>(T)...); + } + + template <std::size_t... 
I> + static bool serialize(SPSOutputBuffer &OB, const std::tuple<Ts...> &T, + std::index_sequence<I...>) { + return TupleArgList::serialize(OB, std::get<I>(T)...); + } + + template <std::size_t... I> + static bool deserialize(SPSInputBuffer &IB, std::tuple<Ts...> &T, + std::index_sequence<I...>) { + return TupleArgList::deserialize(IB, std::get<I>(T)...); + } + +public: + static size_t size(const std::tuple<Ts...> &T) { + return size(T, ArgIndices{}); + } + + static bool serialize(SPSOutputBuffer &OB, const std::tuple<Ts...> &T) { + return serialize(OB, T, ArgIndices{}); + } + + static bool deserialize(SPSInputBuffer &IB, std::tuple<Ts...> &T) { + return deserialize(IB, T, ArgIndices{}); + } +}; + /// SPSTuple serialization for std::pair. template <typename SPSTagT1, typename SPSTagT2, typename T1, typename T2> class SPSSerializationTraits<SPSTuple<SPSTagT1, SPSTagT2>, std::pair<T1, T2>> { @@ -380,6 +447,49 @@ public: } }; +/// Serialization for StringMap<ValueT>s. +template <typename SPSValueT, typename ValueT> +class SPSSerializationTraits<SPSSequence<SPSTuple<SPSString, SPSValueT>>, + StringMap<ValueT>> { +public: + static size_t size(const StringMap<ValueT> &M) { + size_t Sz = SPSArgList<uint64_t>::size(static_cast<uint64_t>(M.size())); + for (auto &E : M) + Sz += SPSArgList<SPSString, SPSValueT>::size(E.first(), E.second); + return Sz; + } + + static bool serialize(SPSOutputBuffer &OB, const StringMap<ValueT> &M) { + if (!SPSArgList<uint64_t>::serialize(OB, static_cast<uint64_t>(M.size()))) + return false; + + for (auto &E : M) + if (!SPSArgList<SPSString, SPSValueT>::serialize(OB, E.first(), E.second)) + return false; + + return true; + } + + static bool deserialize(SPSInputBuffer &IB, StringMap<ValueT> &M) { + uint64_t Size; + assert(M.empty() && "M already contains elements"); + + if (!SPSArgList<uint64_t>::deserialize(IB, Size)) + return false; + + while (Size--) { + StringRef S; + ValueT V; + if (!SPSArgList<SPSString, SPSValueT>::deserialize(IB, S, V)) + 
return false; + if (!M.insert(std::make_pair(S, V)).second) + return false; + } + + return true; + } +}; + /// SPS tag type for errors. class SPSError; diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Shared/SimpleRemoteEPCUtils.h b/llvm/include/llvm/ExecutionEngine/Orc/Shared/SimpleRemoteEPCUtils.h new file mode 100644 index 000000000000..9e074ed1f931 --- /dev/null +++ b/llvm/include/llvm/ExecutionEngine/Orc/Shared/SimpleRemoteEPCUtils.h @@ -0,0 +1,235 @@ +//===--- SimpleRemoteEPCUtils.h - Utils for Simple Remote EPC ---*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Message definitions and other utilities for SimpleRemoteEPC and +// SimpleRemoteEPCServer. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EXECUTIONENGINE_ORC_SHARED_SIMPLEREMOTEEPCUTILS_H +#define LLVM_EXECUTIONENGINE_ORC_SHARED_SIMPLEREMOTEEPCUTILS_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h" +#include "llvm/ExecutionEngine/Orc/Shared/SimplePackedSerialization.h" +#include "llvm/Support/Error.h" + +#include <atomic> +#include <mutex> +#include <string> +#include <thread> + +namespace llvm { +namespace orc { + +namespace SimpleRemoteEPCDefaultBootstrapSymbolNames { +extern const char *ExecutorSessionObjectName; +extern const char *DispatchFnName; +} // end namespace SimpleRemoteEPCDefaultBootstrapSymbolNames + +enum class SimpleRemoteEPCOpcode : uint8_t { + Setup, + Hangup, + Result, + CallWrapper, + LastOpC = CallWrapper +}; + +struct SimpleRemoteEPCExecutorInfo { + std::string TargetTriple; + uint64_t PageSize; + StringMap<ExecutorAddr> BootstrapSymbols; +}; + 
+using SimpleRemoteEPCArgBytesVector = SmallVector<char, 128>; + +class SimpleRemoteEPCTransportClient { +public: + enum HandleMessageAction { ContinueSession, EndSession }; + + virtual ~SimpleRemoteEPCTransportClient(); + + /// Handle receipt of a message. + /// + /// Returns an Error if the message cannot be handled, 'EndSession' if the + /// client will not accept any further messages, and 'ContinueSession' + /// otherwise. + virtual Expected<HandleMessageAction> + handleMessage(SimpleRemoteEPCOpcode OpC, uint64_t SeqNo, ExecutorAddr TagAddr, + SimpleRemoteEPCArgBytesVector ArgBytes) = 0; + + /// Handle a disconnection from the underlying transport. No further messages + /// should be sent to handleMessage after this is called. + /// Err may contain an Error value indicating unexpected disconnection. This + /// allows clients to log such errors, but no attempt should be made at + /// recovery (which should be handled inside the transport class, if it is + /// supported at all). + virtual void handleDisconnect(Error Err) = 0; +}; + +class SimpleRemoteEPCTransport { +public: + virtual ~SimpleRemoteEPCTransport(); + + /// Called during setup of the client to indicate that the client is ready + /// to receive messages. + /// + /// Transport objects should not access the client until this method is + /// called. + virtual Error start() = 0; + + /// Send a SimpleRemoteEPC message. + /// + /// This function may be called concurrently. Subclasses should implement + /// locking if required for the underlying transport. + virtual Error sendMessage(SimpleRemoteEPCOpcode OpC, uint64_t SeqNo, + ExecutorAddr TagAddr, ArrayRef<char> ArgBytes) = 0; + + /// Trigger disconnection from the transport. The implementation should + /// respond by calling handleDisconnect on the client once disconnection + /// is complete. May be called more than once and from different threads. + virtual void disconnect() = 0; +}; + +/// Uses read/write on FileDescriptors for transport. 
+class FDSimpleRemoteEPCTransport : public SimpleRemoteEPCTransport { +public: + /// Create a FDSimpleRemoteEPCTransport using the given FDs for + /// reading (InFD) and writing (OutFD). + static Expected<std::unique_ptr<FDSimpleRemoteEPCTransport>> + Create(SimpleRemoteEPCTransportClient &C, int InFD, int OutFD); + + /// Create a FDSimpleRemoteEPCTransport using the given FD for both + /// reading and writing. + static Expected<std::unique_ptr<FDSimpleRemoteEPCTransport>> + Create(SimpleRemoteEPCTransportClient &C, int FD) { + return Create(C, FD, FD); + } + + ~FDSimpleRemoteEPCTransport() override; + + Error start() override; + + Error sendMessage(SimpleRemoteEPCOpcode OpC, uint64_t SeqNo, + ExecutorAddr TagAddr, ArrayRef<char> ArgBytes) override; + + void disconnect() override; + +private: + FDSimpleRemoteEPCTransport(SimpleRemoteEPCTransportClient &C, int InFD, + int OutFD) + : C(C), InFD(InFD), OutFD(OutFD) {} + + Error readBytes(char *Dst, size_t Size, bool *IsEOF = nullptr); + int writeBytes(const char *Src, size_t Size); + void listenLoop(); + + std::mutex M; + SimpleRemoteEPCTransportClient &C; + std::thread ListenerThread; + int InFD, OutFD; + std::atomic<bool> Disconnected{false}; +}; + +struct RemoteSymbolLookupSetElement { + std::string Name; + bool Required; +}; + +using RemoteSymbolLookupSet = std::vector<RemoteSymbolLookupSetElement>; + +struct RemoteSymbolLookup { + uint64_t H; + RemoteSymbolLookupSet Symbols; +}; + +namespace shared { + +using SPSRemoteSymbolLookupSetElement = SPSTuple<SPSString, bool>; + +using SPSRemoteSymbolLookupSet = SPSSequence<SPSRemoteSymbolLookupSetElement>; + +using SPSRemoteSymbolLookup = SPSTuple<uint64_t, SPSRemoteSymbolLookupSet>; + +/// Tuple containing target triple, page size, and bootstrap symbols. 
+using SPSSimpleRemoteEPCExecutorInfo = + SPSTuple<SPSString, uint64_t, + SPSSequence<SPSTuple<SPSString, SPSExecutorAddr>>>; + +template <> +class SPSSerializationTraits<SPSRemoteSymbolLookupSetElement, + RemoteSymbolLookupSetElement> { +public: + static size_t size(const RemoteSymbolLookupSetElement &V) { + return SPSArgList<SPSString, bool>::size(V.Name, V.Required); + } + + static size_t serialize(SPSOutputBuffer &OB, + const RemoteSymbolLookupSetElement &V) { + return SPSArgList<SPSString, bool>::serialize(OB, V.Name, V.Required); + } + + static size_t deserialize(SPSInputBuffer &IB, + RemoteSymbolLookupSetElement &V) { + return SPSArgList<SPSString, bool>::deserialize(IB, V.Name, V.Required); + } +}; + +template <> +class SPSSerializationTraits<SPSRemoteSymbolLookup, RemoteSymbolLookup> { +public: + static size_t size(const RemoteSymbolLookup &V) { + return SPSArgList<uint64_t, SPSRemoteSymbolLookupSet>::size(V.H, V.Symbols); + } + + static size_t serialize(SPSOutputBuffer &OB, const RemoteSymbolLookup &V) { + return SPSArgList<uint64_t, SPSRemoteSymbolLookupSet>::serialize(OB, V.H, + V.Symbols); + } + + static size_t deserialize(SPSInputBuffer &IB, RemoteSymbolLookup &V) { + return SPSArgList<uint64_t, SPSRemoteSymbolLookupSet>::deserialize( + IB, V.H, V.Symbols); + } +}; + +template <> +class SPSSerializationTraits<SPSSimpleRemoteEPCExecutorInfo, + SimpleRemoteEPCExecutorInfo> { +public: + static size_t size(const SimpleRemoteEPCExecutorInfo &SI) { + return SPSSimpleRemoteEPCExecutorInfo::AsArgList ::size( + SI.TargetTriple, SI.PageSize, SI.BootstrapSymbols); + } + + static bool serialize(SPSOutputBuffer &OB, + const SimpleRemoteEPCExecutorInfo &SI) { + return SPSSimpleRemoteEPCExecutorInfo::AsArgList ::serialize( + OB, SI.TargetTriple, SI.PageSize, SI.BootstrapSymbols); + } + + static bool deserialize(SPSInputBuffer &IB, SimpleRemoteEPCExecutorInfo &SI) { + return SPSSimpleRemoteEPCExecutorInfo::AsArgList ::deserialize( + IB, SI.TargetTriple, SI.PageSize, 
SI.BootstrapSymbols); + } +}; + +using SPSLoadDylibSignature = SPSExpected<SPSExecutorAddr>(SPSExecutorAddr, + SPSString, uint64_t); + +using SPSLookupSymbolsSignature = + SPSExpected<SPSSequence<SPSSequence<SPSExecutorAddr>>>( + SPSExecutorAddr, SPSSequence<SPSRemoteSymbolLookup>); + +} // end namespace shared +} // end namespace orc +} // end namespace llvm + +#endif // LLVM_EXECUTIONENGINE_ORC_SHARED_SIMPLEREMOTEEPCUTILS_H diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Shared/TargetProcessControlTypes.h b/llvm/include/llvm/ExecutionEngine/Orc/Shared/TargetProcessControlTypes.h index a44bcd4c8064..0e8b7e7d345a 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/Shared/TargetProcessControlTypes.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/Shared/TargetProcessControlTypes.h @@ -17,6 +17,10 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" #include "llvm/ExecutionEngine/JITSymbol.h" +#include "llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h" +#include "llvm/ExecutionEngine/Orc/Shared/SimplePackedSerialization.h" +#include "llvm/ExecutionEngine/Orc/Shared/WrapperFunctionUtils.h" +#include "llvm/Support/Memory.h" #include <vector> @@ -24,12 +28,108 @@ namespace llvm { namespace orc { namespace tpctypes { +enum WireProtectionFlags : uint8_t { + WPF_None = 0, + WPF_Read = 1U << 0, + WPF_Write = 1U << 1, + WPF_Exec = 1U << 2, + LLVM_MARK_AS_BITMASK_ENUM(WPF_Exec) +}; + +/// Convert from sys::Memory::ProtectionFlags +inline WireProtectionFlags +toWireProtectionFlags(sys::Memory::ProtectionFlags PF) { + WireProtectionFlags WPF = WPF_None; + if (PF & sys::Memory::MF_READ) + WPF |= WPF_Read; + if (PF & sys::Memory::MF_WRITE) + WPF |= WPF_Write; + if (PF & sys::Memory::MF_EXEC) + WPF |= WPF_Exec; + return WPF; +} + +inline sys::Memory::ProtectionFlags +fromWireProtectionFlags(WireProtectionFlags WPF) { + int PF = 0; + if (WPF & WPF_Read) + PF |= sys::Memory::MF_READ; + if (WPF & WPF_Write) + PF |= sys::Memory::MF_WRITE; + if (WPF & WPF_Exec) + PF |= 
sys::Memory::MF_EXEC; + return static_cast<sys::Memory::ProtectionFlags>(PF); +} + +inline std::string getWireProtectionFlagsStr(WireProtectionFlags WPF) { + std::string Result; + Result += (WPF & WPF_Read) ? 'R' : '-'; + Result += (WPF & WPF_Write) ? 'W' : '-'; + Result += (WPF & WPF_Exec) ? 'X' : '-'; + return Result; +} + +struct WrapperFunctionCall { + ExecutorAddr Func; + ExecutorAddrRange ArgData; + + WrapperFunctionCall() = default; + WrapperFunctionCall(ExecutorAddr Func, ExecutorAddr ArgData, + ExecutorAddrDiff ArgSize) + : Func(Func), ArgData(ArgData, ArgSize) {} + WrapperFunctionCall(ExecutorAddr Func, ExecutorAddrRange ArgData) + : Func(Func), ArgData(ArgData) {} + + shared::WrapperFunctionResult run() { + using FnTy = + shared::CWrapperFunctionResult(const char *ArgData, size_t ArgSize); + return shared::WrapperFunctionResult( + Func.toPtr<FnTy *>()(ArgData.Start.toPtr<const char *>(), + static_cast<size_t>(ArgData.size().getValue()))); + } + + /// Run call and deserialize result using SPS. + template <typename SPSRetT, typename RetT> Error runWithSPSRet(RetT &RetVal) { + auto WFR = run(); + if (const char *ErrMsg = WFR.getOutOfBandError()) + return make_error<StringError>(ErrMsg, inconvertibleErrorCode()); + shared::SPSInputBuffer IB(WFR.data(), WFR.size()); + if (!shared::SPSSerializationTraits<SPSRetT, RetT>::deserialize(IB, RetVal)) + return make_error<StringError>("Could not deserialize result from " + "serialized wrapper function call", + inconvertibleErrorCode()); + return Error::success(); + } + + /// Overload for SPS functions returning void. 
+ Error runWithSPSRet() { + shared::SPSEmpty E; + return runWithSPSRet<shared::SPSEmpty>(E); + } +}; + +struct AllocationActionsPair { + WrapperFunctionCall Finalize; + WrapperFunctionCall Deallocate; +}; + +struct SegFinalizeRequest { + WireProtectionFlags Prot; + ExecutorAddr Addr; + uint64_t Size; + ArrayRef<char> Content; +}; + +struct FinalizeRequest { + std::vector<SegFinalizeRequest> Segments; + std::vector<AllocationActionsPair> Actions; +}; + template <typename T> struct UIntWrite { UIntWrite() = default; - UIntWrite(JITTargetAddress Address, T Value) - : Address(Address), Value(Value) {} + UIntWrite(ExecutorAddr Addr, T Value) : Addr(Addr), Value(Value) {} - JITTargetAddress Address = 0; + ExecutorAddr Addr; T Value = 0; }; @@ -49,10 +149,10 @@ using UInt64Write = UIntWrite<uint64_t>; /// For use with TargetProcessControl::MemoryAccess objects. struct BufferWrite { BufferWrite() = default; - BufferWrite(JITTargetAddress Address, StringRef Buffer) - : Address(Address), Buffer(Buffer) {} + BufferWrite(ExecutorAddr Addr, StringRef Buffer) + : Addr(Addr), Buffer(Buffer) {} - JITTargetAddress Address = 0; + ExecutorAddr Addr; StringRef Buffer; }; @@ -62,6 +162,180 @@ using DylibHandle = JITTargetAddress; using LookupResult = std::vector<JITTargetAddress>; } // end namespace tpctypes + +namespace shared { + +class SPSMemoryProtectionFlags {}; + +using SPSWrapperFunctionCall = SPSTuple<SPSExecutorAddr, SPSExecutorAddrRange>; + +using SPSSegFinalizeRequest = + SPSTuple<SPSMemoryProtectionFlags, SPSExecutorAddr, uint64_t, + SPSSequence<char>>; + +using SPSAllocationActionsPair = + SPSTuple<SPSWrapperFunctionCall, SPSWrapperFunctionCall>; + +using SPSFinalizeRequest = SPSTuple<SPSSequence<SPSSegFinalizeRequest>, + SPSSequence<SPSAllocationActionsPair>>; + +template <typename T> +using SPSMemoryAccessUIntWrite = SPSTuple<SPSExecutorAddr, T>; + +using SPSMemoryAccessUInt8Write = SPSMemoryAccessUIntWrite<uint8_t>; +using SPSMemoryAccessUInt16Write = 
SPSMemoryAccessUIntWrite<uint16_t>; +using SPSMemoryAccessUInt32Write = SPSMemoryAccessUIntWrite<uint32_t>; +using SPSMemoryAccessUInt64Write = SPSMemoryAccessUIntWrite<uint64_t>; + +using SPSMemoryAccessBufferWrite = SPSTuple<SPSExecutorAddr, SPSSequence<char>>; + +template <> +class SPSSerializationTraits<SPSMemoryProtectionFlags, + tpctypes::WireProtectionFlags> { +public: + static size_t size(const tpctypes::WireProtectionFlags &WPF) { + return SPSArgList<uint8_t>::size(static_cast<uint8_t>(WPF)); + } + + static bool serialize(SPSOutputBuffer &OB, + const tpctypes::WireProtectionFlags &WPF) { + return SPSArgList<uint8_t>::serialize(OB, static_cast<uint8_t>(WPF)); + } + + static bool deserialize(SPSInputBuffer &IB, + tpctypes::WireProtectionFlags &WPF) { + uint8_t Val; + if (!SPSArgList<uint8_t>::deserialize(IB, Val)) + return false; + WPF = static_cast<tpctypes::WireProtectionFlags>(Val); + return true; + } +}; + +template <> +class SPSSerializationTraits<SPSWrapperFunctionCall, + tpctypes::WrapperFunctionCall> { + using AL = SPSWrapperFunctionCall::AsArgList; + +public: + static size_t size(const tpctypes::WrapperFunctionCall &WFC) { + return AL::size(WFC.Func, WFC.ArgData); + } + + static bool serialize(SPSOutputBuffer &OB, + const tpctypes::WrapperFunctionCall &WFC) { + return AL::serialize(OB, WFC.Func, WFC.ArgData); + } + + static bool deserialize(SPSInputBuffer &IB, + tpctypes::WrapperFunctionCall &WFC) { + return AL::deserialize(IB, WFC.Func, WFC.ArgData); + } +}; + +template <> +class SPSSerializationTraits<SPSAllocationActionsPair, + tpctypes::AllocationActionsPair> { + using AL = SPSAllocationActionsPair::AsArgList; + +public: + static size_t size(const tpctypes::AllocationActionsPair &AAP) { + return AL::size(AAP.Finalize, AAP.Deallocate); + } + + static bool serialize(SPSOutputBuffer &OB, + const tpctypes::AllocationActionsPair &AAP) { + return AL::serialize(OB, AAP.Finalize, AAP.Deallocate); + } + + static bool deserialize(SPSInputBuffer &IB, + 
tpctypes::AllocationActionsPair &AAP) { + return AL::deserialize(IB, AAP.Finalize, AAP.Deallocate); + } +}; + +template <> +class SPSSerializationTraits<SPSSegFinalizeRequest, + tpctypes::SegFinalizeRequest> { + using SFRAL = SPSSegFinalizeRequest::AsArgList; + +public: + static size_t size(const tpctypes::SegFinalizeRequest &SFR) { + return SFRAL::size(SFR.Prot, SFR.Addr, SFR.Size, SFR.Content); + } + + static bool serialize(SPSOutputBuffer &OB, + const tpctypes::SegFinalizeRequest &SFR) { + return SFRAL::serialize(OB, SFR.Prot, SFR.Addr, SFR.Size, SFR.Content); + } + + static bool deserialize(SPSInputBuffer &IB, + tpctypes::SegFinalizeRequest &SFR) { + return SFRAL::deserialize(IB, SFR.Prot, SFR.Addr, SFR.Size, SFR.Content); + } +}; + +template <> +class SPSSerializationTraits<SPSFinalizeRequest, tpctypes::FinalizeRequest> { + using FRAL = SPSFinalizeRequest::AsArgList; + +public: + static size_t size(const tpctypes::FinalizeRequest &FR) { + return FRAL::size(FR.Segments, FR.Actions); + } + + static bool serialize(SPSOutputBuffer &OB, + const tpctypes::FinalizeRequest &FR) { + return FRAL::serialize(OB, FR.Segments, FR.Actions); + } + + static bool deserialize(SPSInputBuffer &IB, tpctypes::FinalizeRequest &FR) { + return FRAL::deserialize(IB, FR.Segments, FR.Actions); + } +}; + +template <typename T> +class SPSSerializationTraits<SPSMemoryAccessUIntWrite<T>, + tpctypes::UIntWrite<T>> { +public: + static size_t size(const tpctypes::UIntWrite<T> &W) { + return SPSTuple<SPSExecutorAddr, T>::AsArgList::size(W.Addr, W.Value); + } + + static bool serialize(SPSOutputBuffer &OB, const tpctypes::UIntWrite<T> &W) { + return SPSTuple<SPSExecutorAddr, T>::AsArgList::serialize(OB, W.Addr, + W.Value); + } + + static bool deserialize(SPSInputBuffer &IB, tpctypes::UIntWrite<T> &W) { + return SPSTuple<SPSExecutorAddr, T>::AsArgList::deserialize(IB, W.Addr, + W.Value); + } +}; + +template <> +class SPSSerializationTraits<SPSMemoryAccessBufferWrite, + tpctypes::BufferWrite> { 
+public: + static size_t size(const tpctypes::BufferWrite &W) { + return SPSTuple<SPSExecutorAddr, SPSSequence<char>>::AsArgList::size( + W.Addr, W.Buffer); + } + + static bool serialize(SPSOutputBuffer &OB, const tpctypes::BufferWrite &W) { + return SPSTuple<SPSExecutorAddr, SPSSequence<char>>::AsArgList ::serialize( + OB, W.Addr, W.Buffer); + } + + static bool deserialize(SPSInputBuffer &IB, tpctypes::BufferWrite &W) { + return SPSTuple<SPSExecutorAddr, + SPSSequence<char>>::AsArgList ::deserialize(IB, W.Addr, + W.Buffer); + } +}; + + +} // end namespace shared } // end namespace orc } // end namespace llvm diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Shared/WrapperFunctionUtils.h b/llvm/include/llvm/ExecutionEngine/Orc/Shared/WrapperFunctionUtils.h index 2f14a1c76332..bf841b1f706b 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/Shared/WrapperFunctionUtils.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/Shared/WrapperFunctionUtils.h @@ -10,9 +10,10 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_EXECUTIONENGINE_ORC_WRAPPERFUNCTIONUTILS_H -#define LLVM_EXECUTIONENGINE_ORC_WRAPPERFUNCTIONUTILS_H +#ifndef LLVM_EXECUTIONENGINE_ORC_SHARED_WRAPPERFUNCTIONUTILS_H +#define LLVM_EXECUTIONENGINE_ORC_SHARED_WRAPPERFUNCTIONUTILS_H +#include "llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h" #include "llvm/ExecutionEngine/Orc/Shared/SimplePackedSerialization.h" #include "llvm/Support/Error.h" @@ -22,24 +23,18 @@ namespace llvm { namespace orc { namespace shared { -namespace detail { - -// DO NOT USE DIRECTLY. // Must be kept in-sync with compiler-rt/lib/orc/c-api.h. union CWrapperFunctionResultDataUnion { char *ValuePtr; char Value[sizeof(ValuePtr)]; }; -// DO NOT USE DIRECTLY. // Must be kept in-sync with compiler-rt/lib/orc/c-api.h. 
typedef struct { CWrapperFunctionResultDataUnion Data; size_t Size; } CWrapperFunctionResult; -} // end namespace detail - /// C++ wrapper function result: Same as CWrapperFunctionResult but /// auto-releases memory. class WrapperFunctionResult { @@ -48,11 +43,11 @@ public: WrapperFunctionResult() { init(R); } /// Create a WrapperFunctionResult by taking ownership of a - /// detail::CWrapperFunctionResult. + /// CWrapperFunctionResult. /// /// Warning: This should only be used by clients writing wrapper-function /// caller utilities (like TargetProcessControl). - WrapperFunctionResult(detail::CWrapperFunctionResult R) : R(R) { + WrapperFunctionResult(CWrapperFunctionResult R) : R(R) { // Reset R. init(R); } @@ -77,18 +72,25 @@ public: free(R.Data.ValuePtr); } - /// Release ownership of the contained detail::CWrapperFunctionResult. + /// Release ownership of the contained CWrapperFunctionResult. /// Warning: Do not use -- this method will be removed in the future. It only /// exists to temporarily support some code that will eventually be moved to /// the ORC runtime. - detail::CWrapperFunctionResult release() { - detail::CWrapperFunctionResult Tmp; + CWrapperFunctionResult release() { + CWrapperFunctionResult Tmp; init(Tmp); std::swap(R, Tmp); return Tmp; } /// Get a pointer to the data contained in this instance. + char *data() { + assert((R.Size != 0 || R.Data.ValuePtr == nullptr) && + "Cannot get data for out-of-band error value"); + return R.Size > sizeof(R.Data.Value) ? R.Data.ValuePtr : R.Data.Value; + } + + /// Get a const pointer to the data contained in this instance. const char *data() const { assert((R.Size != 0 || R.Data.ValuePtr == nullptr) && "Cannot get data for out-of-band error value"); @@ -108,24 +110,19 @@ public: /// Create a WrapperFunctionResult with the given size and return a pointer /// to the underlying memory. 
- static char *allocate(WrapperFunctionResult &WFR, size_t Size) { + static WrapperFunctionResult allocate(size_t Size) { // Reset. - WFR = WrapperFunctionResult(); + WrapperFunctionResult WFR; WFR.R.Size = Size; - char *DataPtr; - if (WFR.R.Size > sizeof(WFR.R.Data.Value)) { - DataPtr = (char *)malloc(WFR.R.Size); - WFR.R.Data.ValuePtr = DataPtr; - } else - DataPtr = WFR.R.Data.Value; - return DataPtr; + if (WFR.R.Size > sizeof(WFR.R.Data.Value)) + WFR.R.Data.ValuePtr = (char *)malloc(WFR.R.Size); + return WFR; } /// Copy from the given char range. static WrapperFunctionResult copyFrom(const char *Source, size_t Size) { - WrapperFunctionResult WFR; - char *DataPtr = allocate(WFR, Size); - memcpy(DataPtr, Source, Size); + auto WFR = allocate(Size); + memcpy(WFR.data(), Source, Size); return WFR; } @@ -161,12 +158,12 @@ public: } private: - static void init(detail::CWrapperFunctionResult &R) { + static void init(CWrapperFunctionResult &R) { R.Data.ValuePtr = nullptr; R.Size = 0; } - detail::CWrapperFunctionResult R; + CWrapperFunctionResult R; }; namespace detail { @@ -174,10 +171,8 @@ namespace detail { template <typename SPSArgListT, typename... ArgTs> WrapperFunctionResult serializeViaSPSToWrapperFunctionResult(const ArgTs &...Args) { - WrapperFunctionResult Result; - char *DataPtr = - WrapperFunctionResult::allocate(Result, SPSArgListT::size(Args...)); - SPSOutputBuffer OB(DataPtr, Result.size()); + auto Result = WrapperFunctionResult::allocate(SPSArgListT::size(Args...)); + SPSOutputBuffer OB(Result.data(), Result.size()); if (!SPSArgListT::serialize(OB, Args...)) return WrapperFunctionResult::createOutOfBandError( "Error serializing arguments to blob in call"); @@ -315,6 +310,7 @@ private: static void callAsync(HandlerT &&H, SerializeAndSendResultT &&SerializeAndSendResult, ArgTupleT Args, std::index_sequence<I...>) { + (void)Args; // Silence a buggy GCC warning. 
return std::forward<HandlerT>(H)(std::move(SerializeAndSendResult), std::move(std::get<I>(Args))...); } @@ -486,10 +482,16 @@ public: } auto SendSerializedResult = [SDR = std::move(SendDeserializedResult)]( - WrapperFunctionResult R) { + WrapperFunctionResult R) mutable { RetT RetVal = detail::ResultDeserializer<SPSRetTagT, RetT>::makeValue(); detail::ResultDeserializer<SPSRetTagT, RetT>::makeSafe(RetVal); + if (auto *ErrMsg = R.getOutOfBandError()) { + SDR(make_error<StringError>(ErrMsg, inconvertibleErrorCode()), + std::move(RetVal)); + return; + } + SPSInputBuffer IB(R.data(), R.size()); if (auto Err = detail::ResultDeserializer<SPSRetTagT, RetT>::deserialize( RetVal, R.data(), R.size())) @@ -547,12 +549,68 @@ public: return WrapperFunction<SPSEmpty(SPSTagTs...)>::call(Caller, BE, Args...); } + template <typename AsyncCallerFn, typename SendDeserializedResultFn, + typename... ArgTs> + static void callAsync(AsyncCallerFn &&Caller, + SendDeserializedResultFn &&SendDeserializedResult, + const ArgTs &...Args) { + WrapperFunction<SPSEmpty(SPSTagTs...)>::callAsync( + std::forward<AsyncCallerFn>(Caller), + [SDR = std::move(SendDeserializedResult)](Error SerializeErr, + SPSEmpty E) mutable { + SDR(std::move(SerializeErr)); + }, + Args...); + } + using WrapperFunction<SPSEmpty(SPSTagTs...)>::handle; using WrapperFunction<SPSEmpty(SPSTagTs...)>::handleAsync; }; +/// A function object that takes an ExecutorAddr as its first argument, +/// casts that address to a ClassT*, then calls the given method on that +/// pointer passing in the remaining function arguments. This utility +/// removes some of the boilerplate from writing wrappers for method calls. +/// +/// @code{.cpp} +/// class MyClass { +/// public: +/// void myMethod(uint32_t, bool) { ... } +/// }; +/// +/// // SPS Method signature -- note MyClass object address as first argument. 
+/// using SPSMyMethodWrapperSignature = +/// SPSTuple<SPSExecutorAddr, uint32_t, bool>; +/// +/// WrapperFunctionResult +/// myMethodCallWrapper(const char *ArgData, size_t ArgSize) { +/// return WrapperFunction<SPSMyMethodWrapperSignature>::handle( +/// ArgData, ArgSize, makeMethodWrapperHandler(&MyClass::myMethod)); +/// } +/// @endcode +/// +template <typename RetT, typename ClassT, typename... ArgTs> +class MethodWrapperHandler { +public: + using MethodT = RetT (ClassT::*)(ArgTs...); + MethodWrapperHandler(MethodT M) : M(M) {} + RetT operator()(ExecutorAddr ObjAddr, ArgTs &...Args) { + return (ObjAddr.toPtr<ClassT*>()->*M)(std::forward<ArgTs>(Args)...); + } + +private: + MethodT M; +}; + +/// Create a MethodWrapperHandler object from the given method pointer. +template <typename RetT, typename ClassT, typename... ArgTs> +MethodWrapperHandler<RetT, ClassT, ArgTs...> +makeMethodWrapperHandler(RetT (ClassT::*Method)(ArgTs...)) { + return MethodWrapperHandler<RetT, ClassT, ArgTs...>(Method); +} + } // end namespace shared } // end namespace orc } // end namespace llvm -#endif // LLVM_EXECUTIONENGINE_ORC_WRAPPERFUNCTIONUTILS_H +#endif // LLVM_EXECUTIONENGINE_ORC_SHARED_WRAPPERFUNCTIONUTILS_H diff --git a/llvm/include/llvm/ExecutionEngine/Orc/SimpleRemoteEPC.h b/llvm/include/llvm/ExecutionEngine/Orc/SimpleRemoteEPC.h new file mode 100644 index 000000000000..bd72e4535325 --- /dev/null +++ b/llvm/include/llvm/ExecutionEngine/Orc/SimpleRemoteEPC.h @@ -0,0 +1,140 @@ +//===---- SimpleRemoteEPC.h - Simple remote executor control ----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Simple remote executor process control. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EXECUTIONENGINE_ORC_SIMPLEREMOTEEPC_H +#define LLVM_EXECUTIONENGINE_ORC_SIMPLEREMOTEEPC_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/FunctionExtras.h" +#include "llvm/ExecutionEngine/Orc/EPCGenericDylibManager.h" +#include "llvm/ExecutionEngine/Orc/EPCGenericJITLinkMemoryManager.h" +#include "llvm/ExecutionEngine/Orc/EPCGenericMemoryAccess.h" +#include "llvm/ExecutionEngine/Orc/ExecutorProcessControl.h" +#include "llvm/ExecutionEngine/Orc/Shared/SimpleRemoteEPCUtils.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/MSVCErrorWorkarounds.h" + +#include <future> + +namespace llvm { +namespace orc { + +class SimpleRemoteEPC : public ExecutorProcessControl, + public SimpleRemoteEPCTransportClient { +public: + /// A setup object containing callbacks to construct a memory manager and + /// memory access object. Both are optional. If not specified, + /// EPCGenericJITLinkMemoryManager and EPCGenericMemoryAccess will be used. + struct Setup { + using CreateMemoryManagerFn = + Expected<std::unique_ptr<jitlink::JITLinkMemoryManager>>( + SimpleRemoteEPC &); + using CreateMemoryAccessFn = + Expected<std::unique_ptr<MemoryAccess>>(SimpleRemoteEPC &); + + unique_function<CreateMemoryManagerFn> CreateMemoryManager; + unique_function<CreateMemoryAccessFn> CreateMemoryAccess; + }; + + /// Create a SimpleRemoteEPC using the given transport type and args. + template <typename TransportT, typename... 
TransportTCtorArgTs> + static Expected<std::unique_ptr<SimpleRemoteEPC>> + Create(std::unique_ptr<TaskDispatcher> D, Setup S, + TransportTCtorArgTs &&...TransportTCtorArgs) { + std::unique_ptr<SimpleRemoteEPC> SREPC( + new SimpleRemoteEPC(std::make_shared<SymbolStringPool>(), + std::move(D))); + auto T = TransportT::Create( + *SREPC, std::forward<TransportTCtorArgTs>(TransportTCtorArgs)...); + if (!T) + return T.takeError(); + SREPC->T = std::move(*T); + if (auto Err = SREPC->setup(std::move(S))) + return joinErrors(std::move(Err), SREPC->disconnect()); + return std::move(SREPC); + } + + SimpleRemoteEPC(const SimpleRemoteEPC &) = delete; + SimpleRemoteEPC &operator=(const SimpleRemoteEPC &) = delete; + SimpleRemoteEPC(SimpleRemoteEPC &&) = delete; + SimpleRemoteEPC &operator=(SimpleRemoteEPC &&) = delete; + ~SimpleRemoteEPC(); + + Expected<tpctypes::DylibHandle> loadDylib(const char *DylibPath) override; + + Expected<std::vector<tpctypes::LookupResult>> + lookupSymbols(ArrayRef<LookupRequest> Request) override; + + Expected<int32_t> runAsMain(ExecutorAddr MainFnAddr, + ArrayRef<std::string> Args) override; + + void callWrapperAsync(ExecutorAddr WrapperFnAddr, + IncomingWFRHandler OnComplete, + ArrayRef<char> ArgBuffer) override; + + Error disconnect() override; + + Expected<HandleMessageAction> + handleMessage(SimpleRemoteEPCOpcode OpC, uint64_t SeqNo, ExecutorAddr TagAddr, + SimpleRemoteEPCArgBytesVector ArgBytes) override; + + void handleDisconnect(Error Err) override; + +private: + SimpleRemoteEPC(std::shared_ptr<SymbolStringPool> SSP, + std::unique_ptr<TaskDispatcher> D) + : ExecutorProcessControl(std::move(SSP), std::move(D)) {} + + static Expected<std::unique_ptr<jitlink::JITLinkMemoryManager>> + createDefaultMemoryManager(SimpleRemoteEPC &SREPC); + static Expected<std::unique_ptr<MemoryAccess>> + createDefaultMemoryAccess(SimpleRemoteEPC &SREPC); + + Error sendMessage(SimpleRemoteEPCOpcode OpC, uint64_t SeqNo, + ExecutorAddr TagAddr, ArrayRef<char> 
ArgBytes); + + Error handleSetup(uint64_t SeqNo, ExecutorAddr TagAddr, + SimpleRemoteEPCArgBytesVector ArgBytes); + Error setup(Setup S); + + Error handleResult(uint64_t SeqNo, ExecutorAddr TagAddr, + SimpleRemoteEPCArgBytesVector ArgBytes); + void handleCallWrapper(uint64_t RemoteSeqNo, ExecutorAddr TagAddr, + SimpleRemoteEPCArgBytesVector ArgBytes); + Error handleHangup(SimpleRemoteEPCArgBytesVector ArgBytes); + + uint64_t getNextSeqNo() { return NextSeqNo++; } + void releaseSeqNo(uint64_t SeqNo) {} + + using PendingCallWrapperResultsMap = + DenseMap<uint64_t, IncomingWFRHandler>; + + std::mutex SimpleRemoteEPCMutex; + std::condition_variable DisconnectCV; + bool Disconnected = false; + Error DisconnectErr = Error::success(); + + std::unique_ptr<SimpleRemoteEPCTransport> T; + std::unique_ptr<jitlink::JITLinkMemoryManager> OwnedMemMgr; + std::unique_ptr<MemoryAccess> OwnedMemAccess; + + std::unique_ptr<EPCGenericDylibManager> DylibMgr; + ExecutorAddr RunAsMainAddr; + + uint64_t NextSeqNo = 0; + PendingCallWrapperResultsMap PendingCallWrapperResults; +}; + +} // end namespace orc +} // end namespace llvm + +#endif // LLVM_EXECUTIONENGINE_ORC_SIMPLEREMOTEEPC_H diff --git a/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/ExecutorBootstrapService.h b/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/ExecutorBootstrapService.h new file mode 100644 index 000000000000..32c127634b25 --- /dev/null +++ b/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/ExecutorBootstrapService.h @@ -0,0 +1,36 @@ +//===- ExecutorService.h - Provide bootstrap symbols to session -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Provides a service by supplying some set of bootstrap symbols. 
+// +// FIXME: The functionality in this file should be moved to the ORC runtime. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EXECUTIONENGINE_ORC_TARGETPROCESS_EXECUTORBOOTSTRAPSERVICE_H +#define LLVM_EXECUTIONENGINE_ORC_TARGETPROCESS_EXECUTORBOOTSTRAPSERVICE_H + +#include "llvm/ADT/StringMap.h" +#include "llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h" + +namespace llvm { +namespace orc { + +class ExecutorBootstrapService { +public: + virtual ~ExecutorBootstrapService(); + + virtual void + addBootstrapSymbols(StringMap<ExecutorAddr> &BootstrapSymbols) = 0; + virtual Error shutdown() = 0; +}; + +} // end namespace orc +} // end namespace llvm + +#endif // LLVM_EXECUTIONENGINE_ORC_TARGETPROCESS_EXECUTORBOOTSTRAPSERVICE_H diff --git a/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/JITLoaderGDB.h b/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/JITLoaderGDB.h index 3fad98b5f178..cfb951178da6 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/JITLoaderGDB.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/JITLoaderGDB.h @@ -16,7 +16,7 @@ #include "llvm/ExecutionEngine/Orc/Shared/WrapperFunctionUtils.h" #include <cstdint> -extern "C" llvm::orc::shared::detail::CWrapperFunctionResult +extern "C" llvm::orc::shared::CWrapperFunctionResult llvm_orc_registerJITLoaderGDBWrapper(const char *Data, uint64_t Size); #endif // LLVM_EXECUTIONENGINE_ORC_TARGETPROCESS_JITLOADERGDB_H diff --git a/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/OrcRPCTPCServer.h b/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/OrcRPCTPCServer.h deleted file mode 100644 index 96e4341fce68..000000000000 --- a/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/OrcRPCTPCServer.h +++ /dev/null @@ -1,660 +0,0 @@ -//===-- OrcRPCTPCServer.h -- OrcRPCTargetProcessControl Server --*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
-// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// OrcRPCTargetProcessControl server class. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_EXECUTIONENGINE_ORC_TARGETPROCESS_ORCRPCTPCSERVER_H -#define LLVM_EXECUTIONENGINE_ORC_TARGETPROCESS_ORCRPCTPCSERVER_H - -#include "llvm/ADT/BitmaskEnum.h" -#include "llvm/ExecutionEngine/Orc/Shared/RPCUtils.h" -#include "llvm/ExecutionEngine/Orc/Shared/RawByteChannel.h" -#include "llvm/ExecutionEngine/Orc/Shared/TargetProcessControlTypes.h" -#include "llvm/ExecutionEngine/Orc/Shared/WrapperFunctionUtils.h" -#include "llvm/ExecutionEngine/Orc/TargetProcess/TargetExecutionUtils.h" -#include "llvm/Support/DynamicLibrary.h" -#include "llvm/Support/FormatVariadic.h" -#include "llvm/Support/Host.h" -#include "llvm/Support/MathExtras.h" -#include "llvm/Support/Memory.h" -#include "llvm/Support/Process.h" - -#include <atomic> - -namespace llvm { -namespace orc { - -namespace orcrpctpc { - -enum WireProtectionFlags : uint8_t { - WPF_None = 0, - WPF_Read = 1U << 0, - WPF_Write = 1U << 1, - WPF_Exec = 1U << 2, - LLVM_MARK_AS_BITMASK_ENUM(WPF_Exec) -}; - -struct ExecutorProcessInfo { - std::string Triple; - unsigned PageSize; - JITTargetAddress DispatchFuncAddr; - JITTargetAddress DispatchCtxAddr; -}; - -/// Convert from sys::Memory::ProtectionFlags -inline WireProtectionFlags -toWireProtectionFlags(sys::Memory::ProtectionFlags PF) { - WireProtectionFlags WPF = WPF_None; - if (PF & sys::Memory::MF_READ) - WPF |= WPF_Read; - if (PF & sys::Memory::MF_WRITE) - WPF |= WPF_Write; - if (PF & sys::Memory::MF_EXEC) - WPF |= WPF_Exec; - return WPF; -} - -inline sys::Memory::ProtectionFlags -fromWireProtectionFlags(WireProtectionFlags WPF) { - int PF = 0; - if (WPF & WPF_Read) - PF |= sys::Memory::MF_READ; - if (WPF & WPF_Write) - PF 
|= sys::Memory::MF_WRITE; - if (WPF & WPF_Exec) - PF |= sys::Memory::MF_EXEC; - return static_cast<sys::Memory::ProtectionFlags>(PF); -} - -struct ReserveMemRequestElement { - WireProtectionFlags Prot = WPF_None; - uint64_t Size = 0; - uint64_t Alignment = 0; -}; - -using ReserveMemRequest = std::vector<ReserveMemRequestElement>; - -struct ReserveMemResultElement { - WireProtectionFlags Prot = WPF_None; - JITTargetAddress Address = 0; - uint64_t AllocatedSize = 0; -}; - -using ReserveMemResult = std::vector<ReserveMemResultElement>; - -struct ReleaseOrFinalizeMemRequestElement { - WireProtectionFlags Prot = WPF_None; - JITTargetAddress Address = 0; - uint64_t Size = 0; -}; - -using ReleaseOrFinalizeMemRequest = - std::vector<ReleaseOrFinalizeMemRequestElement>; - -} // end namespace orcrpctpc - -namespace shared { - -template <> class SerializationTypeName<WrapperFunctionResult> { -public: - static const char *getName() { return "WrapperFunctionResult"; } -}; - -template <typename ChannelT> -class SerializationTraits< - ChannelT, WrapperFunctionResult, WrapperFunctionResult, - std::enable_if_t<std::is_base_of<RawByteChannel, ChannelT>::value>> { -public: - static Error serialize(ChannelT &C, const WrapperFunctionResult &E) { - if (auto Err = serializeSeq(C, static_cast<uint64_t>(E.size()))) - return Err; - if (E.size() == 0) - return Error::success(); - return C.appendBytes(E.data(), E.size()); - } - - static Error deserialize(ChannelT &C, WrapperFunctionResult &E) { - uint64_t Size; - if (auto Err = deserializeSeq(C, Size)) - return Err; - - WrapperFunctionResult Tmp; - char *Data = WrapperFunctionResult::allocate(Tmp, Size); - - if (auto Err = C.readBytes(Data, Size)) - return Err; - - E = std::move(Tmp); - - return Error::success(); - } -}; - -template <> class SerializationTypeName<tpctypes::UInt8Write> { -public: - static const char *getName() { return "UInt8Write"; } -}; - -template <> class SerializationTypeName<tpctypes::UInt16Write> { -public: - static 
const char *getName() { return "UInt16Write"; } -}; - -template <> class SerializationTypeName<tpctypes::UInt32Write> { -public: - static const char *getName() { return "UInt32Write"; } -}; - -template <> class SerializationTypeName<tpctypes::UInt64Write> { -public: - static const char *getName() { return "UInt64Write"; } -}; - -template <> class SerializationTypeName<tpctypes::BufferWrite> { -public: - static const char *getName() { return "BufferWrite"; } -}; - -template <> class SerializationTypeName<orcrpctpc::ReserveMemRequestElement> { -public: - static const char *getName() { return "ReserveMemRequestElement"; } -}; - -template <> class SerializationTypeName<orcrpctpc::ReserveMemResultElement> { -public: - static const char *getName() { return "ReserveMemResultElement"; } -}; - -template <> -class SerializationTypeName<orcrpctpc::ReleaseOrFinalizeMemRequestElement> { -public: - static const char *getName() { return "ReleaseOrFinalizeMemRequestElement"; } -}; - -template <> class SerializationTypeName<orcrpctpc::ExecutorProcessInfo> { -public: - static const char *getName() { return "ExecutorProcessInfo"; } -}; - -template <typename ChannelT, typename WriteT> -class SerializationTraits< - ChannelT, WriteT, WriteT, - std::enable_if_t<std::is_same<WriteT, tpctypes::UInt8Write>::value || - std::is_same<WriteT, tpctypes::UInt16Write>::value || - std::is_same<WriteT, tpctypes::UInt32Write>::value || - std::is_same<WriteT, tpctypes::UInt64Write>::value>> { -public: - static Error serialize(ChannelT &C, const WriteT &W) { - return serializeSeq(C, W.Address, W.Value); - } - static Error deserialize(ChannelT &C, WriteT &W) { - return deserializeSeq(C, W.Address, W.Value); - } -}; - -template <typename ChannelT> -class SerializationTraits< - ChannelT, tpctypes::BufferWrite, tpctypes::BufferWrite, - std::enable_if_t<std::is_base_of<RawByteChannel, ChannelT>::value>> { -public: - static Error serialize(ChannelT &C, const tpctypes::BufferWrite &W) { - uint64_t Size = 
W.Buffer.size(); - if (auto Err = serializeSeq(C, W.Address, Size)) - return Err; - - return C.appendBytes(W.Buffer.data(), Size); - } - static Error deserialize(ChannelT &C, tpctypes::BufferWrite &W) { - JITTargetAddress Address; - uint64_t Size; - - if (auto Err = deserializeSeq(C, Address, Size)) - return Err; - - char *Buffer = jitTargetAddressToPointer<char *>(Address); - - if (auto Err = C.readBytes(Buffer, Size)) - return Err; - - W = {Address, StringRef(Buffer, Size)}; - return Error::success(); - } -}; - -template <typename ChannelT> -class SerializationTraits<ChannelT, orcrpctpc::ReserveMemRequestElement> { -public: - static Error serialize(ChannelT &C, - const orcrpctpc::ReserveMemRequestElement &E) { - return serializeSeq(C, static_cast<uint8_t>(E.Prot), E.Size, E.Alignment); - } - - static Error deserialize(ChannelT &C, - orcrpctpc::ReserveMemRequestElement &E) { - return deserializeSeq(C, *reinterpret_cast<uint8_t *>(&E.Prot), E.Size, - E.Alignment); - } -}; - -template <typename ChannelT> -class SerializationTraits<ChannelT, orcrpctpc::ReserveMemResultElement> { -public: - static Error serialize(ChannelT &C, - const orcrpctpc::ReserveMemResultElement &E) { - return serializeSeq(C, static_cast<uint8_t>(E.Prot), E.Address, - E.AllocatedSize); - } - - static Error deserialize(ChannelT &C, orcrpctpc::ReserveMemResultElement &E) { - return deserializeSeq(C, *reinterpret_cast<uint8_t *>(&E.Prot), E.Address, - E.AllocatedSize); - } -}; - -template <typename ChannelT> -class SerializationTraits<ChannelT, - orcrpctpc::ReleaseOrFinalizeMemRequestElement> { -public: - static Error - serialize(ChannelT &C, - const orcrpctpc::ReleaseOrFinalizeMemRequestElement &E) { - return serializeSeq(C, static_cast<uint8_t>(E.Prot), E.Address, E.Size); - } - - static Error deserialize(ChannelT &C, - orcrpctpc::ReleaseOrFinalizeMemRequestElement &E) { - return deserializeSeq(C, *reinterpret_cast<uint8_t *>(&E.Prot), E.Address, - E.Size); - } -}; - -template <typename ChannelT> 
-class SerializationTraits<ChannelT, orcrpctpc::ExecutorProcessInfo> { -public: - static Error serialize(ChannelT &C, - const orcrpctpc::ExecutorProcessInfo &EPI) { - return serializeSeq(C, EPI.Triple, EPI.PageSize, EPI.DispatchFuncAddr, - EPI.DispatchCtxAddr); - } - - static Error deserialize(ChannelT &C, orcrpctpc::ExecutorProcessInfo &EPI) { - return deserializeSeq(C, EPI.Triple, EPI.PageSize, EPI.DispatchFuncAddr, - EPI.DispatchCtxAddr); - } -}; - -} // end namespace shared - -namespace orcrpctpc { - -using RemoteSymbolLookupSet = std::vector<std::pair<std::string, bool>>; -using RemoteLookupRequest = - std::pair<tpctypes::DylibHandle, RemoteSymbolLookupSet>; - -class GetExecutorProcessInfo - : public shared::RPCFunction<GetExecutorProcessInfo, - orcrpctpc::ExecutorProcessInfo()> { -public: - static const char *getName() { return "GetJITDispatchInfo"; } -}; - -class ReserveMem - : public shared::RPCFunction<ReserveMem, Expected<ReserveMemResult>( - ReserveMemRequest)> { -public: - static const char *getName() { return "ReserveMem"; } -}; - -class FinalizeMem - : public shared::RPCFunction<FinalizeMem, - Error(ReleaseOrFinalizeMemRequest)> { -public: - static const char *getName() { return "FinalizeMem"; } -}; - -class ReleaseMem - : public shared::RPCFunction<ReleaseMem, - Error(ReleaseOrFinalizeMemRequest)> { -public: - static const char *getName() { return "ReleaseMem"; } -}; - -class WriteUInt8s - : public shared::RPCFunction<WriteUInt8s, - Error(std::vector<tpctypes::UInt8Write>)> { -public: - static const char *getName() { return "WriteUInt8s"; } -}; - -class WriteUInt16s - : public shared::RPCFunction<WriteUInt16s, - Error(std::vector<tpctypes::UInt16Write>)> { -public: - static const char *getName() { return "WriteUInt16s"; } -}; - -class WriteUInt32s - : public shared::RPCFunction<WriteUInt32s, - Error(std::vector<tpctypes::UInt32Write>)> { -public: - static const char *getName() { return "WriteUInt32s"; } -}; - -class WriteUInt64s - : public 
shared::RPCFunction<WriteUInt64s, - Error(std::vector<tpctypes::UInt64Write>)> { -public: - static const char *getName() { return "WriteUInt64s"; } -}; - -class WriteBuffers - : public shared::RPCFunction<WriteBuffers, - Error(std::vector<tpctypes::BufferWrite>)> { -public: - static const char *getName() { return "WriteBuffers"; } -}; - -class LoadDylib - : public shared::RPCFunction<LoadDylib, Expected<tpctypes::DylibHandle>( - std::string DylibPath)> { -public: - static const char *getName() { return "LoadDylib"; } -}; - -class LookupSymbols - : public shared::RPCFunction<LookupSymbols, - Expected<std::vector<tpctypes::LookupResult>>( - std::vector<RemoteLookupRequest>)> { -public: - static const char *getName() { return "LookupSymbols"; } -}; - -class RunMain - : public shared::RPCFunction<RunMain, - int64_t(JITTargetAddress MainAddr, - std::vector<std::string> Args)> { -public: - static const char *getName() { return "RunMain"; } -}; - -class RunWrapper - : public shared::RPCFunction<RunWrapper, - shared::WrapperFunctionResult( - JITTargetAddress, std::vector<uint8_t>)> { -public: - static const char *getName() { return "RunWrapper"; } -}; - -class CloseConnection : public shared::RPCFunction<CloseConnection, void()> { -public: - static const char *getName() { return "CloseConnection"; } -}; - -} // end namespace orcrpctpc - -/// TargetProcessControl for a process connected via an ORC RPC Endpoint. -template <typename RPCEndpointT> class OrcRPCTPCServer { -private: - using ThisT = OrcRPCTPCServer<RPCEndpointT>; - -public: - /// Create an OrcRPCTPCServer from the given endpoint. 
- OrcRPCTPCServer(RPCEndpointT &EP) : EP(EP) { - - TripleStr = sys::getProcessTriple(); - PageSize = sys::Process::getPageSizeEstimate(); - - EP.template addHandler<orcrpctpc::GetExecutorProcessInfo>( - *this, &ThisT::getExecutorProcessInfo); - EP.template addHandler<orcrpctpc::ReserveMem>(*this, &ThisT::reserveMemory); - EP.template addHandler<orcrpctpc::FinalizeMem>(*this, - &ThisT::finalizeMemory); - EP.template addHandler<orcrpctpc::ReleaseMem>(*this, &ThisT::releaseMemory); - - EP.template addHandler<orcrpctpc::WriteUInt8s>( - handleWriteUInt<tpctypes::UInt8Write>); - EP.template addHandler<orcrpctpc::WriteUInt16s>( - handleWriteUInt<tpctypes::UInt16Write>); - EP.template addHandler<orcrpctpc::WriteUInt32s>( - handleWriteUInt<tpctypes::UInt32Write>); - EP.template addHandler<orcrpctpc::WriteUInt64s>( - handleWriteUInt<tpctypes::UInt64Write>); - EP.template addHandler<orcrpctpc::WriteBuffers>(handleWriteBuffer); - - EP.template addHandler<orcrpctpc::LoadDylib>(*this, &ThisT::loadDylib); - EP.template addHandler<orcrpctpc::LookupSymbols>(*this, - &ThisT::lookupSymbols); - - EP.template addHandler<orcrpctpc::RunMain>(*this, &ThisT::runMain); - EP.template addHandler<orcrpctpc::RunWrapper>(*this, &ThisT::runWrapper); - - EP.template addHandler<orcrpctpc::CloseConnection>(*this, - &ThisT::closeConnection); - } - - /// Set the ProgramName to be used as the first argv element when running - /// functions via runAsMain. - void setProgramName(Optional<std::string> ProgramName = None) { - this->ProgramName = std::move(ProgramName); - } - - /// Get the RPC endpoint for this server. - RPCEndpointT &getEndpoint() { return EP; } - - /// Run the server loop. 
- Error run() { - while (!Finished) { - if (auto Err = EP.handleOne()) - return Err; - } - return Error::success(); - } - - Expected<shared::WrapperFunctionResult> - runWrapperInJIT(JITTargetAddress FunctionId, ArrayRef<char> ArgBuffer) { - return EP.template callB<orcrpctpc::RunWrapper>( - FunctionId, - ArrayRef<uint8_t>(reinterpret_cast<const uint8_t *>(ArgBuffer.data()), - ArgBuffer.size())); - } - -private: - static shared::detail::CWrapperFunctionResult - jitDispatchViaOrcRPCTPCServer(void *Ctx, const void *FnTag, const char *Data, - size_t Size) { - assert(Ctx && "Attempt to dispatch with null context ptr"); - auto R = static_cast<ThisT *>(Ctx)->runWrapperInJIT( - pointerToJITTargetAddress(FnTag), {Data, Size}); - if (!R) { - auto ErrMsg = toString(R.takeError()); - return shared::WrapperFunctionResult::createOutOfBandError(ErrMsg.data()) - .release(); - } - return R->release(); - } - - orcrpctpc::ExecutorProcessInfo getExecutorProcessInfo() { - return {TripleStr, static_cast<uint32_t>(PageSize), - pointerToJITTargetAddress(jitDispatchViaOrcRPCTPCServer), - pointerToJITTargetAddress(this)}; - } - - template <typename WriteT> - static void handleWriteUInt(const std::vector<WriteT> &Ws) { - using ValueT = decltype(std::declval<WriteT>().Value); - for (auto &W : Ws) - *jitTargetAddressToPointer<ValueT *>(W.Address) = W.Value; - } - - std::string getProtStr(orcrpctpc::WireProtectionFlags WPF) { - std::string Result; - Result += (WPF & orcrpctpc::WPF_Read) ? 'R' : '-'; - Result += (WPF & orcrpctpc::WPF_Write) ? 'W' : '-'; - Result += (WPF & orcrpctpc::WPF_Exec) ? 
'X' : '-'; - return Result; - } - - static void handleWriteBuffer(const std::vector<tpctypes::BufferWrite> &Ws) { - for (auto &W : Ws) { - memcpy(jitTargetAddressToPointer<char *>(W.Address), W.Buffer.data(), - W.Buffer.size()); - } - } - - Expected<orcrpctpc::ReserveMemResult> - reserveMemory(const orcrpctpc::ReserveMemRequest &Request) { - orcrpctpc::ReserveMemResult Allocs; - auto PF = sys::Memory::MF_READ | sys::Memory::MF_WRITE; - - uint64_t TotalSize = 0; - - for (const auto &E : Request) { - uint64_t Size = alignTo(E.Size, PageSize); - uint16_t Align = E.Alignment; - - if ((Align > PageSize) || (PageSize % Align)) - return make_error<StringError>( - "Page alignmen does not satisfy requested alignment", - inconvertibleErrorCode()); - - TotalSize += Size; - } - - // Allocate memory slab. - std::error_code EC; - auto MB = sys::Memory::allocateMappedMemory(TotalSize, nullptr, PF, EC); - if (EC) - return make_error<StringError>("Unable to allocate memory: " + - EC.message(), - inconvertibleErrorCode()); - - // Zero-fill the whole thing. - memset(MB.base(), 0, MB.allocatedSize()); - - // Carve up sections to return. 
- uint64_t SectionBase = 0; - for (const auto &E : Request) { - uint64_t SectionSize = alignTo(E.Size, PageSize); - Allocs.push_back({E.Prot, - pointerToJITTargetAddress(MB.base()) + SectionBase, - SectionSize}); - SectionBase += SectionSize; - } - - return Allocs; - } - - Error finalizeMemory(const orcrpctpc::ReleaseOrFinalizeMemRequest &FMR) { - for (const auto &E : FMR) { - sys::MemoryBlock MB(jitTargetAddressToPointer<void *>(E.Address), E.Size); - - auto PF = orcrpctpc::fromWireProtectionFlags(E.Prot); - if (auto EC = - sys::Memory::protectMappedMemory(MB, static_cast<unsigned>(PF))) - return make_error<StringError>("error protecting memory: " + - EC.message(), - inconvertibleErrorCode()); - } - return Error::success(); - } - - Error releaseMemory(const orcrpctpc::ReleaseOrFinalizeMemRequest &RMR) { - for (const auto &E : RMR) { - sys::MemoryBlock MB(jitTargetAddressToPointer<void *>(E.Address), E.Size); - - if (auto EC = sys::Memory::releaseMappedMemory(MB)) - return make_error<StringError>("error release memory: " + EC.message(), - inconvertibleErrorCode()); - } - return Error::success(); - } - - Expected<tpctypes::DylibHandle> loadDylib(const std::string &Path) { - std::string ErrMsg; - const char *DLPath = !Path.empty() ? 
Path.c_str() : nullptr; - auto DL = sys::DynamicLibrary::getPermanentLibrary(DLPath, &ErrMsg); - if (!DL.isValid()) - return make_error<StringError>(std::move(ErrMsg), - inconvertibleErrorCode()); - - tpctypes::DylibHandle H = Dylibs.size(); - Dylibs[H] = std::move(DL); - return H; - } - - Expected<std::vector<tpctypes::LookupResult>> - lookupSymbols(const std::vector<orcrpctpc::RemoteLookupRequest> &Request) { - std::vector<tpctypes::LookupResult> Result; - - for (const auto &E : Request) { - auto I = Dylibs.find(E.first); - if (I == Dylibs.end()) - return make_error<StringError>("Unrecognized handle", - inconvertibleErrorCode()); - auto &DL = I->second; - Result.push_back({}); - - for (const auto &KV : E.second) { - auto &SymString = KV.first; - bool WeakReference = KV.second; - - const char *Sym = SymString.c_str(); -#ifdef __APPLE__ - if (*Sym == '_') - ++Sym; -#endif - - void *Addr = DL.getAddressOfSymbol(Sym); - if (!Addr && !WeakReference) - return make_error<StringError>(Twine("Missing definition for ") + Sym, - inconvertibleErrorCode()); - - Result.back().push_back(pointerToJITTargetAddress(Addr)); - } - } - - return Result; - } - - int64_t runMain(JITTargetAddress MainFnAddr, - const std::vector<std::string> &Args) { - Optional<StringRef> ProgramNameOverride; - if (ProgramName) - ProgramNameOverride = *ProgramName; - - return runAsMain( - jitTargetAddressToFunction<int (*)(int, char *[])>(MainFnAddr), Args, - ProgramNameOverride); - } - - shared::WrapperFunctionResult - runWrapper(JITTargetAddress WrapperFnAddr, - const std::vector<uint8_t> &ArgBuffer) { - using WrapperFnTy = shared::detail::CWrapperFunctionResult (*)( - const char *Data, uint64_t Size); - auto *WrapperFn = jitTargetAddressToFunction<WrapperFnTy>(WrapperFnAddr); - return WrapperFn(reinterpret_cast<const char *>(ArgBuffer.data()), - ArgBuffer.size()); - } - - void closeConnection() { Finished = true; } - - std::string TripleStr; - uint64_t PageSize = 0; - Optional<std::string> ProgramName; 
- RPCEndpointT &EP; - std::atomic<bool> Finished{false}; - DenseMap<tpctypes::DylibHandle, sys::DynamicLibrary> Dylibs; -}; - -} // end namespace orc -} // end namespace llvm - -#endif // LLVM_EXECUTIONENGINE_ORC_TARGETPROCESS_ORCRPCTPCSERVER_H diff --git a/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/RegisterEHFrames.h b/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/RegisterEHFrames.h index 3b4aabb90371..735aa53e41fd 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/RegisterEHFrames.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/RegisterEHFrames.h @@ -33,10 +33,26 @@ Error deregisterEHFrameSection(const void *EHFrameSectionAddr, } // end namespace orc } // end namespace llvm -extern "C" llvm::orc::shared::detail::CWrapperFunctionResult +/// An eh-frame registration utility suitable for use as a support function +/// call. This function expects the direct address and size of the eh-frame +/// section to register as its arguments (it does not treat its arguments as +/// pointers to an SPS-serialized arg buffer). +extern "C" llvm::orc::shared::CWrapperFunctionResult +llvm_orc_registerEHFrameSectionCustomDirectWrapper( + const char *EHFrameSectionAddr, uint64_t Size); + +/// An eh-frame deregistration utility suitable for use as a support function +/// call. This function expects the direct address and size of the eh-frame +/// section to register as its arguments (it does not treat its arguments as +/// pointers to an SPS-serialized arg buffer). 
+extern "C" llvm::orc::shared::CWrapperFunctionResult +llvm_orc_deregisterEHFrameSectionCustomDirectWrapper( + const char *EHFrameSectionAddr, uint64_t Size); + +extern "C" llvm::orc::shared::CWrapperFunctionResult llvm_orc_registerEHFrameSectionWrapper(const char *Data, uint64_t Size); -extern "C" llvm::orc::shared::detail::CWrapperFunctionResult +extern "C" llvm::orc::shared::CWrapperFunctionResult llvm_orc_deregisterEHFrameSectionWrapper(const char *Data, uint64_t Size); #endif // LLVM_EXECUTIONENGINE_ORC_TARGETPROCESS_REGISTEREHFRAMES_H diff --git a/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/SimpleExecutorDylibManager.h b/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/SimpleExecutorDylibManager.h new file mode 100644 index 000000000000..cbab234f8a2d --- /dev/null +++ b/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/SimpleExecutorDylibManager.h @@ -0,0 +1,64 @@ +//===--------------- SimpleExecutorDylibManager.h ---------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// A simple dynamic library management class. Allows dynamic libraries to be +// loaded and searched. +// +// FIXME: The functionality in this file should be moved to the ORC runtime. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EXECUTIONENGINE_ORC_TARGETPROCESS_SIMPLEEXECUTORDYLIBMANAGER_H +#define LLVM_EXECUTIONENGINE_ORC_TARGETPROCESS_SIMPLEEXECUTORDYLIBMANAGER_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h" +#include "llvm/ExecutionEngine/Orc/Shared/SimpleRemoteEPCUtils.h" +#include "llvm/ExecutionEngine/Orc/Shared/TargetProcessControlTypes.h" +#include "llvm/ExecutionEngine/Orc/Shared/WrapperFunctionUtils.h" +#include "llvm/ExecutionEngine/Orc/TargetProcess/ExecutorBootstrapService.h" +#include "llvm/Support/DynamicLibrary.h" +#include "llvm/Support/Error.h" + +#include <mutex> + +namespace llvm { +namespace orc { +namespace rt_bootstrap { + +/// Simple page-based allocator. +class SimpleExecutorDylibManager : public ExecutorBootstrapService { +public: + virtual ~SimpleExecutorDylibManager(); + + Expected<tpctypes::DylibHandle> open(const std::string &Path, uint64_t Mode); + Expected<std::vector<ExecutorAddr>> lookup(tpctypes::DylibHandle H, + const RemoteSymbolLookupSet &L); + + Error shutdown() override; + void addBootstrapSymbols(StringMap<ExecutorAddr> &M) override; + +private: + using DylibsMap = DenseMap<uint64_t, sys::DynamicLibrary>; + + static llvm::orc::shared::CWrapperFunctionResult + openWrapper(const char *ArgData, size_t ArgSize); + + static llvm::orc::shared::CWrapperFunctionResult + lookupWrapper(const char *ArgData, size_t ArgSize); + + std::mutex M; + uint64_t NextId = 0; + DylibsMap Dylibs; +}; + +} // end namespace rt_bootstrap +} // end namespace orc +} // end namespace llvm + +#endif // LLVM_EXECUTIONENGINE_ORC_TARGETPROCESS_SIMPLEEXECUTORDYLIBMANAGER_H diff --git a/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/SimpleExecutorMemoryManager.h b/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/SimpleExecutorMemoryManager.h new file mode 100644 index 000000000000..6858f6d4db6e --- /dev/null +++ 
b/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/SimpleExecutorMemoryManager.h @@ -0,0 +1,70 @@ +//===---------------- SimpleExecutorMemoryManager.h -------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// A simple allocator class suitable for basic remote-JIT use. +// +// FIXME: The functionality in this file should be moved to the ORC runtime. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EXECUTIONENGINE_ORC_TARGETPROCESS_SIMPLEEXECUTORMEMORYMANAGER_H +#define LLVM_EXECUTIONENGINE_ORC_TARGETPROCESS_SIMPLEEXECUTORMEMORYMANAGER_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h" +#include "llvm/ExecutionEngine/Orc/Shared/TargetProcessControlTypes.h" +#include "llvm/ExecutionEngine/Orc/Shared/WrapperFunctionUtils.h" +#include "llvm/ExecutionEngine/Orc/TargetProcess/ExecutorBootstrapService.h" +#include "llvm/Support/Error.h" + +#include <mutex> + +namespace llvm { +namespace orc { +namespace rt_bootstrap { + +/// Simple page-based allocator. 
+class SimpleExecutorMemoryManager : public ExecutorBootstrapService { +public: + virtual ~SimpleExecutorMemoryManager(); + + Expected<ExecutorAddr> allocate(uint64_t Size); + Error finalize(tpctypes::FinalizeRequest &FR); + Error deallocate(const std::vector<ExecutorAddr> &Bases); + + Error shutdown() override; + void addBootstrapSymbols(StringMap<ExecutorAddr> &M) override; + +private: + struct Allocation { + size_t Size = 0; + std::vector<tpctypes::WrapperFunctionCall> DeallocationActions; + }; + + using AllocationsMap = DenseMap<void *, Allocation>; + + Error deallocateImpl(void *Base, Allocation &A); + + static llvm::orc::shared::CWrapperFunctionResult + reserveWrapper(const char *ArgData, size_t ArgSize); + + static llvm::orc::shared::CWrapperFunctionResult + finalizeWrapper(const char *ArgData, size_t ArgSize); + + static llvm::orc::shared::CWrapperFunctionResult + deallocateWrapper(const char *ArgData, size_t ArgSize); + + std::mutex M; + AllocationsMap Allocations; +}; + +} // end namespace rt_bootstrap +} // end namespace orc +} // end namespace llvm + +#endif // LLVM_EXECUTIONENGINE_ORC_TARGETPROCESS_SIMPLEEXECUTORMEMORYMANAGER_H diff --git a/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/SimpleRemoteEPCServer.h b/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/SimpleRemoteEPCServer.h new file mode 100644 index 000000000000..afd3d39dbb53 --- /dev/null +++ b/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/SimpleRemoteEPCServer.h @@ -0,0 +1,182 @@ +//===---- SimpleRemoteEPCServer.h - EPC over abstract channel ---*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// EPC over simple abstract channel. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EXECUTIONENGINE_ORC_TARGETPROCESS_SIMPLEREMOTEEPCSERVER_H +#define LLVM_EXECUTIONENGINE_ORC_TARGETPROCESS_SIMPLEREMOTEEPCSERVER_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/FunctionExtras.h" +#include "llvm/Config/llvm-config.h" +#include "llvm/ExecutionEngine/Orc/Shared/SimpleRemoteEPCUtils.h" +#include "llvm/ExecutionEngine/Orc/Shared/TargetProcessControlTypes.h" +#include "llvm/ExecutionEngine/Orc/Shared/WrapperFunctionUtils.h" +#include "llvm/ExecutionEngine/Orc/TargetProcess/ExecutorBootstrapService.h" +#include "llvm/ExecutionEngine/Orc/TargetProcess/SimpleExecutorDylibManager.h" +#include "llvm/Support/DynamicLibrary.h" +#include "llvm/Support/Error.h" + +#include <condition_variable> +#include <future> +#include <memory> +#include <mutex> + +namespace llvm { +namespace orc { + +/// A simple EPC server implementation. +class SimpleRemoteEPCServer : public SimpleRemoteEPCTransportClient { +public: + using ReportErrorFunction = unique_function<void(Error)>; + + /// Dispatches calls to runWrapper. 
+ class Dispatcher { + public: + virtual ~Dispatcher(); + virtual void dispatch(unique_function<void()> Work) = 0; + virtual void shutdown() = 0; + }; + +#if LLVM_ENABLE_THREADS + class ThreadDispatcher : public Dispatcher { + public: + void dispatch(unique_function<void()> Work) override; + void shutdown() override; + + private: + std::mutex DispatchMutex; + bool Running = true; + size_t Outstanding = 0; + std::condition_variable OutstandingCV; + }; +#endif + + class Setup { + friend class SimpleRemoteEPCServer; + + public: + SimpleRemoteEPCServer &server() { return S; } + StringMap<ExecutorAddr> &bootstrapSymbols() { return BootstrapSymbols; } + std::vector<std::unique_ptr<ExecutorBootstrapService>> &services() { + return Services; + } + void setDispatcher(std::unique_ptr<Dispatcher> D) { S.D = std::move(D); } + void setErrorReporter(unique_function<void(Error)> ReportError) { + S.ReportError = std::move(ReportError); + } + + private: + Setup(SimpleRemoteEPCServer &S) : S(S) {} + SimpleRemoteEPCServer &S; + StringMap<ExecutorAddr> BootstrapSymbols; + std::vector<std::unique_ptr<ExecutorBootstrapService>> Services; + }; + + static StringMap<ExecutorAddr> defaultBootstrapSymbols(); + + template <typename TransportT, typename... TransportTCtorArgTs> + static Expected<std::unique_ptr<SimpleRemoteEPCServer>> + Create(unique_function<Error(Setup &S)> SetupFunction, + TransportTCtorArgTs &&...TransportTCtorArgs) { + auto Server = std::make_unique<SimpleRemoteEPCServer>(); + Setup S(*Server); + if (auto Err = SetupFunction(S)) + return std::move(Err); + + // Set ReportError up-front so that it can be used if construction + // process fails. + if (!Server->ReportError) + Server->ReportError = [](Error Err) { + logAllUnhandledErrors(std::move(Err), errs(), "SimpleRemoteEPCServer "); + }; + + // Attempt to create transport. 
+ auto T = TransportT::Create( + *Server, std::forward<TransportTCtorArgTs>(TransportTCtorArgs)...); + if (!T) + return T.takeError(); + Server->T = std::move(*T); + if (auto Err = Server->T->start()) + return std::move(Err); + + // If transport creation succeeds then start up services. + Server->Services = std::move(S.services()); + Server->Services.push_back( + std::make_unique<rt_bootstrap::SimpleExecutorDylibManager>()); + for (auto &Service : Server->Services) + Service->addBootstrapSymbols(S.bootstrapSymbols()); + + if (auto Err = Server->sendSetupMessage(std::move(S.BootstrapSymbols))) + return std::move(Err); + return std::move(Server); + } + + /// Set an error reporter for this server. + void setErrorReporter(ReportErrorFunction ReportError) { + this->ReportError = std::move(ReportError); + } + + /// Call to handle an incoming message. + /// + /// Returns 'Disconnect' if the message is a 'detach' message from the remote + /// otherwise returns 'Continue'. If the server has moved to an error state, + /// returns an error, which should be reported and treated as a 'Disconnect'. 
+ Expected<HandleMessageAction> + handleMessage(SimpleRemoteEPCOpcode OpC, uint64_t SeqNo, ExecutorAddr TagAddr, + SimpleRemoteEPCArgBytesVector ArgBytes) override; + + Error waitForDisconnect(); + + void handleDisconnect(Error Err) override; + +private: + Error sendMessage(SimpleRemoteEPCOpcode OpC, uint64_t SeqNo, + ExecutorAddr TagAddr, ArrayRef<char> ArgBytes); + + Error sendSetupMessage(StringMap<ExecutorAddr> BootstrapSymbols); + + Error handleResult(uint64_t SeqNo, ExecutorAddr TagAddr, + SimpleRemoteEPCArgBytesVector ArgBytes); + void handleCallWrapper(uint64_t RemoteSeqNo, ExecutorAddr TagAddr, + SimpleRemoteEPCArgBytesVector ArgBytes); + + shared::WrapperFunctionResult + doJITDispatch(const void *FnTag, const char *ArgData, size_t ArgSize); + + static shared::CWrapperFunctionResult jitDispatchEntry(void *DispatchCtx, + const void *FnTag, + const char *ArgData, + size_t ArgSize); + + uint64_t getNextSeqNo() { return NextSeqNo++; } + void releaseSeqNo(uint64_t) {} + + using PendingJITDispatchResultsMap = + DenseMap<uint64_t, std::promise<shared::WrapperFunctionResult> *>; + + std::mutex ServerStateMutex; + std::condition_variable ShutdownCV; + enum { ServerRunning, ServerShuttingDown, ServerShutDown } RunState; + Error ShutdownErr = Error::success(); + std::unique_ptr<SimpleRemoteEPCTransport> T; + std::unique_ptr<Dispatcher> D; + std::vector<std::unique_ptr<ExecutorBootstrapService>> Services; + ReportErrorFunction ReportError; + + uint64_t NextSeqNo = 0; + PendingJITDispatchResultsMap PendingJITDispatchResults; + std::vector<sys::DynamicLibrary> Dylibs; +}; + +} // end namespace orc +} // end namespace llvm + +#endif // LLVM_EXECUTIONENGINE_ORC_TARGETPROCESS_SIMPLEREMOTEEPCSERVER_H diff --git a/llvm/include/llvm/ExecutionEngine/Orc/TaskDispatch.h b/llvm/include/llvm/ExecutionEngine/Orc/TaskDispatch.h new file mode 100644 index 000000000000..c57264e59655 --- /dev/null +++ b/llvm/include/llvm/ExecutionEngine/Orc/TaskDispatch.h @@ -0,0 +1,131 @@ 
+//===--------- TaskDispatch.h - ORC task dispatch utils ---------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Task and TaskDispatch classes. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EXECUTIONENGINE_ORC_TASKDISPATCH_H +#define LLVM_EXECUTIONENGINE_ORC_TASKDISPATCH_H + +#include "llvm/Config/llvm-config.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ExtensibleRTTI.h" +#include "llvm/Support/raw_ostream.h" + +#include <cassert> +#include <string> + +#if LLVM_ENABLE_THREADS +#include <condition_variable> +#include <mutex> +#include <thread> +#endif + +namespace llvm { +namespace orc { + +/// Represents an abstract task for ORC to run. +class Task : public RTTIExtends<Task, RTTIRoot> { +public: + static char ID; + + virtual ~Task() {} + + /// Description of the task to be performed. Used for logging. + virtual void printDescription(raw_ostream &OS) = 0; + + /// Run the task. + virtual void run() = 0; + +private: + void anchor() override; +}; + +/// Base class for generic tasks. +class GenericNamedTask : public RTTIExtends<GenericNamedTask, Task> { +public: + static char ID; + static const char *DefaultDescription; +}; + +/// Generic task implementation. 
+template <typename FnT> class GenericNamedTaskImpl : public GenericNamedTask { +public: + GenericNamedTaskImpl(FnT &&Fn, std::string DescBuffer) + : Fn(std::forward<FnT>(Fn)), Desc(DescBuffer.c_str()), + DescBuffer(std::move(DescBuffer)) {} + GenericNamedTaskImpl(FnT &&Fn, const char *Desc) + : Fn(std::forward<FnT>(Fn)), Desc(Desc) { + assert(Desc && "Description cannot be null"); + } + void printDescription(raw_ostream &OS) override { OS << Desc; } + void run() override { Fn(); } + +private: + FnT Fn; + const char *Desc; + std::string DescBuffer; +}; + +/// Create a generic named task from a std::string description. +template <typename FnT> +std::unique_ptr<GenericNamedTask> makeGenericNamedTask(FnT &&Fn, + std::string Desc) { + return std::make_unique<GenericNamedTaskImpl<FnT>>(std::forward<FnT>(Fn), + std::move(Desc)); +} + +/// Create a generic named task from a const char * description. +template <typename FnT> +std::unique_ptr<GenericNamedTask> +makeGenericNamedTask(FnT &&Fn, const char *Desc = nullptr) { + if (!Desc) + Desc = GenericNamedTask::DefaultDescription; + return std::make_unique<GenericNamedTaskImpl<FnT>>(std::forward<FnT>(Fn), + Desc); +} + +/// Abstract base for classes that dispatch ORC Tasks. +class TaskDispatcher { +public: + virtual ~TaskDispatcher(); + + /// Run the given task. + virtual void dispatch(std::unique_ptr<Task> T) = 0; + + /// Called by ExecutionSession. Waits until all tasks have completed. + virtual void shutdown() = 0; +}; + +/// Runs all tasks on the current thread. 
+class InPlaceTaskDispatcher : public TaskDispatcher { +public: + void dispatch(std::unique_ptr<Task> T) override; + void shutdown() override; +}; + +#if LLVM_ENABLE_THREADS + +class DynamicThreadPoolTaskDispatcher : public TaskDispatcher { +public: + void dispatch(std::unique_ptr<Task> T) override; + void shutdown() override; +private: + std::mutex DispatchMutex; + bool Running = true; + size_t Outstanding = 0; + std::condition_variable OutstandingCV; +}; + +#endif // LLVM_ENABLE_THREADS + +} // End namespace orc +} // End namespace llvm + +#endif // LLVM_EXECUTIONENGINE_ORC_TASKDISPATCH_H diff --git a/llvm/include/llvm/ExecutionEngine/OrcMCJITReplacement.h b/llvm/include/llvm/ExecutionEngine/OrcMCJITReplacement.h deleted file mode 100644 index 6cca1933f39f..000000000000 --- a/llvm/include/llvm/ExecutionEngine/OrcMCJITReplacement.h +++ /dev/null @@ -1,37 +0,0 @@ -//===---- OrcMCJITReplacement.h - Orc-based MCJIT replacement ---*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file forces OrcMCJITReplacement to link in on certain operating systems. -// (Windows). -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_EXECUTIONENGINE_ORCMCJITREPLACEMENT_H -#define LLVM_EXECUTIONENGINE_ORCMCJITREPLACEMENT_H - -#include "llvm/ExecutionEngine/ExecutionEngine.h" -#include <cstdlib> - -extern "C" void LLVMLinkInOrcMCJITReplacement(); - -namespace { - struct ForceOrcMCJITReplacementLinking { - ForceOrcMCJITReplacementLinking() { - // We must reference OrcMCJITReplacement in such a way that compilers will - // not delete it all as dead code, even with whole program optimization, - // yet is effectively a NO-OP. 
As the compiler isn't smart enough to know - // that getenv() never returns -1, this will do the job. - if (std::getenv("bar") != (char*) -1) - return; - - LLVMLinkInOrcMCJITReplacement(); - } - } ForceOrcMCJITReplacementLinking; -} - -#endif diff --git a/llvm/include/llvm/ExecutionEngine/OrcV1Deprecation.h b/llvm/include/llvm/ExecutionEngine/OrcV1Deprecation.h deleted file mode 100644 index 7ed254b3ee04..000000000000 --- a/llvm/include/llvm/ExecutionEngine/OrcV1Deprecation.h +++ /dev/null @@ -1,22 +0,0 @@ -//===------ OrcV1Deprecation.h - Memory manager for MC-JIT ------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// Tag for suppressing ORCv1 deprecation warnings. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_EXECUTIONENGINE_ORCV1DEPRECATION_H -#define LLVM_EXECUTIONENGINE_ORCV1DEPRECATION_H - -namespace llvm { - -enum ORCv1DeprecationAcknowledgement { AcknowledgeORCv1Deprecation }; - -} // namespace llvm - -#endif // LLVM_EXECUTIONENGINE_ORCV1DEPRECATION_H diff --git a/llvm/include/llvm/ExecutionEngine/RuntimeDyld.h b/llvm/include/llvm/ExecutionEngine/RuntimeDyld.h index 128c9967a596..c434b45077a3 100644 --- a/llvm/include/llvm/ExecutionEngine/RuntimeDyld.h +++ b/llvm/include/llvm/ExecutionEngine/RuntimeDyld.h @@ -112,6 +112,20 @@ public: StringRef SectionName, bool IsReadOnly) = 0; + /// An allocated TLS section + struct TLSSection { + /// The pointer to the initialization image + uint8_t *InitializationImage; + /// The TLS offset + intptr_t Offset; + }; + + /// Allocate a memory block of (at least) the given size to be used for + /// thread-local storage (TLS). 
+ virtual TLSSection allocateTLSSection(uintptr_t Size, unsigned Alignment, + unsigned SectionID, + StringRef SectionName); + /// Inform the memory manager about the total amount of memory required to /// allocate all sections to be loaded: /// \p CodeSize - the total size of all code sections diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.td b/llvm/include/llvm/Frontend/OpenMP/OMP.td index 3dc6194c7830..5ee379b7fcad 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMP.td +++ b/llvm/include/llvm/Frontend/OpenMP/OMP.td @@ -144,6 +144,26 @@ def OMPC_Schedule : Clause<"schedule"> { ]; } +def OMP_MEMORY_ORDER_SeqCst : ClauseVal<"seq_cst", 1, 1> {} +def OMP_MEMORY_ORDER_AcqRel : ClauseVal<"acq_rel", 2, 1> {} +def OMP_MEMORY_ORDER_Acquire : ClauseVal<"acquire", 3, 1> {} +def OMP_MEMORY_ORDER_Release : ClauseVal<"release", 4, 1> {} +def OMP_MEMORY_ORDER_Relaxed : ClauseVal<"relaxed", 5, 1> {} +def OMP_MEMORY_ORDER_Default : ClauseVal<"default", 6, 0> { + let isDefault = 1; +} +def OMPC_MemoryOrder : Clause<"memory_order"> { + let enumClauseValue = "MemoryOrderKind"; + let allowedClauseValues = [ + OMP_MEMORY_ORDER_SeqCst, + OMP_MEMORY_ORDER_AcqRel, + OMP_MEMORY_ORDER_Acquire, + OMP_MEMORY_ORDER_Release, + OMP_MEMORY_ORDER_Relaxed, + OMP_MEMORY_ORDER_Default + ]; +} + def OMPC_Ordered : Clause<"ordered"> { let clangClass = "OMPOrderedClause"; let flangClass = "ScalarIntConstantExpr"; @@ -261,13 +281,17 @@ def OMPC_Allocate : Clause<"allocate"> { } def OMPC_NonTemporal : Clause<"nontemporal"> { let clangClass = "OMPNontemporalClause"; + let flangClass = "Name"; + let isValueList = true; } -def OMP_ORDER_concurrent : ClauseVal<"default",2,0> { let isDefault = 1; } +def OMP_ORDER_concurrent : ClauseVal<"concurrent",1,1> {} +def OMP_ORDER_unknown : ClauseVal<"unknown",2,0> { let isDefault = 1; } def OMPC_Order : Clause<"order"> { let clangClass = "OMPOrderClause"; let enumClauseValue = "OrderKind"; let allowedClauseValues = [ + OMP_ORDER_unknown, OMP_ORDER_concurrent ]; } @@ 
-312,6 +336,8 @@ def OMPC_Uniform : Clause<"uniform"> { } def OMPC_DeviceType : Clause<"device_type"> {} def OMPC_Match : Clause<"match"> {} +def OMPC_AdjustArgs : Clause<"adjust_args"> { } +def OMPC_AppendArgs : Clause<"append_args"> { } def OMPC_Depobj : Clause<"depobj"> { let clangClass = "OMPDepobjClause"; let isImplicit = true; @@ -337,6 +363,14 @@ def OMPC_Filter : Clause<"filter"> { let clangClass = "OMPFilterClause"; let flangClass = "ScalarIntExpr"; } +def OMPC_Align : Clause<"align"> { + let clangClass = "OMPAlignClause"; +} +def OMPC_When: Clause<"when"> {} + +def OMPC_Bind : Clause<"bind"> { + let clangClass = "OMPBindClause"; +} //===----------------------------------------------------------------------===// // Definition of OpenMP directives @@ -473,8 +507,8 @@ def OMP_TaskWait : Directive<"taskwait"> { } def OMP_TaskGroup : Directive<"taskgroup"> { let allowedClauses = [ - VersionedClause<OMPC_TaskReduction>, - VersionedClause<OMPC_Allocate> + VersionedClause<OMPC_TaskReduction, 50>, + VersionedClause<OMPC_Allocate, 50> ]; } def OMP_Flush : Directive<"flush"> { @@ -489,10 +523,12 @@ def OMP_Flush : Directive<"flush"> { } def OMP_Ordered : Directive<"ordered"> { let allowedClauses = [ - VersionedClause<OMPC_Threads>, - VersionedClause<OMPC_Simd>, VersionedClause<OMPC_Depend> ]; + let allowedOnceClauses = [ + VersionedClause<OMPC_Threads>, + VersionedClause<OMPC_Simd> + ]; } def OMP_Atomic : Directive<"atomic"> { let allowedClauses = [ @@ -1506,13 +1542,18 @@ def OMP_TargetTeamsDistributeSimd : } def OMP_Allocate : Directive<"allocate"> { let allowedOnceClauses = [ - VersionedClause<OMPC_Allocator> + VersionedClause<OMPC_Allocator>, + VersionedClause<OMPC_Align, 51> ]; } def OMP_DeclareVariant : Directive<"declare variant"> { let allowedClauses = [ VersionedClause<OMPC_Match> ]; + let allowedExclusiveClauses = [ + VersionedClause<OMPC_AdjustArgs, 51>, + VersionedClause<OMPC_AppendArgs, 51> + ]; } def OMP_MasterTaskloop : Directive<"master taskloop"> { 
let allowedClauses = [ @@ -1699,6 +1740,22 @@ def OMP_masked : Directive<"masked"> { VersionedClause<OMPC_Filter> ]; } +def OMP_loop : Directive<"loop"> { + let allowedClauses = [ + VersionedClause<OMPC_LastPrivate>, + VersionedClause<OMPC_Private>, + VersionedClause<OMPC_Reduction>, + ]; + let allowedOnceClauses = [ + VersionedClause<OMPC_Bind, 50>, + VersionedClause<OMPC_Collapse>, + VersionedClause<OMPC_Order>, + ]; +} +def OMP_Metadirective : Directive<"metadirective"> { + let allowedClauses = [VersionedClause<OMPC_When>]; + let allowedOnceClauses = [VersionedClause<OMPC_Default>]; +} def OMP_Unknown : Directive<"unknown"> { let isDefault = true; } diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h b/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h index d174cc8992dd..2fec3e7e4230 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h @@ -128,6 +128,14 @@ enum class OMPScheduleType { LLVM_MARK_AS_BITMASK_ENUM(/* LargestValue */ ModifierMask) }; +enum OMPTgtExecModeFlags : int8_t { + OMP_TGT_EXEC_MODE_GENERIC = 1 << 0, + OMP_TGT_EXEC_MODE_SPMD = 1 << 1, + OMP_TGT_EXEC_MODE_GENERIC_SPMD = + OMP_TGT_EXEC_MODE_GENERIC | OMP_TGT_EXEC_MODE_SPMD, + LLVM_MARK_AS_BITMASK_ENUM(/* LargestValue */ OMP_TGT_EXEC_MODE_GENERIC_SPMD) +}; + } // end namespace omp } // end namespace llvm diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h b/llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h index 0b6aed1e9e12..89f5de229b3b 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h @@ -29,100 +29,89 @@ namespace omp { /// /// Example usage in clang: /// const unsigned slot_size = -/// ctx.GetTargetInfo().getGridValue(llvm::omp::GVIDX::GV_Warp_Size); +/// ctx.GetTargetInfo().getGridValue().GV_Warp_Size; /// /// Example usage in libomptarget/deviceRTLs: /// #include "llvm/Frontend/OpenMP/OMPGridValues.h" /// #ifdef __AMDGPU__ -/// #define 
GRIDVAL AMDGPUGpuGridValues +/// #define GRIDVAL AMDGPUGridValues /// #else -/// #define GRIDVAL NVPTXGpuGridValues +/// #define GRIDVAL NVPTXGridValues /// #endif /// ... Then use this reference for GV_Warp_Size in the deviceRTL source. -/// llvm::omp::GRIDVAL[llvm::omp::GVIDX::GV_Warp_Size] +/// llvm::omp::GRIDVAL().GV_Warp_Size /// /// Example usage in libomptarget hsa plugin: /// #include "llvm/Frontend/OpenMP/OMPGridValues.h" -/// #define GRIDVAL AMDGPUGpuGridValues +/// #define GRIDVAL AMDGPUGridValues /// ... Then use this reference to access GV_Warp_Size in the hsa plugin. -/// llvm::omp::GRIDVAL[llvm::omp::GVIDX::GV_Warp_Size] +/// llvm::omp::GRIDVAL().GV_Warp_Size /// /// Example usage in libomptarget cuda plugin: /// #include "llvm/Frontend/OpenMP/OMPGridValues.h" -/// #define GRIDVAL NVPTXGpuGridValues +/// #define GRIDVAL NVPTXGridValues /// ... Then use this reference to access GV_Warp_Size in the cuda plugin. -/// llvm::omp::GRIDVAL[llvm::omp::GVIDX::GV_Warp_Size] +/// llvm::omp::GRIDVAL().GV_Warp_Size /// -enum GVIDX { - /// The maximum number of workers in a kernel. - /// (THREAD_ABSOLUTE_LIMIT) - (GV_Warp_Size), might be issue for blockDim.z - GV_Threads, + +struct GV { /// The size reserved for data in a shared memory slot. - GV_Slot_Size, + const unsigned GV_Slot_Size; /// The default value of maximum number of threads in a worker warp. - GV_Warp_Size, - /// Alternate warp size for some AMDGCN architectures. Same as GV_Warp_Size - /// for NVPTX. - GV_Warp_Size_32, - /// The number of bits required to represent the max number of threads in warp - GV_Warp_Size_Log2, - /// GV_Warp_Size * GV_Slot_Size, - GV_Warp_Slot_Size, + const unsigned GV_Warp_Size; + + constexpr unsigned warpSlotSize() const { + return GV_Warp_Size * GV_Slot_Size; + } + /// the maximum number of teams. 
- GV_Max_Teams, - /// Global Memory Alignment - GV_Mem_Align, - /// (~0u >> (GV_Warp_Size - GV_Warp_Size_Log2)) - GV_Warp_Size_Log2_Mask, + const unsigned GV_Max_Teams; // An alternative to the heavy data sharing infrastructure that uses global // memory is one that uses device __shared__ memory. The amount of such space // (in bytes) reserved by the OpenMP runtime is noted here. - GV_SimpleBufferSize, + const unsigned GV_SimpleBufferSize; // The absolute maximum team size for a working group - GV_Max_WG_Size, + const unsigned GV_Max_WG_Size; // The default maximum team size for a working group - GV_Default_WG_Size, - // This is GV_Max_WG_Size / GV_WarpSize. 32 for NVPTX and 16 for AMDGCN. - GV_Max_Warp_Number, - /// The slot size that should be reserved for a working warp. - /// (~0u >> (GV_Warp_Size - GV_Warp_Size_Log2)) - GV_Warp_Size_Log2_MaskL + const unsigned GV_Default_WG_Size; + + constexpr unsigned maxWarpNumber() const { + return GV_Max_WG_Size / GV_Warp_Size; + } }; /// For AMDGPU GPUs -static constexpr unsigned AMDGPUGpuGridValues[] = { - 448, // GV_Threads - 256, // GV_Slot_Size - 64, // GV_Warp_Size - 32, // GV_Warp_Size_32 - 6, // GV_Warp_Size_Log2 - 64 * 256, // GV_Warp_Slot_Size - 128, // GV_Max_Teams - 256, // GV_Mem_Align - 63, // GV_Warp_Size_Log2_Mask - 896, // GV_SimpleBufferSize - 1024, // GV_Max_WG_Size, - 256, // GV_Defaut_WG_Size - 1024 / 64, // GV_Max_WG_Size / GV_WarpSize - 63 // GV_Warp_Size_Log2_MaskL +static constexpr GV AMDGPUGridValues64 = { + 256, // GV_Slot_Size + 64, // GV_Warp_Size + 128, // GV_Max_Teams + 896, // GV_SimpleBufferSize + 1024, // GV_Max_WG_Size, + 256, // GV_Default_WG_Size }; +static constexpr GV AMDGPUGridValues32 = { + 256, // GV_Slot_Size + 32, // GV_Warp_Size + 128, // GV_Max_Teams + 896, // GV_SimpleBufferSize + 1024, // GV_Max_WG_Size, + 256, // GV_Default_WG_Size +}; + +template <unsigned wavesize> constexpr const GV &getAMDGPUGridValues() { + static_assert(wavesize == 32 || wavesize == 64, ""); + return 
wavesize == 32 ? AMDGPUGridValues32 : AMDGPUGridValues64; +} + /// For Nvidia GPUs -static constexpr unsigned NVPTXGpuGridValues[] = { - 992, // GV_Threads - 256, // GV_Slot_Size - 32, // GV_Warp_Size - 32, // GV_Warp_Size_32 - 5, // GV_Warp_Size_Log2 - 32 * 256, // GV_Warp_Slot_Size - 1024, // GV_Max_Teams - 256, // GV_Mem_Align - (~0u >> (32 - 5)), // GV_Warp_Size_Log2_Mask - 896, // GV_SimpleBufferSize - 1024, // GV_Max_WG_Size - 128, // GV_Defaut_WG_Size - 1024 / 32, // GV_Max_WG_Size / GV_WarpSize - 31 // GV_Warp_Size_Log2_MaskL +static constexpr GV NVPTXGridValues = { + 256, // GV_Slot_Size + 32, // GV_Warp_Size + 1024, // GV_Max_Teams + 896, // GV_SimpleBufferSize + 1024, // GV_Max_WG_Size + 128, // GV_Default_WG_Size }; } // namespace omp diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h index 8144f1527a06..563e0eed1762 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -257,18 +257,17 @@ public: /// /// * Sign of the step and the comparison operator might disagree: /// - /// for (int i = 0; i < 42; --i) + /// for (int i = 0; i < 42; i -= 1u) /// // /// \param Loc The insert and source location description. /// \param BodyGenCB Callback that will generate the loop body code. /// \param Start Value of the loop counter for the first iterations. - /// \param Stop Loop counter values past this will stop the the - /// iterations. + /// \param Stop Loop counter values past this will stop the loop. /// \param Step Loop counter increment after each iteration; negative - /// means counting down. \param IsSigned Whether Start, Stop - /// and Stop are signed integers. - /// \param InclusiveStop Whether \p Stop itself is a valid value for the loop + /// means counting down. + /// \param IsSigned Whether Start, Stop and Step are signed integers. + /// \param InclusiveStop Whether \p Stop itself is a valid value for the loop /// counter. 
/// \param ComputeIP Insertion point for instructions computing the trip /// count. Can be used to ensure the trip count is available @@ -335,7 +334,7 @@ public: /// has a trip count of 0). This is permitted by the OpenMP specification. /// /// \param DL Debug location for instructions added for collapsing, - /// such as instructions to compute derive the input loop's + /// such as instructions to compute/derive the input loop's /// induction variables. /// \param Loops Loops in the loop nest to collapse. Loops are specified /// from outermost-to-innermost and every control flow of a @@ -358,8 +357,16 @@ public: /// the current thread, updates the relevant instructions in the canonical /// loop and calls to an OpenMP runtime finalization function after the loop. /// - /// \param Loc The source location description, the insertion location - /// is not used. + /// TODO: Workshare loops with static scheduling may contain up to two loops + /// that fulfill the requirements of an OpenMP canonical loop. One for + /// iterating over all iterations of a chunk and another one for iterating + /// over all chunks that are executed on the same thread. Returning + /// CanonicalLoopInfo objects representing them may eventually be useful for + /// the apply clause planned in OpenMP 6.0, but currently whether these are + /// canonical loops is irrelevant. + /// + /// \param DL Debug location for instructions added for the + /// workshare-loop construct itself. /// \param CLI A descriptor of the canonical loop to workshare. /// \param AllocaIP An insertion point for Alloca instructions usable in the /// preheader of the loop. @@ -368,12 +375,11 @@ public: /// \param Chunk The size of loop chunk considered as a unit when /// scheduling. If \p nullptr, defaults to 1. /// - /// \returns Updated CanonicalLoopInfo. 
- CanonicalLoopInfo *createStaticWorkshareLoop(const LocationDescription &Loc, - CanonicalLoopInfo *CLI, - InsertPointTy AllocaIP, - bool NeedsBarrier, - Value *Chunk = nullptr); + /// \returns Point where to insert code after the workshare construct. + InsertPointTy applyStaticWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI, + InsertPointTy AllocaIP, + bool NeedsBarrier, + Value *Chunk = nullptr); /// Modifies the canonical loop to be a dynamically-scheduled workshare loop. /// @@ -382,8 +388,9 @@ public: /// turn it into a workshare loop. In particular, it calls to an OpenMP /// runtime function in the preheader to obtain, and then in each iteration /// to update the loop counter. - /// \param Loc The source location description, the insertion location - /// is not used. + /// + /// \param DL Debug location for instructions added for the + /// workshare-loop construct itself. /// \param CLI A descriptor of the canonical loop to workshare. /// \param AllocaIP An insertion point for Alloca instructions usable in the /// preheader of the loop. @@ -393,13 +400,12 @@ public: /// \param Chunk The size of loop chunk considered as a unit when /// scheduling. If \p nullptr, defaults to 1. /// - /// \returns Point where to insert code after the loop. - InsertPointTy createDynamicWorkshareLoop(const LocationDescription &Loc, - CanonicalLoopInfo *CLI, - InsertPointTy AllocaIP, - omp::OMPScheduleType SchedType, - bool NeedsBarrier, - Value *Chunk = nullptr); + /// \returns Point where to insert code after the workshare construct. + InsertPointTy applyDynamicWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI, + InsertPointTy AllocaIP, + omp::OMPScheduleType SchedType, + bool NeedsBarrier, + Value *Chunk = nullptr); /// Modifies the canonical loop to be a workshare loop. /// @@ -410,19 +416,17 @@ public: /// the current thread, updates the relevant instructions in the canonical /// loop and calls to an OpenMP runtime finalization function after the loop. 
/// - /// \param Loc The source location description, the insertion location - /// is not used. + /// \param DL Debug location for instructions added for the + /// workshare-loop construct itself. /// \param CLI A descriptor of the canonical loop to workshare. /// \param AllocaIP An insertion point for Alloca instructions usable in the /// preheader of the loop. /// \param NeedsBarrier Indicates whether a barrier must be insterted after /// the loop. /// - /// \returns Updated CanonicalLoopInfo. - CanonicalLoopInfo *createWorkshareLoop(const LocationDescription &Loc, - CanonicalLoopInfo *CLI, - InsertPointTy AllocaIP, - bool NeedsBarrier); + /// \returns Point where to insert code after the workshare construct. + InsertPointTy applyWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI, + InsertPointTy AllocaIP, bool NeedsBarrier); /// Tile a loop nest. /// @@ -471,6 +475,48 @@ public: tileLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops, ArrayRef<Value *> TileSizes); + /// Fully unroll a loop. + /// + /// Instead of unrolling the loop immediately (and duplicating its body + /// instructions), it is deferred to LLVM's LoopUnrollPass by adding loop + /// metadata. + /// + /// \param DL Debug location for instructions added by unrolling. + /// \param Loop The loop to unroll. The loop will be invalidated. + void unrollLoopFull(DebugLoc DL, CanonicalLoopInfo *Loop); + + /// Fully or partially unroll a loop. How the loop is unrolled is determined + /// using LLVM's LoopUnrollPass. + /// + /// \param DL Debug location for instructions added by unrolling. + /// \param Loop The loop to unroll. The loop will be invalidated. + void unrollLoopHeuristic(DebugLoc DL, CanonicalLoopInfo *Loop); + + /// Partially unroll a loop. + /// + /// The CanonicalLoopInfo of the unrolled loop for use with chained + /// loop-associated directive can be requested using \p UnrolledCLI. 
Not + /// needing the CanonicalLoopInfo allows more efficient code generation by + /// deferring the actual unrolling to the LoopUnrollPass using loop metadata. + /// A loop-associated directive applied to the unrolled loop needs to know the + /// new trip count which means that if using a heuristically determined unroll + /// factor (\p Factor == 0), that factor must be computed immediately. We are + /// using the same logic as the LoopUnrollPass to derived the unroll factor, + /// but which assumes that some canonicalization has taken place (e.g. + /// Mem2Reg, LICM, GVN, Inlining, etc.). That is, the heuristic will perform + /// better when the unrolled loop's CanonicalLoopInfo is not needed. + /// + /// \param DL Debug location for instructions added by unrolling. + /// \param Loop The loop to unroll. The loop will be invalidated. + /// \param Factor The factor to unroll the loop by. A factor of 0 + /// indicates that a heuristic should be used to determine + /// the unroll-factor. + /// \param UnrolledCLI If non-null, receives the CanonicalLoopInfo of the + /// partially unrolled loop. Otherwise, uses loop metadata + /// to defer unrolling to the LoopUnrollPass. + void unrollLoopPartial(DebugLoc DL, CanonicalLoopInfo *Loop, int32_t Factor, + CanonicalLoopInfo **UnrolledCLI); + /// Generator for '#omp flush' /// /// \param Loc The location where the flush directive was encountered @@ -486,6 +532,115 @@ public: /// \param Loc The location where the taskyield directive was encountered. void createTaskyield(const LocationDescription &Loc); + /// Functions used to generate reductions. Such functions take two Values + /// representing LHS and RHS of the reduction, respectively, and a reference + /// to the value that is updated to refer to the reduction result. + using ReductionGenTy = + function_ref<InsertPointTy(InsertPointTy, Value *, Value *, Value *&)>; + + /// Functions used to generate atomic reductions. 
Such functions take two + /// Values representing pointers to LHS and RHS of the reduction. They are + /// expected to atomically update the LHS to the reduced value. + using AtomicReductionGenTy = + function_ref<InsertPointTy(InsertPointTy, Value *, Value *)>; + + /// Information about an OpenMP reduction. + struct ReductionInfo { + ReductionInfo(Value *Variable, Value *PrivateVariable, + ReductionGenTy ReductionGen, + AtomicReductionGenTy AtomicReductionGen) + : Variable(Variable), PrivateVariable(PrivateVariable), + ReductionGen(ReductionGen), AtomicReductionGen(AtomicReductionGen) {} + + /// Returns the type of the element being reduced. + Type *getElementType() const { + return Variable->getType()->getPointerElementType(); + } + + /// Reduction variable of pointer type. + Value *Variable; + + /// Thread-private partial reduction variable. + Value *PrivateVariable; + + /// Callback for generating the reduction body. The IR produced by this will + /// be used to combine two values in a thread-safe context, e.g., under + /// lock or within the same thread, and therefore need not be atomic. + ReductionGenTy ReductionGen; + + /// Callback for generating the atomic reduction body, may be null. The IR + /// produced by this will be used to atomically combine two values during + /// reduction. If null, the implementation will use the non-atomic version + /// along with the appropriate synchronization mechanisms. + AtomicReductionGenTy AtomicReductionGen; + }; + + // TODO: provide atomic and non-atomic reduction generators for reduction + // operators defined by the OpenMP specification. + + /// Generator for '#omp reduction'. + /// + /// Emits the IR instructing the runtime to perform the specific kind of + /// reductions. Expects reduction variables to have been privatized and + /// initialized to reduction-neutral values separately. 
Emits the calls to + /// runtime functions as well as the reduction function and the basic blocks + /// performing the reduction atomically and non-atomically. + /// + /// The code emitted for the following: + /// + /// \code + /// type var_1; + /// type var_2; + /// #pragma omp <directive> reduction(reduction-op:var_1,var_2) + /// /* body */; + /// \endcode + /// + /// corresponds to the following sketch. + /// + /// \code + /// void _outlined_par() { + /// // N is the number of different reductions. + /// void *red_array[] = {privatized_var_1, privatized_var_2, ...}; + /// switch(__kmpc_reduce(..., N, /*size of data in red array*/, red_array, + /// _omp_reduction_func, + /// _gomp_critical_user.reduction.var)) { + /// case 1: { + /// var_1 = var_1 <reduction-op> privatized_var_1; + /// var_2 = var_2 <reduction-op> privatized_var_2; + /// // ... + /// __kmpc_end_reduce(...); + /// break; + /// } + /// case 2: { + /// _Atomic<ReductionOp>(var_1, privatized_var_1); + /// _Atomic<ReductionOp>(var_2, privatized_var_2); + /// // ... + /// break; + /// } + /// default: break; + /// } + /// } + /// + /// void _omp_reduction_func(void **lhs, void **rhs) { + /// *(type *)lhs[0] = *(type *)lhs[0] <reduction-op> *(type *)rhs[0]; + /// *(type *)lhs[1] = *(type *)lhs[1] <reduction-op> *(type *)rhs[1]; + /// // ... + /// } + /// \endcode + /// + /// \param Loc The location where the reduction was + /// encountered. Must be within the associate + /// directive and after the last local access to the + /// reduction variables. + /// \param AllocaIP An insertion point suitable for allocas usable + /// in reductions. + /// \param ReductionInfos A list of info on each reduction variable. + /// \param IsNoWait A flag set if the reduction is marked as nowait. + InsertPointTy createReductions(const LocationDescription &Loc, + InsertPointTy AllocaIP, + ArrayRef<ReductionInfo> ReductionInfos, + bool IsNoWait = false); + ///} /// Return the insertion point used by the underlying IRBuilder. 
@@ -515,6 +670,10 @@ public: Constant *getOrCreateSrcLocStr(StringRef FunctionName, StringRef FileName, unsigned Line, unsigned Column); + /// Return the (LLVM-IR) string describing the DebugLoc \p DL. Use \p F as + /// fallback if \p DL does not specify the function name. + Constant *getOrCreateSrcLocStr(DebugLoc DL, Function *F = nullptr); + /// Return the (LLVM-IR) string describing the source location \p Loc. Constant *getOrCreateSrcLocStr(const LocationDescription &Loc); @@ -524,8 +683,8 @@ public: omp::IdentFlag Flags = omp::IdentFlag(0), unsigned Reserve2Flags = 0); - // Get the type corresponding to __kmpc_impl_lanemask_t from the deviceRTL - Type *getLanemaskType(); + /// Create a global flag \p Namein the module with initial value \p Value. + GlobalValue *createGlobalFlag(unsigned Value, StringRef Name); /// Generate control flow and cleanup for cancellation. /// @@ -651,11 +810,11 @@ public: /// \param Loc The source location description. /// \param MapperFunc Function to be called. /// \param SrcLocInfo Source location information global. - /// \param MaptypesArgs - /// \param MapnamesArg + /// \param MaptypesArg The argument types. + /// \param MapnamesArg The argument names. /// \param MapperAllocas The AllocaInst used for the call. /// \param DeviceID Device ID for the call. - /// \param TotalNbOperand Number of operand in the call. + /// \param NumOperands Number of operands in the call. void emitMapperCall(const LocationDescription &Loc, Function *MapperFunc, Value *SrcLocInfo, Value *MaptypesArg, Value *MapnamesArg, struct MapperAllocas &MapperAllocas, int64_t DeviceID, @@ -705,7 +864,7 @@ public: /// \param BodyGenCB Callback that will generate the region code. /// \param FiniCB Callback to finialize variable copies. /// - /// \returns The insertion position *after* the master. + /// \returns The insertion position *after* the masked. 
InsertPointTy createMasked(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, Value *Filter); @@ -718,12 +877,41 @@ public: /// \param CriticalName name of the lock used by the critical directive /// \param HintInst Hint Instruction for hint clause associated with critical /// - /// \returns The insertion position *after* the master. + /// \returns The insertion position *after* the critical. InsertPointTy createCritical(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, StringRef CriticalName, Value *HintInst); + /// Generator for '#omp ordered depend (source | sink)' + /// + /// \param Loc The insert and source location description. + /// \param AllocaIP The insertion point to be used for alloca instructions. + /// \param NumLoops The number of loops in depend clause. + /// \param StoreValues The value will be stored in vector address. + /// \param Name The name of alloca instruction. + /// \param IsDependSource If true, depend source; otherwise, depend sink. + /// + /// \return The insertion position *after* the ordered. + InsertPointTy createOrderedDepend(const LocationDescription &Loc, + InsertPointTy AllocaIP, unsigned NumLoops, + ArrayRef<llvm::Value *> StoreValues, + const Twine &Name, bool IsDependSource); + + /// Generator for '#omp ordered [threads | simd]' + /// + /// \param Loc The insert and source location description. + /// \param BodyGenCB Callback that will generate the region code. + /// \param FiniCB Callback to finalize variable copies. + /// \param IsThreads If true, with threads clause or without clause; + /// otherwise, with simd clause; + /// + /// \returns The insertion position *after* the ordered. + InsertPointTy createOrderedThreadsSimd(const LocationDescription &Loc, + BodyGenCallbackTy BodyGenCB, + FinalizeCallbackTy FiniCB, + bool IsThreads); + /// Generator for '#omp sections' /// /// \param Loc The insert and source location description. 
@@ -816,14 +1004,16 @@ public: /// \param Loc The insert and source location description. /// \param IsSPMD Flag to indicate if the kernel is an SPMD kernel or not. /// \param RequiresFullRuntime Indicate if a full device runtime is necessary. - InsertPointTy createTargetInit(const LocationDescription &Loc, bool IsSPMD, bool RequiresFullRuntime); + InsertPointTy createTargetInit(const LocationDescription &Loc, bool IsSPMD, + bool RequiresFullRuntime); /// Create a runtime call for kmpc_target_deinit /// /// \param Loc The insert and source location description. /// \param IsSPMD Flag to indicate if the kernel is an SPMD kernel or not. /// \param RequiresFullRuntime Indicate if a full device runtime is necessary. - void createTargetDeinit(const LocationDescription &Loc, bool IsSPMD, bool RequiresFullRuntime); + void createTargetDeinit(const LocationDescription &Loc, bool IsSPMD, + bool RequiresFullRuntime); ///} @@ -1121,7 +1311,25 @@ public: /// The control-flow structure is standardized for easy consumption by /// directives associated with loops. For instance, the worksharing-loop /// construct may change this control flow such that each loop iteration is -/// executed on only one thread. +/// executed on only one thread. The constraints of a canonical loop in brief +/// are: +/// +/// * The number of loop iterations must have been computed before entering the +/// loop. +/// +/// * Has an (unsigned) logical induction variable that starts at zero and +/// increments by one. +/// +/// * The loop's CFG itself has no side-effects. The OpenMP specification +/// itself allows side-effects, but the order in which they happen, including +/// how often or whether at all, is unspecified. We expect that the frontend +/// will emit those side-effect instructions somewhere (e.g. before the loop) +/// such that the CanonicalLoopInfo itself can be side-effect free. 
+/// +/// Keep in mind that CanonicalLoopInfo is meant to only describe a repeated +/// execution of a loop body that satifies these constraints. It does NOT +/// represent arbitrary SESE regions that happen to contain a loop. Do not use +/// CanonicalLoopInfo for such purposes. /// /// The control flow can be described as follows: /// @@ -1141,73 +1349,149 @@ public: /// | /// After /// -/// Code in the header, condition block, latch and exit block must not have any -/// side-effect. The body block is the single entry point into the loop body, -/// which may contain arbitrary control flow as long as all control paths -/// eventually branch to the latch block. +/// The loop is thought to start at PreheaderIP (at the Preheader's terminator, +/// including) and end at AfterIP (at the After's first instruction, excluding). +/// That is, instructions in the Preheader and After blocks (except the +/// Preheader's terminator) are out of CanonicalLoopInfo's control and may have +/// side-effects. Typically, the Preheader is used to compute the loop's trip +/// count. The instructions from BodyIP (at the Body block's first instruction, +/// excluding) until the Latch are also considered outside CanonicalLoopInfo's +/// control and thus can have side-effects. The body block is the single entry +/// point into the loop body, which may contain arbitrary control flow as long +/// as all control paths eventually branch to the Latch block. +/// +/// TODO: Consider adding another standardized BasicBlock between Body CFG and +/// Latch to guarantee that there is only a single edge to the latch. It would +/// make loop transformations easier to not needing to consider multiple +/// predecessors of the latch (See redirectAllPredecessorsTo) and would give us +/// an equivalant to PreheaderIP, AfterIP and BodyIP for inserting code that +/// executes after each body iteration. +/// +/// There must be no loop-carried dependencies through llvm::Values. 
This is +/// equivalant to that the Latch has no PHINode and the Header's only PHINode is +/// for the induction variable. +/// +/// All code in Header, Cond, Latch and Exit (plus the terminator of the +/// Preheader) are CanonicalLoopInfo's responsibility and their build-up checked +/// by assertOK(). They are expected to not be modified unless explicitly +/// modifying the CanonicalLoopInfo through a methods that applies a OpenMP +/// loop-associated construct such as applyWorkshareLoop, tileLoops, unrollLoop, +/// etc. These methods usually invalidate the CanonicalLoopInfo and re-use its +/// basic blocks. After invalidation, the CanonicalLoopInfo must not be used +/// anymore as its underlying control flow may not exist anymore. +/// Loop-transformation methods such as tileLoops, collapseLoops and unrollLoop +/// may also return a new CanonicalLoopInfo that can be passed to other +/// loop-associated construct implementing methods. These loop-transforming +/// methods may either create a new CanonicalLoopInfo usually using +/// createLoopSkeleton and invalidate the input CanonicalLoopInfo, or reuse and +/// modify one of the input CanonicalLoopInfo and return it as representing the +/// modified loop. What is done is an implementation detail of +/// transformation-implementing method and callers should always assume that the +/// CanonicalLoopInfo passed to it is invalidated and a new object is returned. +/// Returned CanonicalLoopInfo have the same structure and guarantees as the one +/// created by createCanonicalLoop, such that transforming methods do not have +/// to special case where the CanonicalLoopInfo originated from. +/// +/// Generally, methods consuming CanonicalLoopInfo do not need an +/// OpenMPIRBuilder::InsertPointTy as argument, but use the locations of the +/// CanonicalLoopInfo to insert new or modify existing instructions. 
Unless +/// documented otherwise, methods consuming CanonicalLoopInfo do not invalidate +/// any InsertPoint that is outside CanonicalLoopInfo's control. Specifically, +/// any InsertPoint in the Preheader, After or Block can still be used after +/// calling such a method. /// -/// Defined outside OpenMPIRBuilder because one cannot forward-declare nested -/// classes. +/// TODO: Provide mechanisms for exception handling and cancellation points. +/// +/// Defined outside OpenMPIRBuilder because nested classes cannot be +/// forward-declared, e.g. to avoid having to include the entire OMPIRBuilder.h. class CanonicalLoopInfo { friend class OpenMPIRBuilder; private: - /// Whether this object currently represents a loop. - bool IsValid = false; - - BasicBlock *Preheader; - BasicBlock *Header; - BasicBlock *Cond; - BasicBlock *Body; - BasicBlock *Latch; - BasicBlock *Exit; - BasicBlock *After; + BasicBlock *Preheader = nullptr; + BasicBlock *Header = nullptr; + BasicBlock *Cond = nullptr; + BasicBlock *Body = nullptr; + BasicBlock *Latch = nullptr; + BasicBlock *Exit = nullptr; + BasicBlock *After = nullptr; /// Add the control blocks of this loop to \p BBs. /// /// This does not include any block from the body, including the one returned /// by getBody(). + /// + /// FIXME: This currently includes the Preheader and After blocks even though + /// their content is (mostly) not under CanonicalLoopInfo's control. + /// Re-evaluated whether this makes sense. void collectControlBlocks(SmallVectorImpl<BasicBlock *> &BBs); public: + /// Returns whether this object currently represents the IR of a loop. If + /// returning false, it may have been consumed by a loop transformation or not + /// been intialized. Do not use in this case; + bool isValid() const { return Header; } + /// The preheader ensures that there is only a single edge entering the loop. 
/// Code that must be execute before any loop iteration can be emitted here, /// such as computing the loop trip count and begin lifetime markers. Code in /// the preheader is not considered part of the canonical loop. - BasicBlock *getPreheader() const { return Preheader; } + BasicBlock *getPreheader() const { + assert(isValid() && "Requires a valid canonical loop"); + return Preheader; + } /// The header is the entry for each iteration. In the canonical control flow, /// it only contains the PHINode for the induction variable. - BasicBlock *getHeader() const { return Header; } + BasicBlock *getHeader() const { + assert(isValid() && "Requires a valid canonical loop"); + return Header; + } /// The condition block computes whether there is another loop iteration. If /// yes, branches to the body; otherwise to the exit block. - BasicBlock *getCond() const { return Cond; } + BasicBlock *getCond() const { + assert(isValid() && "Requires a valid canonical loop"); + return Cond; + } /// The body block is the single entry for a loop iteration and not controlled /// by CanonicalLoopInfo. It can contain arbitrary control flow but must /// eventually branch to the \p Latch block. - BasicBlock *getBody() const { return Body; } + BasicBlock *getBody() const { + assert(isValid() && "Requires a valid canonical loop"); + return Body; + } /// Reaching the latch indicates the end of the loop body code. In the /// canonical control flow, it only contains the increment of the induction /// variable. - BasicBlock *getLatch() const { return Latch; } + BasicBlock *getLatch() const { + assert(isValid() && "Requires a valid canonical loop"); + return Latch; + } /// Reaching the exit indicates no more iterations are being executed. - BasicBlock *getExit() const { return Exit; } + BasicBlock *getExit() const { + assert(isValid() && "Requires a valid canonical loop"); + return Exit; + } /// The after block is intended for clean-up code such as lifetime end /// markers. 
It is separate from the exit block to ensure, analogous to the /// preheader, it having just a single entry edge and being free from PHI /// nodes should there be multiple loop exits (such as from break /// statements/cancellations). - BasicBlock *getAfter() const { return After; } + BasicBlock *getAfter() const { + assert(isValid() && "Requires a valid canonical loop"); + return After; + } /// Returns the llvm::Value containing the number of loop iterations. It must /// be valid in the preheader and always interpreted as an unsigned integer of /// any bit-width. Value *getTripCount() const { + assert(isValid() && "Requires a valid canonical loop"); Instruction *CmpI = &Cond->front(); assert(isa<CmpInst>(CmpI) && "First inst must compare IV with TripCount"); return CmpI->getOperand(1); @@ -1216,33 +1500,47 @@ public: /// Returns the instruction representing the current logical induction /// variable. Always unsigned, always starting at 0 with an increment of one. Instruction *getIndVar() const { + assert(isValid() && "Requires a valid canonical loop"); Instruction *IndVarPHI = &Header->front(); assert(isa<PHINode>(IndVarPHI) && "First inst must be the IV PHI"); return IndVarPHI; } /// Return the type of the induction variable (and the trip count). - Type *getIndVarType() const { return getIndVar()->getType(); } + Type *getIndVarType() const { + assert(isValid() && "Requires a valid canonical loop"); + return getIndVar()->getType(); + } /// Return the insertion point for user code before the loop. OpenMPIRBuilder::InsertPointTy getPreheaderIP() const { + assert(isValid() && "Requires a valid canonical loop"); return {Preheader, std::prev(Preheader->end())}; }; /// Return the insertion point for user code in the body. OpenMPIRBuilder::InsertPointTy getBodyIP() const { + assert(isValid() && "Requires a valid canonical loop"); return {Body, Body->begin()}; }; /// Return the insertion point for user code after the loop. 
OpenMPIRBuilder::InsertPointTy getAfterIP() const { + assert(isValid() && "Requires a valid canonical loop"); return {After, After->begin()}; }; - Function *getFunction() const { return Header->getParent(); } + Function *getFunction() const { + assert(isValid() && "Requires a valid canonical loop"); + return Header->getParent(); + } /// Consistency self-check. void assertOK() const; + + /// Invalidate this loop. That is, the underlying IR does not fulfill the + /// requirements of an OpenMP canonical loop anymore. + void invalidate(); }; } // end namespace llvm diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def index eb673b199fc4..8e4f7568fb9c 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def +++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def @@ -39,7 +39,6 @@ __OMP_TYPE(Int32Ptr) __OMP_TYPE(Int64Ptr) OMP_TYPE(SizeTy, M.getDataLayout().getIntPtrType(Ctx)) -OMP_TYPE(LanemaskTy, getLanemaskType()) #define __OMP_PTR_TYPE(NAME, BASE) OMP_TYPE(NAME, BASE->getPointerTo()) @@ -272,6 +271,15 @@ __OMP_RTL(__kmpc_for_static_init_8, false, Void, IdentPtr, Int32, Int32, __OMP_RTL(__kmpc_for_static_init_8u, false, Void, IdentPtr, Int32, Int32, Int32Ptr, Int64Ptr, Int64Ptr, Int64Ptr, Int64, Int64) __OMP_RTL(__kmpc_for_static_fini, false, Void, IdentPtr, Int32) +__OMP_RTL(__kmpc_distribute_static_init_4, false, Void, IdentPtr, Int32, Int32, + Int32Ptr, Int32Ptr, Int32Ptr, Int32Ptr, Int32, Int32) +__OMP_RTL(__kmpc_distribute_static_init_4u, false, Void, IdentPtr, Int32, Int32, + Int32Ptr, Int32Ptr, Int32Ptr, Int32Ptr, Int32, Int32) +__OMP_RTL(__kmpc_distribute_static_init_8, false, Void, IdentPtr, Int32, Int32, + Int32Ptr, Int64Ptr, Int64Ptr, Int64Ptr, Int64, Int64) +__OMP_RTL(__kmpc_distribute_static_init_8u, false, Void, IdentPtr, Int32, Int32, + Int32Ptr, Int64Ptr, Int64Ptr, Int64Ptr, Int64, Int64) +__OMP_RTL(__kmpc_distribute_static_fini, false, Void, IdentPtr, Int32) __OMP_RTL(__kmpc_dist_dispatch_init_4, 
false, Void, IdentPtr, Int32, Int32, Int32Ptr, Int32, Int32, Int32, Int32) __OMP_RTL(__kmpc_dist_dispatch_init_4u, false, Void, IdentPtr, Int32, Int32, @@ -415,8 +423,8 @@ __OMP_RTL(__kmpc_task_allow_completion_event, false, VoidPtr, IdentPtr, /* Int */ Int32, /* kmp_task_t */ VoidPtr) /// OpenMP Device runtime functions -__OMP_RTL(__kmpc_target_init, false, Int32, IdentPtr, Int1, Int1, Int1) -__OMP_RTL(__kmpc_target_deinit, false, Void, IdentPtr, Int1, Int1) +__OMP_RTL(__kmpc_target_init, false, Int32, IdentPtr, Int8, Int1, Int1) +__OMP_RTL(__kmpc_target_deinit, false, Void, IdentPtr, Int8, Int1) __OMP_RTL(__kmpc_kernel_prepare_parallel, false, Void, VoidPtr) __OMP_RTL(__kmpc_parallel_51, false, Void, IdentPtr, Int32, Int32, Int32, Int32, VoidPtr, VoidPtr, VoidPtrPtr, SizeTy) @@ -442,9 +450,12 @@ __OMP_RTL(__kmpc_get_shared_variables, false, Void, VoidPtrPtrPtr) __OMP_RTL(__kmpc_parallel_level, false, Int8, ) __OMP_RTL(__kmpc_is_spmd_exec_mode, false, Int8, ) __OMP_RTL(__kmpc_barrier_simple_spmd, false, Void, IdentPtr, Int32) +__OMP_RTL(__kmpc_barrier_simple_generic, false, Void, IdentPtr, Int32) -__OMP_RTL(__kmpc_warp_active_thread_mask, false, LanemaskTy,) -__OMP_RTL(__kmpc_syncwarp, false, Void, LanemaskTy) +__OMP_RTL(__kmpc_warp_active_thread_mask, false, Int64,) +__OMP_RTL(__kmpc_syncwarp, false, Void, Int64) + +__OMP_RTL(__kmpc_get_warp_size, false, Int32, ) __OMP_RTL(__kmpc_is_generic_main_thread_id, false, Int8, Int32) @@ -510,6 +521,11 @@ __OMP_ATTRS_SET(NoCaptureAttrs, ? AttributeSet(EnumAttr(NoCapture)) : AttributeSet(EnumAttr(NoCapture))) +__OMP_ATTRS_SET(AlwaysInlineAttrs, + OptimisticAttributes + ? AttributeSet(EnumAttr(AlwaysInline)) + : AttributeSet(EnumAttr(AlwaysInline))) + #if 0 __OMP_ATTRS_SET(InaccessibleOnlyAttrs, OptimisticAttributes @@ -535,6 +551,11 @@ __OMP_ATTRS_SET(ReadOnlyPtrAttrs, EnumAttr(NoCapture)) : AttributeSet()) +__OMP_ATTRS_SET(DeviceAllocAttrs, + OptimisticAttributes + ? 
AttributeSet(EnumAttr(NoUnwind), EnumAttr(NoSync)) + : AttributeSet(EnumAttr(NoUnwind), EnumAttr(NoSync))) + #if 0 __OMP_ATTRS_SET(WriteOnlyPtrAttrs, OptimisticAttributes @@ -575,6 +596,8 @@ __OMP_RTL_ATTRS(__kmpc_barrier, BarrierAttrs, AttributeSet(), ParamAttrs(ReadOnlyPtrAttrs)) __OMP_RTL_ATTRS(__kmpc_barrier_simple_spmd, BarrierAttrs, AttributeSet(), ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_barrier_simple_generic, BarrierAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) __OMP_RTL_ATTRS(__kmpc_warp_active_thread_mask, BarrierAttrs, AttributeSet(), ParamAttrs()) __OMP_RTL_ATTRS(__kmpc_syncwarp, BarrierAttrs, AttributeSet(), ParamAttrs()) @@ -703,6 +726,28 @@ __OMP_RTL_ATTRS(__kmpc_for_static_init_8u, GetterArgWriteAttrs, AttributeSet(), AttributeSet(), AttributeSet())) __OMP_RTL_ATTRS(__kmpc_for_static_fini, InaccessibleArgOnlyAttrs, AttributeSet(), ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_distribute_static_init_4, GetterArgWriteAttrs, + AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(), + ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs, + AttributeSet(), AttributeSet())) +__OMP_RTL_ATTRS(__kmpc_distribute_static_init_4u, GetterArgWriteAttrs, + AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(), + ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs, + AttributeSet(), AttributeSet())) +__OMP_RTL_ATTRS(__kmpc_distribute_static_init_8, GetterArgWriteAttrs, + AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(), + ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs, + AttributeSet(), AttributeSet())) +__OMP_RTL_ATTRS(__kmpc_distribute_static_init_8u, GetterArgWriteAttrs, + AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(), + ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs, + AttributeSet(), AttributeSet())) +__OMP_RTL_ATTRS(__kmpc_distribute_static_fini, InaccessibleArgOnlyAttrs, + AttributeSet(), ParamAttrs(ReadOnlyPtrAttrs)) 
__OMP_RTL_ATTRS(__kmpc_dist_dispatch_init_4, GetterArgWriteAttrs, AttributeSet(), ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(), @@ -854,9 +899,9 @@ __OMP_RTL_ATTRS(__kmpc_doacross_wait, BarrierAttrs, AttributeSet(), __OMP_RTL_ATTRS(__kmpc_doacross_fini, BarrierAttrs, AttributeSet(), ParamAttrs(ReadOnlyPtrAttrs)) -__OMP_RTL_ATTRS(__kmpc_alloc_shared, DefaultAttrs, ReturnPtrAttrs, +__OMP_RTL_ATTRS(__kmpc_alloc_shared, DeviceAllocAttrs, ReturnPtrAttrs, ParamAttrs()) -__OMP_RTL_ATTRS(__kmpc_free_shared, AllocAttrs, AttributeSet(), +__OMP_RTL_ATTRS(__kmpc_free_shared, DeviceAllocAttrs, AttributeSet(), ParamAttrs(NoCaptureAttrs)) __OMP_RTL_ATTRS(__kmpc_alloc, DefaultAttrs, ReturnPtrAttrs, ParamAttrs()) @@ -897,6 +942,9 @@ __OMP_RTL_ATTRS(__tgt_push_mapper_component, ForkAttrs, AttributeSet(), __OMP_RTL_ATTRS(__kmpc_task_allow_completion_event, DefaultAttrs, ReturnPtrAttrs, ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_parallel_51, AlwaysInlineAttrs, AttributeSet(), + ParamAttrs()) + #undef __OMP_RTL_ATTRS #undef OMP_RTL_ATTRS #undef AttributeSet @@ -920,6 +968,7 @@ __OMP_RTL_ATTRS(__kmpc_task_allow_completion_event, DefaultAttrs, OMP_IDENT_FLAG(OMP_IDENT_FLAG_##Name, #Name, Value) __OMP_IDENT_FLAG(KMPC, 0x02) +__OMP_IDENT_FLAG(ATOMIC_REDUCE, 0x10) __OMP_IDENT_FLAG(BARRIER_EXPL, 0x20) __OMP_IDENT_FLAG(BARRIER_IMPL, 0x0040) __OMP_IDENT_FLAG(BARRIER_IMPL_MASK, 0x01C0) diff --git a/llvm/include/llvm/IR/AbstractCallSite.h b/llvm/include/llvm/IR/AbstractCallSite.h index e8cf05001542..31df4c75b6e7 100644 --- a/llvm/include/llvm/IR/AbstractCallSite.h +++ b/llvm/include/llvm/IR/AbstractCallSite.h @@ -153,7 +153,7 @@ public: /// Return the number of parameters of the callee. unsigned getNumArgOperands() const { if (isDirectCall()) - return CB->getNumArgOperands(); + return CB->arg_size(); // Subtract 1 for the callee encoding. 
return CI.ParameterEncoding.size() - 1; } diff --git a/llvm/include/llvm/IR/Argument.h b/llvm/include/llvm/IR/Argument.h index dcf658f439b4..396ab6a9d01d 100644 --- a/llvm/include/llvm/IR/Argument.h +++ b/llvm/include/llvm/IR/Argument.h @@ -97,7 +97,7 @@ public: /// If this is a byval or inalloca argument, return its alignment. /// FIXME: Remove this function once transition to Align is over. /// Use getParamAlign() instead. - unsigned getParamAlignment() const; + uint64_t getParamAlignment() const; /// If this is a byval or inalloca argument, return its alignment. MaybeAlign getParamAlign() const; diff --git a/llvm/include/llvm/IR/Assumptions.h b/llvm/include/llvm/IR/Assumptions.h index f64616c25d87..08e6c8b6f1e0 100644 --- a/llvm/include/llvm/IR/Assumptions.h +++ b/llvm/include/llvm/IR/Assumptions.h @@ -15,12 +15,14 @@ #ifndef LLVM_IR_ASSUMPTIONS_H #define LLVM_IR_ASSUMPTIONS_H +#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSet.h" namespace llvm { class Function; +class CallBase; /// The key we use for assumption attributes. constexpr StringRef AssumptionAttrKey = "llvm.assume"; @@ -43,7 +45,25 @@ private: }; /// Return true if \p F has the assumption \p AssumptionStr attached. -bool hasAssumption(Function &F, const KnownAssumptionString &AssumptionStr); +bool hasAssumption(const Function &F, + const KnownAssumptionString &AssumptionStr); + +/// Return true if \p CB or the callee has the assumption \p AssumptionStr +/// attached. +bool hasAssumption(const CallBase &CB, + const KnownAssumptionString &AssumptionStr); + +/// Return the set of all assumptions for the function \p F. +DenseSet<StringRef> getAssumptions(const Function &F); + +/// Return the set of all assumptions for the call \p CB. +DenseSet<StringRef> getAssumptions(const CallBase &CB); + +/// Appends the set of assumptions \p Assumptions to \F. 
+bool addAssumptions(Function &F, const DenseSet<StringRef> &Assumptions); + +/// Appends the set of assumptions \p Assumptions to \CB. +bool addAssumptions(CallBase &CB, const DenseSet<StringRef> &Assumptions); } // namespace llvm diff --git a/llvm/include/llvm/IR/Attributes.h b/llvm/include/llvm/IR/Attributes.h index d7bd3edb3d4c..282be640d8be 100644 --- a/llvm/include/llvm/IR/Attributes.h +++ b/llvm/include/llvm/IR/Attributes.h @@ -37,7 +37,6 @@ class AttrBuilder; class AttributeImpl; class AttributeListImpl; class AttributeSetNode; -template<typename T> struct DenseMapInfo; class FoldingSetNodeID; class Function; class LLVMContext; @@ -78,6 +77,7 @@ public: TombstoneKey, ///< Use as Tombstone key for DenseMap of AttrKind }; + static const unsigned NumIntAttrKinds = LastIntAttr - FirstIntAttr + 1; static const unsigned NumTypeAttrKinds = LastTypeAttr - FirstTypeAttr + 1; static bool isEnumAttrKind(AttrKind Kind) { @@ -265,7 +265,7 @@ inline Attribute unwrap(LLVMAttributeRef Attr) { /// and removing string or integer attributes involves a FoldingSet lookup. class AttributeSet { friend AttributeListImpl; - template <typename Ty> friend struct DenseMapInfo; + template <typename Ty, typename Enable> friend struct DenseMapInfo; // TODO: Extract AvailableAttrs from AttributeSetNode and store them here. // This will allow an efficient implementation of addAttribute and @@ -366,7 +366,7 @@ public: //===----------------------------------------------------------------------===// /// \class /// Provide DenseMapInfo for AttributeSet. 
-template <> struct DenseMapInfo<AttributeSet> { +template <> struct DenseMapInfo<AttributeSet, void> { static AttributeSet getEmptyKey() { auto Val = static_cast<uintptr_t>(-1); Val <<= PointerLikeTypeTraits<void *>::NumLowBitsAvailable; @@ -408,7 +408,7 @@ private: friend class AttributeListImpl; friend class AttributeSet; friend class AttributeSetNode; - template <typename Ty> friend struct DenseMapInfo; + template <typename Ty, typename Enable> friend struct DenseMapInfo; /// The attributes that we are managing. This can be null to represent /// the empty attributes list. @@ -432,8 +432,8 @@ private: static AttributeList getImpl(LLVMContext &C, ArrayRef<AttributeSet> AttrSets); - AttributeList setAttributes(LLVMContext &C, unsigned Index, - AttributeSet Attrs) const; + AttributeList setAttributesAtIndex(LLVMContext &C, unsigned Index, + AttributeSet Attrs) const; public: AttributeList() = default; @@ -454,32 +454,84 @@ public: static AttributeList get(LLVMContext &C, unsigned Index, const AttrBuilder &B); + // TODO: remove non-AtIndex versions of these methods. /// Add an attribute to the attribute set at the given index. /// Returns a new list because attribute lists are immutable. - LLVM_NODISCARD AttributeList addAttribute(LLVMContext &C, unsigned Index, - Attribute::AttrKind Kind) const; + LLVM_NODISCARD AttributeList addAttributeAtIndex( + LLVMContext &C, unsigned Index, Attribute::AttrKind Kind) const; /// Add an attribute to the attribute set at the given index. /// Returns a new list because attribute lists are immutable. LLVM_NODISCARD AttributeList - addAttribute(LLVMContext &C, unsigned Index, StringRef Kind, - StringRef Value = StringRef()) const; + addAttributeAtIndex(LLVMContext &C, unsigned Index, StringRef Kind, + StringRef Value = StringRef()) const; /// Add an attribute to the attribute set at the given index. /// Returns a new list because attribute lists are immutable. 
- LLVM_NODISCARD AttributeList addAttribute(LLVMContext &C, unsigned Index, - Attribute A) const; + LLVM_NODISCARD AttributeList addAttributeAtIndex(LLVMContext &C, + unsigned Index, + Attribute A) const; /// Add attributes to the attribute set at the given index. /// Returns a new list because attribute lists are immutable. - LLVM_NODISCARD AttributeList addAttributes(LLVMContext &C, unsigned Index, - const AttrBuilder &B) const; + LLVM_NODISCARD AttributeList addAttributesAtIndex(LLVMContext &C, + unsigned Index, + const AttrBuilder &B) const; + + /// Add a function attribute to the list. Returns a new list because + /// attribute lists are immutable. + LLVM_NODISCARD AttributeList addFnAttribute(LLVMContext &C, + Attribute::AttrKind Kind) const { + return addAttributeAtIndex(C, FunctionIndex, Kind); + } + + /// Add a function attribute to the list. Returns a new list because + /// attribute lists are immutable. + LLVM_NODISCARD AttributeList addFnAttribute(LLVMContext &C, + Attribute Attr) const { + return addAttributeAtIndex(C, FunctionIndex, Attr); + } + + /// Add a function attribute to the list. Returns a new list because + /// attribute lists are immutable. + LLVM_NODISCARD AttributeList addFnAttribute( + LLVMContext &C, StringRef Kind, StringRef Value = StringRef()) const { + return addAttributeAtIndex(C, FunctionIndex, Kind, Value); + } + + /// Add function attribute to the list. Returns a new list because + /// attribute lists are immutable. + LLVM_NODISCARD AttributeList addFnAttributes(LLVMContext &C, + const AttrBuilder &B) const { + return addAttributesAtIndex(C, FunctionIndex, B); + } + + /// Add a return value attribute to the list. Returns a new list because + /// attribute lists are immutable. + LLVM_NODISCARD AttributeList addRetAttribute(LLVMContext &C, + Attribute::AttrKind Kind) const { + return addAttributeAtIndex(C, ReturnIndex, Kind); + } + + /// Add a return value attribute to the list. 
Returns a new list because + /// attribute lists are immutable. + LLVM_NODISCARD AttributeList addRetAttribute(LLVMContext &C, + Attribute Attr) const { + return addAttributeAtIndex(C, ReturnIndex, Attr); + } + + /// Add a return value attribute to the list. Returns a new list because + /// attribute lists are immutable. + LLVM_NODISCARD AttributeList addRetAttributes(LLVMContext &C, + const AttrBuilder &B) const { + return addAttributesAtIndex(C, ReturnIndex, B); + } /// Add an argument attribute to the list. Returns a new list because /// attribute lists are immutable. LLVM_NODISCARD AttributeList addParamAttribute( LLVMContext &C, unsigned ArgNo, Attribute::AttrKind Kind) const { - return addAttribute(C, ArgNo + FirstArgIndex, Kind); + return addAttributeAtIndex(C, ArgNo + FirstArgIndex, Kind); } /// Add an argument attribute to the list. Returns a new list because @@ -487,7 +539,7 @@ public: LLVM_NODISCARD AttributeList addParamAttribute(LLVMContext &C, unsigned ArgNo, StringRef Kind, StringRef Value = StringRef()) const { - return addAttribute(C, ArgNo + FirstArgIndex, Kind, Value); + return addAttributeAtIndex(C, ArgNo + FirstArgIndex, Kind, Value); } /// Add an attribute to the attribute list at the given arg indices. Returns a @@ -501,34 +553,87 @@ public: LLVM_NODISCARD AttributeList addParamAttributes(LLVMContext &C, unsigned ArgNo, const AttrBuilder &B) const { - return addAttributes(C, ArgNo + FirstArgIndex, B); + return addAttributesAtIndex(C, ArgNo + FirstArgIndex, B); } /// Remove the specified attribute at the specified index from this /// attribute list. Returns a new list because attribute lists are immutable. - LLVM_NODISCARD AttributeList removeAttribute(LLVMContext &C, unsigned Index, - Attribute::AttrKind Kind) const; + LLVM_NODISCARD AttributeList removeAttributeAtIndex( + LLVMContext &C, unsigned Index, Attribute::AttrKind Kind) const; /// Remove the specified attribute at the specified index from this /// attribute list. 
Returns a new list because attribute lists are immutable. + LLVM_NODISCARD AttributeList removeAttributeAtIndex(LLVMContext &C, + unsigned Index, + StringRef Kind) const; LLVM_NODISCARD AttributeList removeAttribute(LLVMContext &C, unsigned Index, - StringRef Kind) const; + StringRef Kind) const { + return removeAttributeAtIndex(C, Index, Kind); + } /// Remove the specified attributes at the specified index from this /// attribute list. Returns a new list because attribute lists are immutable. - LLVM_NODISCARD AttributeList removeAttributes( + LLVM_NODISCARD AttributeList removeAttributesAtIndex( LLVMContext &C, unsigned Index, const AttrBuilder &AttrsToRemove) const; /// Remove all attributes at the specified index from this /// attribute list. Returns a new list because attribute lists are immutable. - LLVM_NODISCARD AttributeList removeAttributes(LLVMContext &C, - unsigned Index) const; + LLVM_NODISCARD AttributeList removeAttributesAtIndex(LLVMContext &C, + unsigned Index) const; + + /// Remove the specified attribute at the function index from this + /// attribute list. Returns a new list because attribute lists are immutable. + LLVM_NODISCARD AttributeList + removeFnAttribute(LLVMContext &C, Attribute::AttrKind Kind) const { + return removeAttributeAtIndex(C, FunctionIndex, Kind); + } + + /// Remove the specified attribute at the function index from this + /// attribute list. Returns a new list because attribute lists are immutable. + LLVM_NODISCARD AttributeList removeFnAttribute(LLVMContext &C, + StringRef Kind) const { + return removeAttributeAtIndex(C, FunctionIndex, Kind); + } + + /// Remove the specified attribute at the function index from this + /// attribute list. Returns a new list because attribute lists are immutable. 
+ LLVM_NODISCARD AttributeList + removeFnAttributes(LLVMContext &C, const AttrBuilder &AttrsToRemove) const { + return removeAttributesAtIndex(C, FunctionIndex, AttrsToRemove); + } + + /// Remove the attributes at the function index from this + /// attribute list. Returns a new list because attribute lists are immutable. + LLVM_NODISCARD AttributeList removeFnAttributes(LLVMContext &C) const { + return removeAttributesAtIndex(C, FunctionIndex); + } + + /// Remove the specified attribute at the return value index from this + /// attribute list. Returns a new list because attribute lists are immutable. + LLVM_NODISCARD AttributeList + removeRetAttribute(LLVMContext &C, Attribute::AttrKind Kind) const { + return removeAttributeAtIndex(C, ReturnIndex, Kind); + } + + /// Remove the specified attribute at the return value index from this + /// attribute list. Returns a new list because attribute lists are immutable. + LLVM_NODISCARD AttributeList removeRetAttribute(LLVMContext &C, + StringRef Kind) const { + return removeAttributeAtIndex(C, ReturnIndex, Kind); + } + + /// Remove the specified attribute at the return value index from this + /// attribute list. Returns a new list because attribute lists are immutable. + LLVM_NODISCARD AttributeList + removeRetAttributes(LLVMContext &C, const AttrBuilder &AttrsToRemove) const { + return removeAttributesAtIndex(C, ReturnIndex, AttrsToRemove); + } /// Remove the specified attribute at the specified arg index from this /// attribute list. Returns a new list because attribute lists are immutable. 
LLVM_NODISCARD AttributeList removeParamAttribute( LLVMContext &C, unsigned ArgNo, Attribute::AttrKind Kind) const { - return removeAttribute(C, ArgNo + FirstArgIndex, Kind); + return removeAttributeAtIndex(C, ArgNo + FirstArgIndex, Kind); } /// Remove the specified attribute at the specified arg index from this @@ -536,80 +641,55 @@ public: LLVM_NODISCARD AttributeList removeParamAttribute(LLVMContext &C, unsigned ArgNo, StringRef Kind) const { - return removeAttribute(C, ArgNo + FirstArgIndex, Kind); + return removeAttributeAtIndex(C, ArgNo + FirstArgIndex, Kind); } /// Remove the specified attribute at the specified arg index from this /// attribute list. Returns a new list because attribute lists are immutable. LLVM_NODISCARD AttributeList removeParamAttributes( LLVMContext &C, unsigned ArgNo, const AttrBuilder &AttrsToRemove) const { - return removeAttributes(C, ArgNo + FirstArgIndex, AttrsToRemove); + return removeAttributesAtIndex(C, ArgNo + FirstArgIndex, AttrsToRemove); } /// Remove all attributes at the specified arg index from this /// attribute list. Returns a new list because attribute lists are immutable. LLVM_NODISCARD AttributeList removeParamAttributes(LLVMContext &C, unsigned ArgNo) const { - return removeAttributes(C, ArgNo + FirstArgIndex); + return removeAttributesAtIndex(C, ArgNo + FirstArgIndex); } /// Replace the type contained by attribute \p AttrKind at index \p ArgNo wih /// \p ReplacementTy, preserving all other attributes. 
- LLVM_NODISCARD AttributeList replaceAttributeType(LLVMContext &C, - unsigned ArgNo, - Attribute::AttrKind Kind, - Type *ReplacementTy) const { - Attribute Attr = getAttribute(ArgNo, Kind); - auto Attrs = removeAttribute(C, ArgNo, Kind); - return Attrs.addAttribute(C, ArgNo, Attr.getWithNewType(C, ReplacementTy)); + LLVM_NODISCARD AttributeList replaceAttributeTypeAtIndex( + LLVMContext &C, unsigned ArgNo, Attribute::AttrKind Kind, + Type *ReplacementTy) const { + Attribute Attr = getAttributeAtIndex(ArgNo, Kind); + auto Attrs = removeAttributeAtIndex(C, ArgNo, Kind); + return Attrs.addAttributeAtIndex(C, ArgNo, + Attr.getWithNewType(C, ReplacementTy)); } /// \brief Add the dereferenceable attribute to the attribute set at the given /// index. Returns a new list because attribute lists are immutable. - LLVM_NODISCARD AttributeList addDereferenceableAttr(LLVMContext &C, - unsigned Index, - uint64_t Bytes) const; + LLVM_NODISCARD AttributeList addDereferenceableRetAttr(LLVMContext &C, + uint64_t Bytes) const; /// \brief Add the dereferenceable attribute to the attribute set at the given /// arg index. Returns a new list because attribute lists are immutable. LLVM_NODISCARD AttributeList addDereferenceableParamAttr( - LLVMContext &C, unsigned ArgNo, uint64_t Bytes) const { - return addDereferenceableAttr(C, ArgNo + FirstArgIndex, Bytes); - } - - /// Add the dereferenceable_or_null attribute to the attribute set at - /// the given index. Returns a new list because attribute lists are immutable. - LLVM_NODISCARD AttributeList addDereferenceableOrNullAttr( - LLVMContext &C, unsigned Index, uint64_t Bytes) const; + LLVMContext &C, unsigned ArgNo, uint64_t Bytes) const; /// Add the dereferenceable_or_null attribute to the attribute set at /// the given arg index. Returns a new list because attribute lists are /// immutable. 
LLVM_NODISCARD AttributeList addDereferenceableOrNullParamAttr( - LLVMContext &C, unsigned ArgNo, uint64_t Bytes) const { - return addDereferenceableOrNullAttr(C, ArgNo + FirstArgIndex, Bytes); - } - - /// Add the allocsize attribute to the attribute set at the given index. - /// Returns a new list because attribute lists are immutable. - LLVM_NODISCARD AttributeList - addAllocSizeAttr(LLVMContext &C, unsigned Index, unsigned ElemSizeArg, - const Optional<unsigned> &NumElemsArg); + LLVMContext &C, unsigned ArgNo, uint64_t Bytes) const; /// Add the allocsize attribute to the attribute set at the given arg index. /// Returns a new list because attribute lists are immutable. LLVM_NODISCARD AttributeList addAllocSizeParamAttr(LLVMContext &C, unsigned ArgNo, unsigned ElemSizeArg, - const Optional<unsigned> &NumElemsArg) { - return addAllocSizeAttr(C, ArgNo + FirstArgIndex, ElemSizeArg, NumElemsArg); - } - - /// Add the vscale_range attribute to the attribute set at the given index. - /// Returns a new list because attribute lists are immutable. - LLVM_NODISCARD AttributeList addVScaleRangeAttr(LLVMContext &C, - unsigned Index, - unsigned MinValue, - unsigned MaxValue); + const Optional<unsigned> &NumElemsArg); //===--------------------------------------------------------------------===// // AttributeList Accessors @@ -620,48 +700,59 @@ public: /// The attributes for the argument or parameter at the given index are /// returned. - AttributeSet getParamAttributes(unsigned ArgNo) const; + AttributeSet getParamAttrs(unsigned ArgNo) const; /// The attributes for the ret value are returned. - AttributeSet getRetAttributes() const; + AttributeSet getRetAttrs() const; /// The function attributes are returned. - AttributeSet getFnAttributes() const; + AttributeSet getFnAttrs() const; /// Return true if the attribute exists at the given index. 
- bool hasAttribute(unsigned Index, Attribute::AttrKind Kind) const; + bool hasAttributeAtIndex(unsigned Index, Attribute::AttrKind Kind) const; /// Return true if the attribute exists at the given index. - bool hasAttribute(unsigned Index, StringRef Kind) const; + bool hasAttributeAtIndex(unsigned Index, StringRef Kind) const; /// Return true if attribute exists at the given index. - bool hasAttributes(unsigned Index) const; + bool hasAttributesAtIndex(unsigned Index) const; /// Return true if the attribute exists for the given argument bool hasParamAttr(unsigned ArgNo, Attribute::AttrKind Kind) const { - return hasAttribute(ArgNo + FirstArgIndex, Kind); + return hasAttributeAtIndex(ArgNo + FirstArgIndex, Kind); } /// Return true if the attribute exists for the given argument bool hasParamAttr(unsigned ArgNo, StringRef Kind) const { - return hasAttribute(ArgNo + FirstArgIndex, Kind); + return hasAttributeAtIndex(ArgNo + FirstArgIndex, Kind); } /// Return true if attributes exists for the given argument bool hasParamAttrs(unsigned ArgNo) const { - return hasAttributes(ArgNo + FirstArgIndex); + return hasAttributesAtIndex(ArgNo + FirstArgIndex); + } + + /// Return true if the attribute exists for the return value. + bool hasRetAttr(Attribute::AttrKind Kind) const { + return hasAttributeAtIndex(ReturnIndex, Kind); } - /// Equivalent to hasAttribute(AttributeList::FunctionIndex, Kind) but - /// may be faster. - bool hasFnAttribute(Attribute::AttrKind Kind) const; + /// Return true if the attribute exists for the return value. + bool hasRetAttr(StringRef Kind) const { + return hasAttributeAtIndex(ReturnIndex, Kind); + } + + /// Return true if attributes exist for the return value. + bool hasRetAttrs() const { return hasAttributesAtIndex(ReturnIndex); } + + /// Return true if the attribute exists for the function. + bool hasFnAttr(Attribute::AttrKind Kind) const; - /// Equivalent to hasAttribute(AttributeList::FunctionIndex, Kind) but - /// may be faster. 
- bool hasFnAttribute(StringRef Kind) const; + /// Return true if the attribute exists for the function. + bool hasFnAttr(StringRef Kind) const; - /// Equivalent to hasAttribute(ArgNo + FirstArgIndex, Kind). - bool hasParamAttribute(unsigned ArgNo, Attribute::AttrKind Kind) const; + /// Return true the attributes exist for the function. + bool hasFnAttrs() const { return hasAttributesAtIndex(FunctionIndex); } /// Return true if the specified attribute is set for at least one /// parameter or for the return value. If Index is not nullptr, the index @@ -670,19 +761,29 @@ public: unsigned *Index = nullptr) const; /// Return the attribute object that exists at the given index. - Attribute getAttribute(unsigned Index, Attribute::AttrKind Kind) const; + Attribute getAttributeAtIndex(unsigned Index, Attribute::AttrKind Kind) const; /// Return the attribute object that exists at the given index. - Attribute getAttribute(unsigned Index, StringRef Kind) const; + Attribute getAttributeAtIndex(unsigned Index, StringRef Kind) const; /// Return the attribute object that exists at the arg index. Attribute getParamAttr(unsigned ArgNo, Attribute::AttrKind Kind) const { - return getAttribute(ArgNo + FirstArgIndex, Kind); + return getAttributeAtIndex(ArgNo + FirstArgIndex, Kind); } /// Return the attribute object that exists at the given index. Attribute getParamAttr(unsigned ArgNo, StringRef Kind) const { - return getAttribute(ArgNo + FirstArgIndex, Kind); + return getAttributeAtIndex(ArgNo + FirstArgIndex, Kind); + } + + /// Return the attribute object that exists for the function. + Attribute getFnAttr(Attribute::AttrKind Kind) const { + return getAttributeAtIndex(FunctionIndex, Kind); + } + + /// Return the attribute object that exists for the function. + Attribute getFnAttr(StringRef Kind) const { + return getAttributeAtIndex(FunctionIndex, Kind); } /// Return the alignment of the return value. 
@@ -712,34 +813,26 @@ public: /// Return the elementtype type for the specified function parameter. Type *getParamElementType(unsigned ArgNo) const; - /// Get the stack alignment. - MaybeAlign getStackAlignment(unsigned Index) const; + /// Get the stack alignment of the function. + MaybeAlign getFnStackAlignment() const; - /// Get the number of dereferenceable bytes (or zero if unknown). - uint64_t getDereferenceableBytes(unsigned Index) const; + /// Get the stack alignment of the return value. + MaybeAlign getRetStackAlignment() const; - /// Get the number of dereferenceable bytes (or zero if unknown) of an - /// arg. - uint64_t getParamDereferenceableBytes(unsigned ArgNo) const { - return getDereferenceableBytes(ArgNo + FirstArgIndex); - } + /// Get the number of dereferenceable bytes (or zero if unknown) of the return + /// value. + uint64_t getRetDereferenceableBytes() const; - /// Get the number of dereferenceable_or_null bytes (or zero if - /// unknown). - uint64_t getDereferenceableOrNullBytes(unsigned Index) const; + /// Get the number of dereferenceable bytes (or zero if unknown) of an arg. + uint64_t getParamDereferenceableBytes(unsigned Index) const; - /// Get the number of dereferenceable_or_null bytes (or zero if - /// unknown) of an arg. - uint64_t getParamDereferenceableOrNullBytes(unsigned ArgNo) const { - return getDereferenceableOrNullBytes(ArgNo + FirstArgIndex); - } + /// Get the number of dereferenceable_or_null bytes (or zero if unknown) of + /// the return value. + uint64_t getRetDereferenceableOrNullBytes() const; - /// Get the allocsize argument numbers (or pair(0, 0) if unknown). - std::pair<unsigned, Optional<unsigned>> - getAllocSizeArgs(unsigned Index) const; - - /// Get the vscale_range argument numbers (or pair(0, 0) if unknown). - std::pair<unsigned, unsigned> getVScaleRangeArgs(unsigned Index) const; + /// Get the number of dereferenceable_or_null bytes (or zero if unknown) of an + /// arg. 
+ uint64_t getParamDereferenceableOrNullBytes(unsigned ArgNo) const; /// Return the attributes at the index as a string. std::string getAsString(unsigned Index, bool InAttrGrp = false) const; @@ -758,9 +851,32 @@ public: unsigned getNumAttrSets() const; - /// Use these to iterate over the valid attribute indices. - unsigned index_begin() const { return AttributeList::FunctionIndex; } - unsigned index_end() const { return getNumAttrSets() - 1; } + // Implementation of indexes(). Produces iterators that wrap an index. Mostly + // to hide the awkwardness of unsigned wrapping when iterating over valid + // indexes. + struct index_iterator { + unsigned NumAttrSets; + index_iterator(int NumAttrSets) : NumAttrSets(NumAttrSets) {} + struct int_wrapper { + int_wrapper(unsigned i) : i(i) {} + unsigned i; + unsigned operator*() { return i; } + bool operator!=(const int_wrapper &Other) { return i != Other.i; } + int_wrapper &operator++() { + // This is expected to undergo unsigned wrapping since FunctionIndex is + // ~0 and that's where we start. + ++i; + return *this; + } + }; + + int_wrapper begin() { return int_wrapper(AttributeList::FunctionIndex); } + + int_wrapper end() { return int_wrapper(NumAttrSets - 1); } + }; + + /// Use this to iterate over the valid attribute indexes. + index_iterator indexes() const { return index_iterator(getNumAttrSets()); } /// operator==/!= - Provide equality predicates. bool operator==(const AttributeList &RHS) const { return pImpl == RHS.pImpl; } @@ -782,7 +898,7 @@ public: //===----------------------------------------------------------------------===// /// \class /// Provide DenseMapInfo for AttributeList. 
-template <> struct DenseMapInfo<AttributeList> { +template <> struct DenseMapInfo<AttributeList, void> { static AttributeList getEmptyKey() { auto Val = static_cast<uintptr_t>(-1); Val <<= PointerLikeTypeTraits<void*>::NumLowBitsAvailable; @@ -814,14 +930,10 @@ template <> struct DenseMapInfo<AttributeList> { class AttrBuilder { std::bitset<Attribute::EndAttrKinds> Attrs; std::map<SmallString<32>, SmallString<32>, std::less<>> TargetDepAttrs; - MaybeAlign Alignment; - MaybeAlign StackAlignment; - uint64_t DerefBytes = 0; - uint64_t DerefOrNullBytes = 0; - uint64_t AllocSizeArgs = 0; - uint64_t VScaleRangeArgs = 0; + std::array<uint64_t, Attribute::NumIntAttrKinds> IntAttrs = {}; std::array<Type *, Attribute::NumTypeAttrKinds> TypeAttrs = {}; + Optional<unsigned> kindToIntIndex(Attribute::AttrKind Kind) const; Optional<unsigned> kindToTypeIndex(Attribute::AttrKind Kind) const; public: @@ -891,19 +1003,31 @@ public: /// Return true if the builder has an alignment attribute. bool hasAlignmentAttr() const; + /// Return raw (possibly packed/encoded) value of integer attribute or 0 if + /// not set. + uint64_t getRawIntAttr(Attribute::AttrKind Kind) const; + /// Retrieve the alignment attribute, if it exists. - MaybeAlign getAlignment() const { return Alignment; } + MaybeAlign getAlignment() const { + return MaybeAlign(getRawIntAttr(Attribute::Alignment)); + } /// Retrieve the stack alignment attribute, if it exists. - MaybeAlign getStackAlignment() const { return StackAlignment; } + MaybeAlign getStackAlignment() const { + return MaybeAlign(getRawIntAttr(Attribute::StackAlignment)); + } /// Retrieve the number of dereferenceable bytes, if the /// dereferenceable attribute exists (zero is returned otherwise). 
- uint64_t getDereferenceableBytes() const { return DerefBytes; } + uint64_t getDereferenceableBytes() const { + return getRawIntAttr(Attribute::Dereferenceable); + } /// Retrieve the number of dereferenceable_or_null bytes, if the /// dereferenceable_or_null attribute exists (zero is returned otherwise). - uint64_t getDereferenceableOrNullBytes() const { return DerefOrNullBytes; } + uint64_t getDereferenceableOrNullBytes() const { + return getRawIntAttr(Attribute::DereferenceableOrNull); + } /// Retrieve type for the given type attribute. Type *getTypeAttr(Attribute::AttrKind Kind) const; @@ -933,6 +1057,9 @@ public: /// it doesn't exist, pair(0, 0) is returned. std::pair<unsigned, unsigned> getVScaleRangeArgs() const; + /// Add integer attribute with raw value (packed/encoded if necessary). + AttrBuilder &addRawIntAttr(Attribute::AttrKind Kind, uint64_t Value); + /// This turns an alignment into the form used internally in Attribute. /// This call has no effect if Align is not set. AttrBuilder &addAlignmentAttr(MaybeAlign Align); diff --git a/llvm/include/llvm/IR/Attributes.td b/llvm/include/llvm/IR/Attributes.td index 99b474161df7..de25b51a6292 100644 --- a/llvm/include/llvm/IR/Attributes.td +++ b/llvm/include/llvm/IR/Attributes.td @@ -86,6 +86,9 @@ def Dereferenceable : IntAttr<"dereferenceable", [ParamAttr, RetAttr]>; def DereferenceableOrNull : IntAttr<"dereferenceable_or_null", [ParamAttr, RetAttr]>; +/// Do not instrument function with sanitizers. +def DisableSanitizerInstrumentation: EnumAttr<"disable_sanitizer_instrumentation", [FnAttr]>; + /// Provide pointer element type to intrinsic. 
def ElementType : TypeAttr<"elementtype", [ParamAttr]>; diff --git a/llvm/include/llvm/IR/BasicBlock.h b/llvm/include/llvm/IR/BasicBlock.h index 0af4ec4ef138..184ddfc01c29 100644 --- a/llvm/include/llvm/IR/BasicBlock.h +++ b/llvm/include/llvm/IR/BasicBlock.h @@ -167,8 +167,8 @@ public: /// Returns a pointer to the first instruction in this block that is not a /// PHINode or a debug intrinsic, or any pseudo operation if \c SkipPseudoOp /// is true. - const Instruction *getFirstNonPHIOrDbg(bool SkipPseudoOp = false) const; - Instruction *getFirstNonPHIOrDbg(bool SkipPseudoOp = false) { + const Instruction *getFirstNonPHIOrDbg(bool SkipPseudoOp = true) const; + Instruction *getFirstNonPHIOrDbg(bool SkipPseudoOp = true) { return const_cast<Instruction *>( static_cast<const BasicBlock *>(this)->getFirstNonPHIOrDbg( SkipPseudoOp)); @@ -178,8 +178,8 @@ public: /// PHINode, a debug intrinsic, or a lifetime intrinsic, or any pseudo /// operation if \c SkipPseudoOp is true. const Instruction * - getFirstNonPHIOrDbgOrLifetime(bool SkipPseudoOp = false) const; - Instruction *getFirstNonPHIOrDbgOrLifetime(bool SkipPseudoOp = false) { + getFirstNonPHIOrDbgOrLifetime(bool SkipPseudoOp = true) const; + Instruction *getFirstNonPHIOrDbgOrLifetime(bool SkipPseudoOp = true) { return const_cast<Instruction *>( static_cast<const BasicBlock *>(this)->getFirstNonPHIOrDbgOrLifetime( SkipPseudoOp)); @@ -200,14 +200,14 @@ public: /// SkipPseudoOp is true. iterator_range<filter_iterator<BasicBlock::const_iterator, std::function<bool(const Instruction &)>>> - instructionsWithoutDebug(bool SkipPseudoOp = false) const; + instructionsWithoutDebug(bool SkipPseudoOp = true) const; /// Return an iterator range over the instructions in the block, skipping any /// debug instructions. Skip and any pseudo operations as well if \c /// SkipPseudoOp is true. 
iterator_range< filter_iterator<BasicBlock::iterator, std::function<bool(Instruction &)>>> - instructionsWithoutDebug(bool SkipPseudoOp = false); + instructionsWithoutDebug(bool SkipPseudoOp = true); /// Return the size of the basic block ignoring debug instructions filter_iterator<BasicBlock::const_iterator, diff --git a/llvm/include/llvm/IR/Constant.h b/llvm/include/llvm/IR/Constant.h index 4e2022b36e30..c8999b71f3d1 100644 --- a/llvm/include/llvm/IR/Constant.h +++ b/llvm/include/llvm/IR/Constant.h @@ -198,6 +198,12 @@ public: /// hanging off of the globals. void removeDeadConstantUsers() const; + /// Return true if the constant has exactly one live use. + /// + /// This returns the same result as calling Value::hasOneUse after + /// Constant::removeDeadConstantUsers, but doesn't remove dead constants. + bool hasOneLiveUse() const; + const Constant *stripPointerCasts() const { return cast<Constant>(Value::stripPointerCasts()); } diff --git a/llvm/include/llvm/IR/ConstantRange.h b/llvm/include/llvm/IR/ConstantRange.h index 44b8c395c89e..fea4d0da1d0d 100644 --- a/llvm/include/llvm/IR/ConstantRange.h +++ b/llvm/include/llvm/IR/ConstantRange.h @@ -128,6 +128,28 @@ public: /// NOTE: false does not mean that inverse predicate holds! bool icmp(CmpInst::Predicate Pred, const ConstantRange &Other) const; + /// Return true iff CR1 ult CR2 is equivalent to CR1 slt CR2. + /// Does not depend on strictness/direction of the predicate. + static bool + areInsensitiveToSignednessOfICmpPredicate(const ConstantRange &CR1, + const ConstantRange &CR2); + + /// Return true iff CR1 ult CR2 is equivalent to CR1 sge CR2. + /// Does not depend on strictness/direction of the predicate. 
+ static bool + areInsensitiveToSignednessOfInvertedICmpPredicate(const ConstantRange &CR1, + const ConstantRange &CR2); + + /// If the comparison between constant ranges this and Other + /// is insensitive to the signedness of the comparison predicate, + /// return a predicate equivalent to \p Pred, with flipped signedness + /// (i.e. unsigned instead of signed or vice versa), and maybe inverted, + /// otherwise returns CmpInst::Predicate::BAD_ICMP_PREDICATE. + static CmpInst::Predicate + getEquivalentPredWithFlippedSignedness(CmpInst::Predicate Pred, + const ConstantRange &CR1, + const ConstantRange &CR2); + /// Produce the largest range containing all X such that "X BinOp Y" is /// guaranteed not to wrap (overflow) for *all* Y in Other. However, there may /// be *some* Y in Other for which additional X not contained in the result @@ -167,6 +189,11 @@ public: /// successful. bool getEquivalentICmp(CmpInst::Predicate &Pred, APInt &RHS) const; + /// Set up \p Pred, \p RHS and \p Offset such that (V + Offset) Pred RHS + /// is true iff V is in the range. Prefers using Offset == 0 if possible. + void + getEquivalentICmp(CmpInst::Predicate &Pred, APInt &RHS, APInt &Offset) const; + /// Return the lower value for this range. const APInt &getLower() const { return Lower; } @@ -305,6 +332,14 @@ public: ConstantRange unionWith(const ConstantRange &CR, PreferredRangeType Type = Smallest) const; + /// Intersect the two ranges and return the result if it can be represented + /// exactly, otherwise return None. + Optional<ConstantRange> exactIntersectWith(const ConstantRange &CR) const; + + /// Union the two ranges and return the result if it can be represented + /// exactly, otherwise return None. + Optional<ConstantRange> exactUnionWith(const ConstantRange &CR) const; + /// Return a new range representing the possible values resulting /// from an application of the specified cast operator to this range. \p /// BitWidth is the target bitwidth of the cast. 
For casts which don't @@ -383,6 +418,11 @@ public: /// treating both this and \p Other as unsigned ranges. ConstantRange multiply(const ConstantRange &Other) const; + /// Return range of possible values for a signed multiplication of this and + /// \p Other. However, if overflow is possible always return a full range + /// rather than trying to determine a more precise result. + ConstantRange smul_fast(const ConstantRange &Other) const; + /// Return a new range representing the possible values resulting /// from a signed maximum of a value in this range and a value in \p Other. ConstantRange smax(const ConstantRange &Other) const; diff --git a/llvm/include/llvm/IR/Constants.h b/llvm/include/llvm/IR/Constants.h index 1f716a45b70f..71414d95d9a3 100644 --- a/llvm/include/llvm/IR/Constants.h +++ b/llvm/include/llvm/IR/Constants.h @@ -191,19 +191,19 @@ public: /// This is just a convenience method to make client code smaller for a /// common code. It also correctly performs the comparison without the /// potential for an assertion from getZExtValue(). - bool isZero() const { return Val.isNullValue(); } + bool isZero() const { return Val.isZero(); } /// This is just a convenience method to make client code smaller for a /// common case. It also correctly performs the comparison without the /// potential for an assertion from getZExtValue(). /// Determine if the value is one. - bool isOne() const { return Val.isOneValue(); } + bool isOne() const { return Val.isOne(); } /// This function will return true iff every bit in this constant is set /// to true. /// @returns true iff this constant's bits are all set to true. /// Determine if the value is all ones. - bool isMinusOne() const { return Val.isAllOnesValue(); } + bool isMinusOne() const { return Val.isAllOnes(); } /// This function will return true iff this constant represents the largest /// value that may be represented by the constant's type. 
@@ -1287,10 +1287,6 @@ public: /// Return a string representation for an opcode. const char *getOpcodeName() const; - /// Return a constant expression identical to this one, but with the specified - /// operand set to the specified value. - Constant *getWithOperandReplaced(unsigned OpNo, Constant *Op) const; - /// This returns the current constant expression with the operands replaced /// with the specified values. The specified array must have the same number /// of operands as our current one. @@ -1312,13 +1308,14 @@ public: Type *SrcTy = nullptr) const; /// Returns an Instruction which implements the same operation as this - /// ConstantExpr. The instruction is not linked to any basic block. + /// ConstantExpr. If \p InsertBefore is not null, the new instruction is + /// inserted before it, otherwise it is not inserted into any basic block. /// /// A better approach to this could be to have a constructor for Instruction /// which would take a ConstantExpr parameter, but that would have spread /// implementation details of ConstantExpr outside of Constants.cpp, which /// would make it harder to remove ConstantExprs altogether. - Instruction *getAsInstruction() const; + Instruction *getAsInstruction(Instruction *InsertBefore = nullptr) const; /// Methods for support type inquiry through isa, cast, and dyn_cast: static bool classof(const Value *V) { diff --git a/llvm/include/llvm/IR/DIBuilder.h b/llvm/include/llvm/IR/DIBuilder.h index 23ac47ca4d81..61c6dd885980 100644 --- a/llvm/include/llvm/IR/DIBuilder.h +++ b/llvm/include/llvm/IR/DIBuilder.h @@ -181,7 +181,7 @@ namespace llvm { DIFile *File); /// Create a single enumerator value. - DIEnumerator *createEnumerator(StringRef Name, APSInt Value); + DIEnumerator *createEnumerator(StringRef Name, const APSInt &Value); DIEnumerator *createEnumerator(StringRef Name, uint64_t Val, bool IsUnsigned = false); @@ -219,11 +219,12 @@ namespace llvm { /// \param AlignInBits Alignment. 
(optional) /// \param DWARFAddressSpace DWARF address space. (optional) /// \param Name Pointer type name. (optional) - DIDerivedType *createPointerType(DIType *PointeeTy, uint64_t SizeInBits, - uint32_t AlignInBits = 0, - Optional<unsigned> DWARFAddressSpace = - None, - StringRef Name = ""); + /// \param Annotations Member annotations. + DIDerivedType * + createPointerType(DIType *PointeeTy, uint64_t SizeInBits, + uint32_t AlignInBits = 0, + Optional<unsigned> DWARFAddressSpace = None, + StringRef Name = "", DINodeArray Annotations = nullptr); /// Create debugging information entry for a pointer to member. /// \param PointeeTy Type pointed to by this pointer. @@ -250,9 +251,11 @@ namespace llvm { /// \param LineNo Line number. /// \param Context The surrounding context for the typedef. /// \param AlignInBits Alignment. (optional) + /// \param Annotations Annotations. (optional) DIDerivedType *createTypedef(DIType *Ty, StringRef Name, DIFile *File, unsigned LineNo, DIScope *Context, - uint32_t AlignInBits = 0); + uint32_t AlignInBits = 0, + DINodeArray Annotations = nullptr); /// Create debugging information entry for a 'friend'. DIDerivedType *createFriend(DIType *Ty, DIType *FriendTy); @@ -279,12 +282,13 @@ namespace llvm { /// \param OffsetInBits Member offset. /// \param Flags Flags to encode member attribute, e.g. private /// \param Ty Parent type. + /// \param Annotations Member annotations. DIDerivedType *createMemberType(DIScope *Scope, StringRef Name, DIFile *File, unsigned LineNo, - uint64_t SizeInBits, - uint32_t AlignInBits, + uint64_t SizeInBits, uint32_t AlignInBits, uint64_t OffsetInBits, - DINode::DIFlags Flags, DIType *Ty); + DINode::DIFlags Flags, DIType *Ty, + DINodeArray Annotations = nullptr); /// Create debugging information entry for a variant. A variant /// normally should be a member of a variant part. @@ -317,10 +321,14 @@ namespace llvm { /// \param StorageOffsetInBits Member storage offset. 
/// \param Flags Flags to encode member attribute. /// \param Ty Parent type. - DIDerivedType *createBitFieldMemberType( - DIScope *Scope, StringRef Name, DIFile *File, unsigned LineNo, - uint64_t SizeInBits, uint64_t OffsetInBits, - uint64_t StorageOffsetInBits, DINode::DIFlags Flags, DIType *Ty); + /// \param Annotations Member annotations. + DIDerivedType *createBitFieldMemberType(DIScope *Scope, StringRef Name, + DIFile *File, unsigned LineNo, + uint64_t SizeInBits, + uint64_t OffsetInBits, + uint64_t StorageOffsetInBits, + DINode::DIFlags Flags, DIType *Ty, + DINodeArray Annotations = nullptr); /// Create debugging information entry for a /// C++ static data member. @@ -586,7 +594,7 @@ namespace llvm { unsigned Tag, StringRef Name, DIScope *Scope, DIFile *F, unsigned Line, unsigned RuntimeLang = 0, uint64_t SizeInBits = 0, uint32_t AlignInBits = 0, DINode::DIFlags Flags = DINode::FlagFwdDecl, - StringRef UniqueIdentifier = ""); + StringRef UniqueIdentifier = "", DINodeArray Annotations = nullptr); /// Retain DIScope* in a module even if it is not referenced /// through debug info anchors. @@ -636,7 +644,8 @@ namespace llvm { DIScope *Context, StringRef Name, StringRef LinkageName, DIFile *File, unsigned LineNo, DIType *Ty, bool IsLocalToUnit, bool isDefined = true, DIExpression *Expr = nullptr, MDNode *Decl = nullptr, - MDTuple *TemplateParams = nullptr, uint32_t AlignInBits = 0); + MDTuple *TemplateParams = nullptr, uint32_t AlignInBits = 0, + DINodeArray Annotations = nullptr); /// Identical to createGlobalVariable /// except that the resulting DbgNode is temporary and meant to be RAUWed. 
@@ -682,7 +691,8 @@ namespace llvm { createParameterVariable(DIScope *Scope, StringRef Name, unsigned ArgNo, DIFile *File, unsigned LineNo, DIType *Ty, bool AlwaysPreserve = false, - DINode::DIFlags Flags = DINode::FlagZero); + DINode::DIFlags Flags = DINode::FlagZero, + DINodeArray Annotations = nullptr); /// Create a new descriptor for the specified /// variable which has a complex address expression for its address. @@ -711,6 +721,7 @@ namespace llvm { /// \param SPFlags Additional flags specific to subprograms. /// \param TParams Function template parameters. /// \param ThrownTypes Exception types this function may throw. + /// \param Annotations Attribute Annotations. DISubprogram * createFunction(DIScope *Scope, StringRef Name, StringRef LinkageName, DIFile *File, unsigned LineNo, DISubroutineType *Ty, @@ -718,7 +729,8 @@ namespace llvm { DISubprogram::DISPFlags SPFlags = DISubprogram::SPFlagZero, DITemplateParameterArray TParams = nullptr, DISubprogram *Decl = nullptr, - DITypeArray ThrownTypes = nullptr); + DITypeArray ThrownTypes = nullptr, + DINodeArray Annotations = nullptr); /// Identical to createFunction, /// except that the resulting DbgNode is meant to be RAUWed. @@ -818,29 +830,35 @@ namespace llvm { unsigned Line, unsigned Col); /// Create a descriptor for an imported module. - /// \param Context The scope this module is imported into - /// \param NS The namespace being imported here. - /// \param File File where the declaration is located. - /// \param Line Line number of the declaration. + /// \param Context The scope this module is imported into + /// \param NS The namespace being imported here. + /// \param File File where the declaration is located. + /// \param Line Line number of the declaration. + /// \param Elements Renamed elements. 
DIImportedEntity *createImportedModule(DIScope *Context, DINamespace *NS, - DIFile *File, unsigned Line); + DIFile *File, unsigned Line, + DINodeArray Elements = nullptr); /// Create a descriptor for an imported module. /// \param Context The scope this module is imported into. /// \param NS An aliased namespace. /// \param File File where the declaration is located. /// \param Line Line number of the declaration. + /// \param Elements Renamed elements. DIImportedEntity *createImportedModule(DIScope *Context, DIImportedEntity *NS, DIFile *File, - unsigned Line); + unsigned Line, + DINodeArray Elements = nullptr); /// Create a descriptor for an imported module. - /// \param Context The scope this module is imported into. - /// \param M The module being imported here - /// \param File File where the declaration is located. - /// \param Line Line number of the declaration. + /// \param Context The scope this module is imported into. + /// \param M The module being imported here + /// \param File File where the declaration is located. + /// \param Line Line number of the declaration. + /// \param Elements Renamed elements. DIImportedEntity *createImportedModule(DIScope *Context, DIModule *M, - DIFile *File, unsigned Line); + DIFile *File, unsigned Line, + DINodeArray Elements = nullptr); /// Create a descriptor for an imported function. /// \param Context The scope this module is imported into. @@ -848,9 +866,11 @@ namespace llvm { /// variable. /// \param File File where the declaration is located. /// \param Line Line number of the declaration. + /// \param Elements Renamed elements. DIImportedEntity *createImportedDeclaration(DIScope *Context, DINode *Decl, DIFile *File, unsigned Line, - StringRef Name = ""); + StringRef Name = "", + DINodeArray Elements = nullptr); /// Insert a new llvm.dbg.declare intrinsic call. 
/// \param Storage llvm::Value of the variable diff --git a/llvm/include/llvm/IR/DataLayout.h b/llvm/include/llvm/IR/DataLayout.h index 300f73c12df0..46acd403bef1 100644 --- a/llvm/include/llvm/IR/DataLayout.h +++ b/llvm/include/llvm/IR/DataLayout.h @@ -19,6 +19,7 @@ #ifndef LLVM_IR_DATALAYOUT_H #define LLVM_IR_DATALAYOUT_H +#include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" @@ -135,6 +136,7 @@ private: MM_MachO, MM_WinCOFF, MM_WinCOFFX86, + MM_GOFF, MM_Mips, MM_XCOFF }; @@ -316,6 +318,7 @@ public: switch (ManglingMode) { case MM_None: case MM_ELF: + case MM_GOFF: case MM_Mips: case MM_WinCOFF: case MM_XCOFF: @@ -334,6 +337,8 @@ public: case MM_ELF: case MM_WinCOFF: return ".L"; + case MM_GOFF: + return "@"; case MM_Mips: return "$"; case MM_MachO: @@ -372,8 +377,8 @@ public: /// the backends/clients are updated. unsigned getPointerSize(unsigned AS = 0) const; - /// Returns the maximum pointer size over all address spaces. - unsigned getMaxPointerSize() const; + /// Returns the maximum index size over all address spaces. + unsigned getMaxIndexSize() const; // Index size used for address calculation. unsigned getIndexSize(unsigned AS) const; @@ -405,9 +410,9 @@ public: return getPointerSize(AS) * 8; } - /// Returns the maximum pointer size over all address spaces. - unsigned getMaxPointerSizeInBits() const { - return getMaxPointerSize() * 8; + /// Returns the maximum index size over all address spaces. + unsigned getMaxIndexSizeInBits() const { + return getMaxIndexSize() * 8; } /// Size in bits of index used for address calculation in getelementptr. @@ -514,7 +519,7 @@ public: /// Returns the minimum ABI-required alignment for the specified type. /// FIXME: Deprecate this function once migration to Align is over. - unsigned getABITypeAlignment(Type *Ty) const; + uint64_t getABITypeAlignment(Type *Ty) const; /// Returns the minimum ABI-required alignment for the specified type. 
Align getABITypeAlign(Type *Ty) const; @@ -537,7 +542,7 @@ public: /// /// This is always at least as good as the ABI alignment. /// FIXME: Deprecate this function once migration to Align is over. - unsigned getPrefTypeAlignment(Type *Ty) const; + uint64_t getPrefTypeAlignment(Type *Ty) const; /// Returns the preferred stack/global alignment for the specified /// type. @@ -579,6 +584,10 @@ public: /// This is used to implement getelementptr. int64_t getIndexedOffsetInType(Type *ElemTy, ArrayRef<Value *> Indices) const; + /// Get GEP indices to access Offset inside ElemTy. ElemTy is updated to be + /// the result element type and Offset to be the residual offset. + SmallVector<APInt> getGEPIndicesForOffset(Type *&ElemTy, APInt &Offset) const; + /// Returns a StructLayout object, indicating the alignment of the /// struct, its size, and the offsets of its fields. /// diff --git a/llvm/include/llvm/IR/DebugInfo.h b/llvm/include/llvm/IR/DebugInfo.h index eba422a9fde6..730c69d0c622 100644 --- a/llvm/include/llvm/IR/DebugInfo.h +++ b/llvm/include/llvm/IR/DebugInfo.h @@ -106,8 +106,6 @@ public: void reset(); private: - void InitializeTypeMap(const Module &M); - void processCompileUnit(DICompileUnit *CU); void processScope(DIScope *Scope); void processType(DIType *DT); diff --git a/llvm/include/llvm/IR/DebugInfoMetadata.h b/llvm/include/llvm/IR/DebugInfoMetadata.h index 20a032f04909..c04f07c534af 100644 --- a/llvm/include/llvm/IR/DebugInfoMetadata.h +++ b/llvm/include/llvm/IR/DebugInfoMetadata.h @@ -256,11 +256,13 @@ class GenericDINode : public DINode { public: unsigned getHash() const { return SubclassData32; } - DEFINE_MDNODE_GET(GenericDINode, (unsigned Tag, StringRef Header, - ArrayRef<Metadata *> DwarfOps), + DEFINE_MDNODE_GET(GenericDINode, + (unsigned Tag, StringRef Header, + ArrayRef<Metadata *> DwarfOps), (Tag, Header, DwarfOps)) - DEFINE_MDNODE_GET(GenericDINode, (unsigned Tag, MDString *Header, - ArrayRef<Metadata *> DwarfOps), + 
DEFINE_MDNODE_GET(GenericDINode, + (unsigned Tag, MDString *Header, + ArrayRef<Metadata *> DwarfOps), (Tag, Header, DwarfOps)) /// Return a (temporary) clone of this. @@ -324,7 +326,7 @@ public: DEFINE_MDNODE_GET(DISubrange, (int64_t Count, int64_t LowerBound = 0), (Count, LowerBound)) - DEFINE_MDNODE_GET(DISubrange, (Metadata *CountNode, int64_t LowerBound = 0), + DEFINE_MDNODE_GET(DISubrange, (Metadata * CountNode, int64_t LowerBound = 0), (CountNode, LowerBound)) DEFINE_MDNODE_GET(DISubrange, @@ -334,9 +336,7 @@ public: TempDISubrange clone() const { return cloneImpl(); } - Metadata *getRawCountNode() const { - return getOperand(0).get(); - } + Metadata *getRawCountNode() const { return getOperand(0).get(); } Metadata *getRawLowerBound() const { return getOperand(1).get(); } @@ -548,14 +548,13 @@ public: }; /// A single checksum, represented by a \a Kind and a \a Value (a string). - template <typename T> - struct ChecksumInfo { + template <typename T> struct ChecksumInfo { /// The kind of checksum which \a Value encodes. ChecksumKind Kind; /// The string value of the checksum. T Value; - ChecksumInfo(ChecksumKind Kind, T Value) : Kind(Kind), Value(Value) { } + ChecksumInfo(ChecksumKind Kind, T Value) : Kind(Kind), Value(Value) {} ~ChecksumInfo() = default; bool operator==(const ChecksumInfo<T> &X) const { return Kind == X.Kind && Value == X.Value; @@ -578,15 +577,17 @@ private: static DIFile *getImpl(LLVMContext &Context, StringRef Filename, StringRef Directory, Optional<ChecksumInfo<StringRef>> CS, - Optional<StringRef> Source, - StorageType Storage, bool ShouldCreate = true) { + Optional<StringRef> Source, StorageType Storage, + bool ShouldCreate = true) { Optional<ChecksumInfo<MDString *>> MDChecksum; if (CS) MDChecksum.emplace(CS->Kind, getCanonicalMDString(Context, CS->Value)); - return getImpl(Context, getCanonicalMDString(Context, Filename), - getCanonicalMDString(Context, Directory), MDChecksum, - Source ? 
Optional<MDString *>(getCanonicalMDString(Context, *Source)) : None, - Storage, ShouldCreate); + return getImpl( + Context, getCanonicalMDString(Context, Filename), + getCanonicalMDString(Context, Directory), MDChecksum, + Source ? Optional<MDString *>(getCanonicalMDString(Context, *Source)) + : None, + Storage, ShouldCreate); } static DIFile *getImpl(LLVMContext &Context, MDString *Filename, MDString *Directory, @@ -600,13 +601,15 @@ private: } public: - DEFINE_MDNODE_GET(DIFile, (StringRef Filename, StringRef Directory, - Optional<ChecksumInfo<StringRef>> CS = None, - Optional<StringRef> Source = None), + DEFINE_MDNODE_GET(DIFile, + (StringRef Filename, StringRef Directory, + Optional<ChecksumInfo<StringRef>> CS = None, + Optional<StringRef> Source = None), (Filename, Directory, CS, Source)) - DEFINE_MDNODE_GET(DIFile, (MDString * Filename, MDString *Directory, - Optional<ChecksumInfo<MDString *>> CS = None, - Optional<MDString *> Source = None), + DEFINE_MDNODE_GET(DIFile, + (MDString * Filename, MDString *Directory, + Optional<ChecksumInfo<MDString *>> CS = None, + Optional<MDString *> Source = None), (Filename, Directory, CS, Source)) TempDIFile clone() const { return cloneImpl(); } @@ -707,7 +710,6 @@ public: DIScope *getScope() const { return cast_or_null<DIScope>(getRawScope()); } StringRef getName() const { return getStringOperand(2); } - Metadata *getRawScope() const { return getOperand(1); } MDString *getRawName() const { return getOperandAs<MDString>(2); } @@ -936,47 +938,48 @@ class DIDerivedType : public DIType { unsigned Line, DIScope *Scope, DIType *BaseType, uint64_t SizeInBits, uint32_t AlignInBits, uint64_t OffsetInBits, Optional<unsigned> DWARFAddressSpace, DIFlags Flags, - Metadata *ExtraData, StorageType Storage, bool ShouldCreate = true) { + Metadata *ExtraData, DINodeArray Annotations, StorageType Storage, + bool ShouldCreate = true) { return getImpl(Context, Tag, getCanonicalMDString(Context, Name), File, Line, Scope, BaseType, SizeInBits, 
AlignInBits, OffsetInBits, - DWARFAddressSpace, Flags, ExtraData, Storage, ShouldCreate); - } - static DIDerivedType *getImpl(LLVMContext &Context, unsigned Tag, - MDString *Name, Metadata *File, unsigned Line, - Metadata *Scope, Metadata *BaseType, - uint64_t SizeInBits, uint32_t AlignInBits, - uint64_t OffsetInBits, - Optional<unsigned> DWARFAddressSpace, - DIFlags Flags, Metadata *ExtraData, - StorageType Storage, bool ShouldCreate = true); + DWARFAddressSpace, Flags, ExtraData, Annotations.get(), + Storage, ShouldCreate); + } + static DIDerivedType * + getImpl(LLVMContext &Context, unsigned Tag, MDString *Name, Metadata *File, + unsigned Line, Metadata *Scope, Metadata *BaseType, + uint64_t SizeInBits, uint32_t AlignInBits, uint64_t OffsetInBits, + Optional<unsigned> DWARFAddressSpace, DIFlags Flags, + Metadata *ExtraData, Metadata *Annotations, StorageType Storage, + bool ShouldCreate = true); TempDIDerivedType cloneImpl() const { - return getTemporary(getContext(), getTag(), getName(), getFile(), getLine(), - getScope(), getBaseType(), getSizeInBits(), - getAlignInBits(), getOffsetInBits(), - getDWARFAddressSpace(), getFlags(), getExtraData()); + return getTemporary( + getContext(), getTag(), getName(), getFile(), getLine(), getScope(), + getBaseType(), getSizeInBits(), getAlignInBits(), getOffsetInBits(), + getDWARFAddressSpace(), getFlags(), getExtraData(), getAnnotations()); } public: - DEFINE_MDNODE_GET(DIDerivedType, - (unsigned Tag, MDString *Name, Metadata *File, - unsigned Line, Metadata *Scope, Metadata *BaseType, - uint64_t SizeInBits, uint32_t AlignInBits, - uint64_t OffsetInBits, - Optional<unsigned> DWARFAddressSpace, DIFlags Flags, - Metadata *ExtraData = nullptr), - (Tag, Name, File, Line, Scope, BaseType, SizeInBits, - AlignInBits, OffsetInBits, DWARFAddressSpace, Flags, - ExtraData)) + DEFINE_MDNODE_GET( + DIDerivedType, + (unsigned Tag, MDString *Name, Metadata *File, unsigned Line, + Metadata *Scope, Metadata *BaseType, uint64_t SizeInBits, 
+ uint32_t AlignInBits, uint64_t OffsetInBits, + Optional<unsigned> DWARFAddressSpace, DIFlags Flags, + Metadata *ExtraData = nullptr, Metadata *Annotations = nullptr), + (Tag, Name, File, Line, Scope, BaseType, SizeInBits, AlignInBits, + OffsetInBits, DWARFAddressSpace, Flags, ExtraData, Annotations)) DEFINE_MDNODE_GET(DIDerivedType, (unsigned Tag, StringRef Name, DIFile *File, unsigned Line, DIScope *Scope, DIType *BaseType, uint64_t SizeInBits, uint32_t AlignInBits, uint64_t OffsetInBits, Optional<unsigned> DWARFAddressSpace, DIFlags Flags, - Metadata *ExtraData = nullptr), + Metadata *ExtraData = nullptr, + DINodeArray Annotations = nullptr), (Tag, Name, File, Line, Scope, BaseType, SizeInBits, AlignInBits, OffsetInBits, DWARFAddressSpace, Flags, - ExtraData)) + ExtraData, Annotations)) TempDIDerivedType clone() const { return cloneImpl(); } @@ -999,6 +1002,12 @@ public: Metadata *getExtraData() const { return getRawExtraData(); } Metadata *getRawExtraData() const { return getOperand(4); } + /// Get annotations associated with this derived type. + DINodeArray getAnnotations() const { + return cast_or_null<MDTuple>(getRawAnnotations()); + } + Metadata *getRawAnnotations() const { return getOperand(5); } + /// Get casted version of extra data. /// @{ DIType *getClassType() const { @@ -1065,8 +1074,8 @@ class DICompositeType : public DIType { /// Change fields in place. 
void mutate(unsigned Tag, unsigned Line, unsigned RuntimeLang, - uint64_t SizeInBits, uint32_t AlignInBits, - uint64_t OffsetInBits, DIFlags Flags) { + uint64_t SizeInBits, uint32_t AlignInBits, uint64_t OffsetInBits, + DIFlags Flags) { assert(isDistinct() && "Only distinct nodes can mutate"); assert(getRawIdentifier() && "Only ODR-uniqued nodes should mutate"); this->RuntimeLang = RuntimeLang; @@ -1081,13 +1090,14 @@ class DICompositeType : public DIType { DITemplateParameterArray TemplateParams, StringRef Identifier, DIDerivedType *Discriminator, Metadata *DataLocation, Metadata *Associated, Metadata *Allocated, Metadata *Rank, - StorageType Storage, bool ShouldCreate = true) { + DINodeArray Annotations, StorageType Storage, + bool ShouldCreate = true) { return getImpl( Context, Tag, getCanonicalMDString(Context, Name), File, Line, Scope, BaseType, SizeInBits, AlignInBits, OffsetInBits, Flags, Elements.get(), RuntimeLang, VTableHolder, TemplateParams.get(), getCanonicalMDString(Context, Identifier), Discriminator, DataLocation, - Associated, Allocated, Rank, Storage, ShouldCreate); + Associated, Allocated, Rank, Annotations.get(), Storage, ShouldCreate); } static DICompositeType * getImpl(LLVMContext &Context, unsigned Tag, MDString *Name, Metadata *File, @@ -1097,16 +1107,16 @@ class DICompositeType : public DIType { Metadata *VTableHolder, Metadata *TemplateParams, MDString *Identifier, Metadata *Discriminator, Metadata *DataLocation, Metadata *Associated, Metadata *Allocated, Metadata *Rank, - StorageType Storage, bool ShouldCreate = true); + Metadata *Annotations, StorageType Storage, bool ShouldCreate = true); TempDICompositeType cloneImpl() const { - return getTemporary(getContext(), getTag(), getName(), getFile(), getLine(), - getScope(), getBaseType(), getSizeInBits(), - getAlignInBits(), getOffsetInBits(), getFlags(), - getElements(), getRuntimeLang(), getVTableHolder(), - getTemplateParams(), getIdentifier(), - getDiscriminator(), getRawDataLocation(), 
- getRawAssociated(), getRawAllocated(), getRawRank()); + return getTemporary( + getContext(), getTag(), getName(), getFile(), getLine(), getScope(), + getBaseType(), getSizeInBits(), getAlignInBits(), getOffsetInBits(), + getFlags(), getElements(), getRuntimeLang(), getVTableHolder(), + getTemplateParams(), getIdentifier(), getDiscriminator(), + getRawDataLocation(), getRawAssociated(), getRawAllocated(), + getRawRank(), getAnnotations()); } public: @@ -1119,10 +1129,12 @@ public: DITemplateParameterArray TemplateParams = nullptr, StringRef Identifier = "", DIDerivedType *Discriminator = nullptr, Metadata *DataLocation = nullptr, Metadata *Associated = nullptr, - Metadata *Allocated = nullptr, Metadata *Rank = nullptr), + Metadata *Allocated = nullptr, Metadata *Rank = nullptr, + DINodeArray Annotations = nullptr), (Tag, Name, File, Line, Scope, BaseType, SizeInBits, AlignInBits, OffsetInBits, Flags, Elements, RuntimeLang, VTableHolder, TemplateParams, - Identifier, Discriminator, DataLocation, Associated, Allocated, Rank)) + Identifier, Discriminator, DataLocation, Associated, Allocated, Rank, + Annotations)) DEFINE_MDNODE_GET( DICompositeType, (unsigned Tag, MDString *Name, Metadata *File, unsigned Line, @@ -1132,10 +1144,11 @@ public: Metadata *TemplateParams = nullptr, MDString *Identifier = nullptr, Metadata *Discriminator = nullptr, Metadata *DataLocation = nullptr, Metadata *Associated = nullptr, Metadata *Allocated = nullptr, - Metadata *Rank = nullptr), + Metadata *Rank = nullptr, Metadata *Annotations = nullptr), (Tag, Name, File, Line, Scope, BaseType, SizeInBits, AlignInBits, OffsetInBits, Flags, Elements, RuntimeLang, VTableHolder, TemplateParams, - Identifier, Discriminator, DataLocation, Associated, Allocated, Rank)) + Identifier, Discriminator, DataLocation, Associated, Allocated, Rank, + Annotations)) TempDICompositeType clone() const { return cloneImpl(); } @@ -1154,7 +1167,7 @@ public: unsigned RuntimeLang, Metadata *VTableHolder, Metadata 
*TemplateParams, Metadata *Discriminator, Metadata *DataLocation, Metadata *Associated, Metadata *Allocated, - Metadata *Rank); + Metadata *Rank, Metadata *Annotations); static DICompositeType *getODRTypeIfExists(LLVMContext &Context, MDString &Identifier); @@ -1175,7 +1188,7 @@ public: unsigned RuntimeLang, Metadata *VTableHolder, Metadata *TemplateParams, Metadata *Discriminator, Metadata *DataLocation, Metadata *Associated, - Metadata *Allocated, Metadata *Rank); + Metadata *Allocated, Metadata *Rank, Metadata *Annotations); DIType *getBaseType() const { return cast_or_null<DIType>(getRawBaseType()); } DINodeArray getElements() const { @@ -1196,7 +1209,9 @@ public: Metadata *getRawTemplateParams() const { return getOperand(6); } MDString *getRawIdentifier() const { return getOperandAs<MDString>(7); } Metadata *getRawDiscriminator() const { return getOperand(8); } - DIDerivedType *getDiscriminator() const { return getOperandAs<DIDerivedType>(8); } + DIDerivedType *getDiscriminator() const { + return getOperandAs<DIDerivedType>(8); + } Metadata *getRawDataLocation() const { return getOperand(9); } DIVariable *getDataLocation() const { return dyn_cast_or_null<DIVariable>(getRawDataLocation()); @@ -1228,6 +1243,11 @@ public: return dyn_cast_or_null<DIExpression>(getRawRank()); } + Metadata *getRawAnnotations() const { return getOperand(13); } + DINodeArray getAnnotations() const { + return cast_or_null<MDTuple>(getRawAnnotations()); + } + /// Replace operands. 
/// /// If this \a isUniqued() and not \a isResolved(), on a uniquing collision @@ -1507,9 +1527,7 @@ public: void replaceEnumTypes(DICompositeTypeArray N) { replaceOperandWith(4, N.get()); } - void replaceRetainedTypes(DITypeArray N) { - replaceOperandWith(5, N.get()); - } + void replaceRetainedTypes(DITypeArray N) { replaceOperandWith(5, N.get()); } void replaceGlobalVariables(DIGlobalVariableExpressionArray N) { replaceOperandWith(6, N.get()); } @@ -1691,7 +1709,8 @@ public: /// base discriminator is set in the new DILocation, the other encoded values /// are elided. /// If the discriminator cannot be encoded, the function returns None. - inline Optional<const DILocation *> cloneWithBaseDiscriminator(unsigned BD) const; + inline Optional<const DILocation *> + cloneWithBaseDiscriminator(unsigned BD) const; /// Returns the duplication factor stored in the discriminator, or 1 if no /// duplication factor (or 0) is encoded. @@ -1707,7 +1726,8 @@ public: /// duplication factor encoded in the discriminator. The current duplication /// factor is as defined by getDuplicationFactor(). /// Returns None if encoding failed. - inline Optional<const DILocation *> cloneByMultiplyingDuplicationFactor(unsigned DF) const; + inline Optional<const DILocation *> + cloneByMultiplyingDuplicationFactor(unsigned DF) const; /// When two instructions are combined into a single instruction we also /// need to combine the original locations into a single location. @@ -1730,8 +1750,8 @@ public: /// This function applies getMergedLocation() repeatedly left-to-right. /// /// \p Locs: The locations to be merged. - static - const DILocation *getMergedLocations(ArrayRef<const DILocation *> Locs); + static const DILocation * + getMergedLocations(ArrayRef<const DILocation *> Locs); /// Return the masked discriminator value for an input discrimnator value D /// (i.e. zero out the (B+1)-th and above bits for D (B is 0-base). @@ -1755,13 +1775,18 @@ public: /// Raw encoding of the discriminator. 
APIs such as cloneWithDuplicationFactor /// have certain special case behavior (e.g. treating empty duplication factor /// as the value '1'). - /// This API, in conjunction with cloneWithDiscriminator, may be used to encode - /// the raw values provided. \p BD: base discriminator \p DF: duplication factor + /// This API, in conjunction with cloneWithDiscriminator, may be used to + /// encode the raw values provided. + /// + /// \p BD: base discriminator + /// \p DF: duplication factor /// \p CI: copy index + /// /// The return is None if the values cannot be encoded in 32 bits - for - /// example, values for BD or DF larger than 12 bits. Otherwise, the return - /// is the encoded value. - static Optional<unsigned> encodeDiscriminator(unsigned BD, unsigned DF, unsigned CI); + /// example, values for BD or DF larger than 12 bits. Otherwise, the return is + /// the encoded value. + static Optional<unsigned> encodeDiscriminator(unsigned BD, unsigned DF, + unsigned CI); /// Raw decoder for values in an encoded discriminator D. static void decodeDiscriminator(unsigned D, unsigned &BD, unsigned &DF, @@ -1781,11 +1806,10 @@ public: /// Returns the copy identifier for a given encoded discriminator \p D. static unsigned getCopyIdentifierFromDiscriminator(unsigned D) { - return getUnsignedFromPrefixEncoding(getNextComponentInDiscriminator( - getNextComponentInDiscriminator(D))); + return getUnsignedFromPrefixEncoding( + getNextComponentInDiscriminator(getNextComponentInDiscriminator(D))); } - Metadata *getRawScope() const { return getOperand(0); } Metadata *getRawInlinedAt() const { if (getNumOperands() == 2) @@ -1839,10 +1863,10 @@ public: unsigned Virtuality = SPFlagNonvirtual, bool IsMainSubprogram = false) { // We're assuming virtuality is the low-order field. 
- static_assert( - int(SPFlagVirtual) == int(dwarf::DW_VIRTUALITY_virtual) && - int(SPFlagPureVirtual) == int(dwarf::DW_VIRTUALITY_pure_virtual), - "Virtuality constant mismatch"); + static_assert(int(SPFlagVirtual) == int(dwarf::DW_VIRTUALITY_virtual) && + int(SPFlagPureVirtual) == + int(dwarf::DW_VIRTUALITY_pure_virtual), + "Virtuality constant mismatch"); return static_cast<DISPFlags>( (Virtuality & SPFlagVirtuality) | (IsLocalToUnit ? SPFlagLocalToUnit : SPFlagZero) | @@ -1874,23 +1898,23 @@ private: DISPFlags SPFlags, DICompileUnit *Unit, DITemplateParameterArray TemplateParams, DISubprogram *Declaration, DINodeArray RetainedNodes, DITypeArray ThrownTypes, - StorageType Storage, bool ShouldCreate = true) { + DINodeArray Annotations, StorageType Storage, + bool ShouldCreate = true) { return getImpl(Context, Scope, getCanonicalMDString(Context, Name), getCanonicalMDString(Context, LinkageName), File, Line, Type, ScopeLine, ContainingType, VirtualIndex, ThisAdjustment, Flags, SPFlags, Unit, TemplateParams.get(), Declaration, - RetainedNodes.get(), ThrownTypes.get(), Storage, - ShouldCreate); + RetainedNodes.get(), ThrownTypes.get(), Annotations.get(), + Storage, ShouldCreate); } - static DISubprogram *getImpl(LLVMContext &Context, Metadata *Scope, - MDString *Name, MDString *LinkageName, - Metadata *File, unsigned Line, Metadata *Type, - unsigned ScopeLine, Metadata *ContainingType, - unsigned VirtualIndex, int ThisAdjustment, - DIFlags Flags, DISPFlags SPFlags, Metadata *Unit, - Metadata *TemplateParams, Metadata *Declaration, - Metadata *RetainedNodes, Metadata *ThrownTypes, - StorageType Storage, bool ShouldCreate = true); + static DISubprogram * + getImpl(LLVMContext &Context, Metadata *Scope, MDString *Name, + MDString *LinkageName, Metadata *File, unsigned Line, Metadata *Type, + unsigned ScopeLine, Metadata *ContainingType, unsigned VirtualIndex, + int ThisAdjustment, DIFlags Flags, DISPFlags SPFlags, Metadata *Unit, + Metadata *TemplateParams, Metadata 
*Declaration, + Metadata *RetainedNodes, Metadata *ThrownTypes, Metadata *Annotations, + StorageType Storage, bool ShouldCreate = true); TempDISubprogram cloneImpl() const { return getTemporary(getContext(), getScope(), getName(), getLinkageName(), @@ -1898,7 +1922,7 @@ private: getContainingType(), getVirtualIndex(), getThisAdjustment(), getFlags(), getSPFlags(), getUnit(), getTemplateParams(), getDeclaration(), - getRetainedNodes(), getThrownTypes()); + getRetainedNodes(), getThrownTypes(), getAnnotations()); } public: @@ -1910,10 +1934,10 @@ public: DIFlags Flags, DISPFlags SPFlags, DICompileUnit *Unit, DITemplateParameterArray TemplateParams = nullptr, DISubprogram *Declaration = nullptr, DINodeArray RetainedNodes = nullptr, - DITypeArray ThrownTypes = nullptr), + DITypeArray ThrownTypes = nullptr, DINodeArray Annotations = nullptr), (Scope, Name, LinkageName, File, Line, Type, ScopeLine, ContainingType, VirtualIndex, ThisAdjustment, Flags, SPFlags, Unit, TemplateParams, - Declaration, RetainedNodes, ThrownTypes)) + Declaration, RetainedNodes, ThrownTypes, Annotations)) DEFINE_MDNODE_GET( DISubprogram, @@ -1922,10 +1946,11 @@ public: Metadata *ContainingType, unsigned VirtualIndex, int ThisAdjustment, DIFlags Flags, DISPFlags SPFlags, Metadata *Unit, Metadata *TemplateParams = nullptr, Metadata *Declaration = nullptr, - Metadata *RetainedNodes = nullptr, Metadata *ThrownTypes = nullptr), + Metadata *RetainedNodes = nullptr, Metadata *ThrownTypes = nullptr, + Metadata *Annotations = nullptr), (Scope, Name, LinkageName, File, Line, Type, ScopeLine, ContainingType, VirtualIndex, ThisAdjustment, Flags, SPFlags, Unit, TemplateParams, - Declaration, RetainedNodes, ThrownTypes)) + Declaration, RetainedNodes, ThrownTypes, Annotations)) TempDISubprogram clone() const { return cloneImpl(); } @@ -1942,7 +1967,10 @@ public: unsigned getVirtualIndex() const { return VirtualIndex; } int getThisAdjustment() const { return ThisAdjustment; } unsigned getScopeLine() const { 
return ScopeLine; } - void setScopeLine(unsigned L) { assert(isDistinct()); ScopeLine = L; } + void setScopeLine(unsigned L) { + assert(isDistinct()); + ScopeLine = L; + } DIFlags getFlags() const { return Flags; } DISPFlags getSPFlags() const { return SPFlags; } bool isLocalToUnit() const { return getSPFlags() & SPFlagLocalToUnit; } @@ -2028,6 +2056,9 @@ public: DITypeArray getThrownTypes() const { return cast_or_null<MDTuple>(getRawThrownTypes()); } + DINodeArray getAnnotations() const { + return cast_or_null<MDTuple>(getRawAnnotations()); + } Metadata *getRawScope() const { return getOperand(1); } MDString *getRawName() const { return getOperandAs<MDString>(2); } @@ -2045,6 +2076,9 @@ public: Metadata *getRawThrownTypes() const { return getNumOperands() > 10 ? getOperandAs<Metadata>(10) : nullptr; } + Metadata *getRawAnnotations() const { + return getNumOperands() > 11 ? getOperandAs<Metadata>(11) : nullptr; + } void replaceRawLinkageName(MDString *LinkageName) { replaceOperandWith(3, LinkageName); @@ -2112,11 +2146,13 @@ class DILexicalBlock : public DILexicalBlockBase { } public: - DEFINE_MDNODE_GET(DILexicalBlock, (DILocalScope * Scope, DIFile *File, - unsigned Line, unsigned Column), + DEFINE_MDNODE_GET(DILexicalBlock, + (DILocalScope * Scope, DIFile *File, unsigned Line, + unsigned Column), (Scope, File, Line, Column)) - DEFINE_MDNODE_GET(DILexicalBlock, (Metadata * Scope, Metadata *File, - unsigned Line, unsigned Column), + DEFINE_MDNODE_GET(DILexicalBlock, + (Metadata * Scope, Metadata *File, unsigned Line, + unsigned Column), (Scope, File, Line, Column)) TempDILexicalBlock clone() const { return cloneImpl(); } @@ -2161,8 +2197,9 @@ class DILexicalBlockFile : public DILexicalBlockBase { } public: - DEFINE_MDNODE_GET(DILexicalBlockFile, (DILocalScope * Scope, DIFile *File, - unsigned Discriminator), + DEFINE_MDNODE_GET(DILexicalBlockFile, + (DILocalScope * Scope, DIFile *File, + unsigned Discriminator), (Scope, File, Discriminator)) 
DEFINE_MDNODE_GET(DILexicalBlockFile, (Metadata * Scope, Metadata *File, unsigned Discriminator), @@ -2212,7 +2249,8 @@ unsigned DILocation::getCopyIdentifier() const { return getCopyIdentifierFromDiscriminator(getDiscriminator()); } -Optional<const DILocation *> DILocation::cloneWithBaseDiscriminator(unsigned D) const { +Optional<const DILocation *> +DILocation::cloneWithBaseDiscriminator(unsigned D) const { unsigned BD, DF, CI; if (EnableFSDiscriminator) { @@ -2230,7 +2268,8 @@ Optional<const DILocation *> DILocation::cloneWithBaseDiscriminator(unsigned D) return None; } -Optional<const DILocation *> DILocation::cloneByMultiplyingDuplicationFactor(unsigned DF) const { +Optional<const DILocation *> +DILocation::cloneByMultiplyingDuplicationFactor(unsigned DF) const { assert(!EnableFSDiscriminator && "FSDiscriminator should not call this."); DF *= getDuplicationFactor(); @@ -2274,10 +2313,10 @@ class DINamespace : public DIScope { public: DEFINE_MDNODE_GET(DINamespace, - (DIScope *Scope, StringRef Name, bool ExportSymbols), + (DIScope * Scope, StringRef Name, bool ExportSymbols), (Scope, Name, ExportSymbols)) DEFINE_MDNODE_GET(DINamespace, - (Metadata *Scope, MDString *Name, bool ExportSymbols), + (Metadata * Scope, MDString *Name, bool ExportSymbols), (Scope, Name, ExportSymbols)) TempDINamespace clone() const { return cloneImpl(); } @@ -2426,7 +2465,7 @@ public: (StringRef Name, DIType *Type, bool IsDefault), (Name, Type, IsDefault)) DEFINE_MDNODE_GET(DITemplateTypeParameter, - (MDString *Name, Metadata *Type, bool IsDefault), + (MDString * Name, Metadata *Type, bool IsDefault), (Name, Type, IsDefault)) TempDITemplateTypeParameter clone() const { return cloneImpl(); } @@ -2819,7 +2858,8 @@ public: /// \param OffsetInBits Offset of the piece in bits. /// \param SizeInBits Size of the piece in bits. /// \return Creating a fragment expression may fail if \c Expr - /// contains arithmetic operations that would be truncated. 
+ /// contains arithmetic operations that would be + /// truncated. static Optional<DIExpression *> createFragmentExpression(const DIExpression *Expr, unsigned OffsetInBits, unsigned SizeInBits); @@ -2876,6 +2916,12 @@ public: return getNumElements() > 0 && getElement(0) == dwarf::DW_OP_LLVM_entry_value; } + + /// Try to shorten an expression with an initial constant operand. + /// Returns a new expression and constant on success, or the original + /// expression and constant on failure. + std::pair<DIExpression *, const ConstantInt *> + constantFold(const ConstantInt *CI); }; inline bool operator==(const DIExpression::FragmentInfo &A, @@ -2927,46 +2973,47 @@ class DIGlobalVariable : public DIVariable { StringRef LinkageName, DIFile *File, unsigned Line, DIType *Type, bool IsLocalToUnit, bool IsDefinition, DIDerivedType *StaticDataMemberDeclaration, MDTuple *TemplateParams, - uint32_t AlignInBits, StorageType Storage, bool ShouldCreate = true) { + uint32_t AlignInBits, DINodeArray Annotations, StorageType Storage, + bool ShouldCreate = true) { return getImpl(Context, Scope, getCanonicalMDString(Context, Name), getCanonicalMDString(Context, LinkageName), File, Line, Type, IsLocalToUnit, IsDefinition, StaticDataMemberDeclaration, - cast_or_null<Metadata>(TemplateParams), AlignInBits, Storage, - ShouldCreate); + cast_or_null<Metadata>(TemplateParams), AlignInBits, + Annotations.get(), Storage, ShouldCreate); } static DIGlobalVariable * getImpl(LLVMContext &Context, Metadata *Scope, MDString *Name, MDString *LinkageName, Metadata *File, unsigned Line, Metadata *Type, bool IsLocalToUnit, bool IsDefinition, Metadata *StaticDataMemberDeclaration, Metadata *TemplateParams, - uint32_t AlignInBits, StorageType Storage, bool ShouldCreate = true); + uint32_t AlignInBits, Metadata *Annotations, StorageType Storage, + bool ShouldCreate = true); TempDIGlobalVariable cloneImpl() const { return getTemporary(getContext(), getScope(), getName(), getLinkageName(), getFile(), 
getLine(), getType(), isLocalToUnit(), isDefinition(), getStaticDataMemberDeclaration(), - getTemplateParams(), getAlignInBits()); + getTemplateParams(), getAlignInBits(), + getAnnotations()); } public: - DEFINE_MDNODE_GET(DIGlobalVariable, - (DIScope * Scope, StringRef Name, StringRef LinkageName, - DIFile *File, unsigned Line, DIType *Type, - bool IsLocalToUnit, bool IsDefinition, - DIDerivedType *StaticDataMemberDeclaration, - MDTuple *TemplateParams, uint32_t AlignInBits), - (Scope, Name, LinkageName, File, Line, Type, IsLocalToUnit, - IsDefinition, StaticDataMemberDeclaration, TemplateParams, - AlignInBits)) - DEFINE_MDNODE_GET(DIGlobalVariable, - (Metadata * Scope, MDString *Name, MDString *LinkageName, - Metadata *File, unsigned Line, Metadata *Type, - bool IsLocalToUnit, bool IsDefinition, - Metadata *StaticDataMemberDeclaration, - Metadata *TemplateParams, uint32_t AlignInBits), - (Scope, Name, LinkageName, File, Line, Type, IsLocalToUnit, - IsDefinition, StaticDataMemberDeclaration, TemplateParams, - AlignInBits)) + DEFINE_MDNODE_GET( + DIGlobalVariable, + (DIScope * Scope, StringRef Name, StringRef LinkageName, DIFile *File, + unsigned Line, DIType *Type, bool IsLocalToUnit, bool IsDefinition, + DIDerivedType *StaticDataMemberDeclaration, MDTuple *TemplateParams, + uint32_t AlignInBits, DINodeArray Annotations), + (Scope, Name, LinkageName, File, Line, Type, IsLocalToUnit, IsDefinition, + StaticDataMemberDeclaration, TemplateParams, AlignInBits, Annotations)) + DEFINE_MDNODE_GET( + DIGlobalVariable, + (Metadata * Scope, MDString *Name, MDString *LinkageName, Metadata *File, + unsigned Line, Metadata *Type, bool IsLocalToUnit, bool IsDefinition, + Metadata *StaticDataMemberDeclaration, Metadata *TemplateParams, + uint32_t AlignInBits, Metadata *Annotations), + (Scope, Name, LinkageName, File, Line, Type, IsLocalToUnit, IsDefinition, + StaticDataMemberDeclaration, TemplateParams, AlignInBits, Annotations)) TempDIGlobalVariable clone() const { return 
cloneImpl(); } @@ -2977,11 +3024,15 @@ public: DIDerivedType *getStaticDataMemberDeclaration() const { return cast_or_null<DIDerivedType>(getRawStaticDataMemberDeclaration()); } + DINodeArray getAnnotations() const { + return cast_or_null<MDTuple>(getRawAnnotations()); + } MDString *getRawLinkageName() const { return getOperandAs<MDString>(5); } Metadata *getRawStaticDataMemberDeclaration() const { return getOperand(6); } Metadata *getRawTemplateParams() const { return getOperand(7); } MDTuple *getTemplateParams() const { return getOperandAs<MDTuple>(7); } + Metadata *getRawAnnotations() const { return getOperand(8); } static bool classof(const Metadata *MD) { return MD->getMetadataID() == DIGlobalVariableKind; @@ -2997,20 +3048,20 @@ class DICommonBlock : public DIScope { DICommonBlock(LLVMContext &Context, StorageType Storage, unsigned LineNo, ArrayRef<Metadata *> Ops) : DIScope(Context, DICommonBlockKind, Storage, dwarf::DW_TAG_common_block, - Ops), LineNo(LineNo) {} + Ops), + LineNo(LineNo) {} static DICommonBlock *getImpl(LLVMContext &Context, DIScope *Scope, DIGlobalVariable *Decl, StringRef Name, DIFile *File, unsigned LineNo, - StorageType Storage, - bool ShouldCreate = true) { + StorageType Storage, bool ShouldCreate = true) { return getImpl(Context, Scope, Decl, getCanonicalMDString(Context, Name), File, LineNo, Storage, ShouldCreate); } static DICommonBlock *getImpl(LLVMContext &Context, Metadata *Scope, Metadata *Decl, MDString *Name, Metadata *File, - unsigned LineNo, - StorageType Storage, bool ShouldCreate = true); + unsigned LineNo, StorageType Storage, + bool ShouldCreate = true); TempDICommonBlock cloneImpl() const { return getTemporary(getContext(), getScope(), getDecl(), getName(), @@ -3019,11 +3070,11 @@ class DICommonBlock : public DIScope { public: DEFINE_MDNODE_GET(DICommonBlock, - (DIScope *Scope, DIGlobalVariable *Decl, StringRef Name, + (DIScope * Scope, DIGlobalVariable *Decl, StringRef Name, DIFile *File, unsigned LineNo), (Scope, Decl, 
Name, File, LineNo)) DEFINE_MDNODE_GET(DICommonBlock, - (Metadata *Scope, Metadata *Decl, MDString *Name, + (Metadata * Scope, Metadata *Decl, MDString *Name, Metadata *File, unsigned LineNo), (Scope, Decl, Name, File, LineNo)) @@ -3069,34 +3120,39 @@ class DILocalVariable : public DIVariable { static DILocalVariable *getImpl(LLVMContext &Context, DIScope *Scope, StringRef Name, DIFile *File, unsigned Line, DIType *Type, unsigned Arg, DIFlags Flags, - uint32_t AlignInBits, StorageType Storage, + uint32_t AlignInBits, DINodeArray Annotations, + StorageType Storage, bool ShouldCreate = true) { return getImpl(Context, Scope, getCanonicalMDString(Context, Name), File, - Line, Type, Arg, Flags, AlignInBits, Storage, ShouldCreate); + Line, Type, Arg, Flags, AlignInBits, Annotations.get(), + Storage, ShouldCreate); } static DILocalVariable *getImpl(LLVMContext &Context, Metadata *Scope, MDString *Name, Metadata *File, unsigned Line, Metadata *Type, unsigned Arg, DIFlags Flags, - uint32_t AlignInBits, StorageType Storage, + uint32_t AlignInBits, Metadata *Annotations, + StorageType Storage, bool ShouldCreate = true); TempDILocalVariable cloneImpl() const { return getTemporary(getContext(), getScope(), getName(), getFile(), getLine(), getType(), getArg(), getFlags(), - getAlignInBits()); + getAlignInBits(), getAnnotations()); } public: DEFINE_MDNODE_GET(DILocalVariable, (DILocalScope * Scope, StringRef Name, DIFile *File, unsigned Line, DIType *Type, unsigned Arg, DIFlags Flags, - uint32_t AlignInBits), - (Scope, Name, File, Line, Type, Arg, Flags, AlignInBits)) + uint32_t AlignInBits, DINodeArray Annotations), + (Scope, Name, File, Line, Type, Arg, Flags, AlignInBits, + Annotations)) DEFINE_MDNODE_GET(DILocalVariable, (Metadata * Scope, MDString *Name, Metadata *File, - unsigned Line, Metadata *Type, unsigned Arg, - DIFlags Flags, uint32_t AlignInBits), - (Scope, Name, File, Line, Type, Arg, Flags, AlignInBits)) + unsigned Line, Metadata *Type, unsigned Arg, DIFlags Flags, 
+ uint32_t AlignInBits, Metadata *Annotations), + (Scope, Name, File, Line, Type, Arg, Flags, AlignInBits, + Annotations)) TempDILocalVariable clone() const { return cloneImpl(); } @@ -3111,6 +3167,11 @@ public: unsigned getArg() const { return Arg; } DIFlags getFlags() const { return Flags; } + DINodeArray getAnnotations() const { + return cast_or_null<MDTuple>(getRawAnnotations()); + } + Metadata *getRawAnnotations() const { return getOperand(4); } + bool isArtificial() const { return getFlags() & FlagArtificial; } bool isObjectPointer() const { return getFlags() & FlagObjectPointer; } @@ -3141,16 +3202,14 @@ class DILabel : public DINode { : DINode(C, DILabelKind, Storage, dwarf::DW_TAG_label, Ops), Line(Line) {} ~DILabel() = default; - static DILabel *getImpl(LLVMContext &Context, DIScope *Scope, - StringRef Name, DIFile *File, unsigned Line, - StorageType Storage, + static DILabel *getImpl(LLVMContext &Context, DIScope *Scope, StringRef Name, + DIFile *File, unsigned Line, StorageType Storage, bool ShouldCreate = true) { return getImpl(Context, Scope, getCanonicalMDString(Context, Name), File, Line, Storage, ShouldCreate); } - static DILabel *getImpl(LLVMContext &Context, Metadata *Scope, - MDString *Name, Metadata *File, unsigned Line, - StorageType Storage, + static DILabel *getImpl(LLVMContext &Context, Metadata *Scope, MDString *Name, + Metadata *File, unsigned Line, StorageType Storage, bool ShouldCreate = true); TempDILabel cloneImpl() const { @@ -3295,31 +3354,33 @@ class DIImportedEntity : public DINode { static DIImportedEntity *getImpl(LLVMContext &Context, unsigned Tag, DIScope *Scope, DINode *Entity, DIFile *File, unsigned Line, StringRef Name, - StorageType Storage, + DINodeArray Elements, StorageType Storage, bool ShouldCreate = true) { return getImpl(Context, Tag, Scope, Entity, File, Line, - getCanonicalMDString(Context, Name), Storage, ShouldCreate); + getCanonicalMDString(Context, Name), Elements.get(), Storage, + ShouldCreate); } - static 
DIImportedEntity *getImpl(LLVMContext &Context, unsigned Tag, - Metadata *Scope, Metadata *Entity, - Metadata *File, unsigned Line, - MDString *Name, StorageType Storage, - bool ShouldCreate = true); + static DIImportedEntity * + getImpl(LLVMContext &Context, unsigned Tag, Metadata *Scope, Metadata *Entity, + Metadata *File, unsigned Line, MDString *Name, Metadata *Elements, + StorageType Storage, bool ShouldCreate = true); TempDIImportedEntity cloneImpl() const { return getTemporary(getContext(), getTag(), getScope(), getEntity(), - getFile(), getLine(), getName()); + getFile(), getLine(), getName(), getElements()); } public: DEFINE_MDNODE_GET(DIImportedEntity, (unsigned Tag, DIScope *Scope, DINode *Entity, DIFile *File, - unsigned Line, StringRef Name = ""), - (Tag, Scope, Entity, File, Line, Name)) + unsigned Line, StringRef Name = "", + DINodeArray Elements = nullptr), + (Tag, Scope, Entity, File, Line, Name, Elements)) DEFINE_MDNODE_GET(DIImportedEntity, (unsigned Tag, Metadata *Scope, Metadata *Entity, - Metadata *File, unsigned Line, MDString *Name), - (Tag, Scope, Entity, File, Line, Name)) + Metadata *File, unsigned Line, MDString *Name, + Metadata *Elements = nullptr), + (Tag, Scope, Entity, File, Line, Name, Elements)) TempDIImportedEntity clone() const { return cloneImpl(); } @@ -3328,11 +3389,15 @@ public: DINode *getEntity() const { return cast_or_null<DINode>(getRawEntity()); } StringRef getName() const { return getStringOperand(2); } DIFile *getFile() const { return cast_or_null<DIFile>(getRawFile()); } + DINodeArray getElements() const { + return cast_or_null<MDTuple>(getRawElements()); + } Metadata *getRawScope() const { return getOperand(0); } Metadata *getRawEntity() const { return getOperand(1); } MDString *getRawName() const { return getOperandAs<MDString>(2); } Metadata *getRawFile() const { return getOperand(3); } + Metadata *getRawElements() const { return getOperand(4); } static bool classof(const Metadata *MD) { return MD->getMetadataID() 
== DIImportedEntityKind; @@ -3457,11 +3522,13 @@ class DIMacro : public DIMacroNode { } public: - DEFINE_MDNODE_GET(DIMacro, (unsigned MIType, unsigned Line, StringRef Name, - StringRef Value = ""), + DEFINE_MDNODE_GET(DIMacro, + (unsigned MIType, unsigned Line, StringRef Name, + StringRef Value = ""), (MIType, Line, Name, Value)) - DEFINE_MDNODE_GET(DIMacro, (unsigned MIType, unsigned Line, MDString *Name, - MDString *Value), + DEFINE_MDNODE_GET(DIMacro, + (unsigned MIType, unsigned Line, MDString *Name, + MDString *Value), (MIType, Line, Name, Value)) TempDIMacro clone() const { return cloneImpl(); } @@ -3508,11 +3575,13 @@ class DIMacroFile : public DIMacroNode { } public: - DEFINE_MDNODE_GET(DIMacroFile, (unsigned MIType, unsigned Line, DIFile *File, - DIMacroNodeArray Elements), + DEFINE_MDNODE_GET(DIMacroFile, + (unsigned MIType, unsigned Line, DIFile *File, + DIMacroNodeArray Elements), (MIType, Line, File, Elements)) - DEFINE_MDNODE_GET(DIMacroFile, (unsigned MIType, unsigned Line, - Metadata *File, Metadata *Elements), + DEFINE_MDNODE_GET(DIMacroFile, + (unsigned MIType, unsigned Line, Metadata *File, + Metadata *Elements), (MIType, Line, File, Elements)) TempDIMacroFile clone() const { return cloneImpl(); } diff --git a/llvm/include/llvm/IR/DerivedTypes.h b/llvm/include/llvm/IR/DerivedTypes.h index b68a912b5f70..8a1b26e699e3 100644 --- a/llvm/include/llvm/IR/DerivedTypes.h +++ b/llvm/include/llvm/IR/DerivedTypes.h @@ -49,10 +49,11 @@ public: /// This enum is just used to hold constants we need for IntegerType. enum { MIN_INT_BITS = 1, ///< Minimum number of bits that can be specified - MAX_INT_BITS = (1<<24)-1 ///< Maximum number of bits that can be specified + MAX_INT_BITS = (1<<23) ///< Maximum number of bits that can be specified ///< Note that bit width is stored in the Type classes SubclassData field - ///< which has 24 bits. This yields a maximum bit width of 16,777,215 - ///< bits. + ///< which has 24 bits. 
SelectionDAG type legalization can require a + ///< power of 2 IntegerType, so limit to the largest representable power + ///< of 2, 8388608. }; /// This static method is the primary way of constructing an IntegerType. diff --git a/llvm/include/llvm/IR/DiagnosticInfo.h b/llvm/include/llvm/IR/DiagnosticInfo.h index 5064f4f4edf7..73b0be43e136 100644 --- a/llvm/include/llvm/IR/DiagnosticInfo.h +++ b/llvm/include/llvm/IR/DiagnosticInfo.h @@ -33,6 +33,7 @@ namespace llvm { // Forward declarations. class DiagnosticPrinter; +class CallInst; class Function; class Instruction; class InstructionCost; @@ -79,6 +80,7 @@ enum DiagnosticKind { DK_PGOProfile, DK_Unsupported, DK_SrcMgr, + DK_DontCall, DK_FirstPluginKind // Must be last value to work with // getNextAvailablePluginDiagnosticKind }; @@ -194,10 +196,9 @@ public: /// \p The function that is concerned by this stack size diagnostic. /// \p The computed stack size. DiagnosticInfoResourceLimit(const Function &Fn, const char *ResourceName, - uint64_t ResourceSize, + uint64_t ResourceSize, uint64_t ResourceLimit, DiagnosticSeverity Severity = DS_Warning, - DiagnosticKind Kind = DK_ResourceLimit, - uint64_t ResourceLimit = 0) + DiagnosticKind Kind = DK_ResourceLimit) : DiagnosticInfo(Kind, Severity), Fn(Fn), ResourceName(ResourceName), ResourceSize(ResourceSize), ResourceLimit(ResourceLimit) {} @@ -218,10 +219,10 @@ class DiagnosticInfoStackSize : public DiagnosticInfoResourceLimit { void anchor() override; public: DiagnosticInfoStackSize(const Function &Fn, uint64_t StackSize, - DiagnosticSeverity Severity = DS_Warning, - uint64_t StackLimit = 0) - : DiagnosticInfoResourceLimit(Fn, "stack frame size", StackSize, Severity, - DK_StackSize, StackLimit) {} + uint64_t StackLimit, + DiagnosticSeverity Severity = DS_Warning) + : DiagnosticInfoResourceLimit(Fn, "stack frame size", StackSize, + StackLimit, Severity, DK_StackSize) {} uint64_t getStackSize() const { return getResourceSize(); } uint64_t getStackLimit() const { return 
getResourceLimit(); } @@ -1070,6 +1071,27 @@ public: } }; +void diagnoseDontCall(const CallInst &CI); + +class DiagnosticInfoDontCall : public DiagnosticInfo { + StringRef CalleeName; + StringRef Note; + unsigned LocCookie; + +public: + DiagnosticInfoDontCall(StringRef CalleeName, StringRef Note, + DiagnosticSeverity DS, unsigned LocCookie) + : DiagnosticInfo(DK_DontCall, DS), CalleeName(CalleeName), Note(Note), + LocCookie(LocCookie) {} + StringRef getFunctionName() const { return CalleeName; } + StringRef getNote() const { return Note; } + unsigned getLocCookie() const { return LocCookie; } + void print(DiagnosticPrinter &DP) const override; + static bool classof(const DiagnosticInfo *DI) { + return DI->getKind() == DK_DontCall; + } +}; + } // end namespace llvm #endif // LLVM_IR_DIAGNOSTICINFO_H diff --git a/llvm/include/llvm/IR/DiagnosticPrinter.h b/llvm/include/llvm/IR/DiagnosticPrinter.h index 102932ceefa5..2df6fc3dfe73 100644 --- a/llvm/include/llvm/IR/DiagnosticPrinter.h +++ b/llvm/include/llvm/IR/DiagnosticPrinter.h @@ -1,4 +1,4 @@ -//===- llvm/Support/DiagnosticPrinter.h - Diagnostic Printer ----*- C++ -*-===// +//===- llvm/IR/DiagnosticPrinter.h - Diagnostic Printer ---------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/llvm/include/llvm/IR/Dominators.h b/llvm/include/llvm/IR/Dominators.h index 4d140c3ad0f2..475355af5647 100644 --- a/llvm/include/llvm/IR/Dominators.h +++ b/llvm/include/llvm/IR/Dominators.h @@ -277,6 +277,12 @@ struct DominatorTreeVerifierPass : PassInfoMixin<DominatorTreeVerifierPass> { PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); }; +/// Enables verification of dominator trees. +/// +/// This check is expensive and is disabled by default. `-verify-dom-info` +/// allows selectively enabling the check without needing to recompile. 
+extern bool VerifyDomInfo; + /// Legacy analysis pass which computes a \c DominatorTree. class DominatorTreeWrapperPass : public FunctionPass { DominatorTree DT; diff --git a/llvm/include/llvm/IR/FPEnv.h b/llvm/include/llvm/IR/FPEnv.h index 621540000b5c..bf435ec6d109 100644 --- a/llvm/include/llvm/IR/FPEnv.h +++ b/llvm/include/llvm/IR/FPEnv.h @@ -39,24 +39,30 @@ enum ExceptionBehavior : uint8_t { /// Returns a valid RoundingMode enumerator when given a string /// that is valid as input in constrained intrinsic rounding mode /// metadata. -Optional<RoundingMode> StrToRoundingMode(StringRef); +Optional<RoundingMode> convertStrToRoundingMode(StringRef); /// For any RoundingMode enumerator, returns a string valid as input in /// constrained intrinsic rounding mode metadata. -Optional<StringRef> RoundingModeToStr(RoundingMode); +Optional<StringRef> convertRoundingModeToStr(RoundingMode); /// Returns a valid ExceptionBehavior enumerator when given a string /// valid as input in constrained intrinsic exception behavior metadata. -Optional<fp::ExceptionBehavior> StrToExceptionBehavior(StringRef); +Optional<fp::ExceptionBehavior> convertStrToExceptionBehavior(StringRef); /// For any ExceptionBehavior enumerator, returns a string valid as /// input in constrained intrinsic exception behavior metadata. -Optional<StringRef> ExceptionBehaviorToStr(fp::ExceptionBehavior); +Optional<StringRef> convertExceptionBehaviorToStr(fp::ExceptionBehavior); /// Returns true if the exception handling behavior and rounding mode /// match what is used in the default floating point environment. inline bool isDefaultFPEnvironment(fp::ExceptionBehavior EB, RoundingMode RM) { return EB == fp::ebIgnore && RM == RoundingMode::NearestTiesToEven; } + +/// Returns true if the rounding mode RM may be QRM at compile time or +/// at run time. 
+inline bool canRoundingModeBe(RoundingMode RM, RoundingMode QRM) { + return RM == QRM || RM == RoundingMode::Dynamic; +} } #endif diff --git a/llvm/include/llvm/IR/Function.h b/llvm/include/llvm/IR/Function.h index e0094e2afff2..669418eacbb0 100644 --- a/llvm/include/llvm/IR/Function.h +++ b/llvm/include/llvm/IR/Function.h @@ -48,6 +48,7 @@ typedef unsigned ID; class AssemblyAnnotationWriter; class Constant; +struct DenormalMode; class DISubprogram; class LLVMContext; class Module; @@ -58,7 +59,8 @@ class User; class BranchProbabilityInfo; class BlockFrequencyInfo; -class Function : public GlobalObject, public ilist_node<Function> { +class LLVM_EXTERNAL_VISIBILITY Function : public GlobalObject, + public ilist_node<Function> { public: using BasicBlockListType = SymbolTableList<BasicBlock>; @@ -245,72 +247,22 @@ public: setValueSubclassData((getSubclassDataFromValue() & 0xc00f) | (ID << 4)); } - /// Return the attribute list for this Function. - AttributeList getAttributes() const { return AttributeSets; } - - /// Set the attribute list for this Function. - void setAttributes(AttributeList Attrs) { AttributeSets = Attrs; } - - /// Add function attributes to this function. - void addFnAttr(Attribute::AttrKind Kind) { - addAttribute(AttributeList::FunctionIndex, Kind); - } - - /// Add function attributes to this function. - void addFnAttr(StringRef Kind, StringRef Val = StringRef()) { - addAttribute(AttributeList::FunctionIndex, - Attribute::get(getContext(), Kind, Val)); - } - - /// Add function attributes to this function. - void addFnAttr(Attribute Attr) { - addAttribute(AttributeList::FunctionIndex, Attr); - } - - /// Remove function attributes from this function. - void removeFnAttr(Attribute::AttrKind Kind) { - removeAttribute(AttributeList::FunctionIndex, Kind); - } - - /// Remove function attribute from this function. 
- void removeFnAttr(StringRef Kind) { - setAttributes(getAttributes().removeAttribute( - getContext(), AttributeList::FunctionIndex, Kind)); - } - - /// A function will have the "coroutine.presplit" attribute if it's - /// a coroutine and has not gone through full CoroSplit pass. - bool isPresplitCoroutine() const { - return hasFnAttribute("coroutine.presplit"); - } - - enum ProfileCountType { PCT_Invalid, PCT_Real, PCT_Synthetic }; + enum ProfileCountType { PCT_Real, PCT_Synthetic }; /// Class to represent profile counts. /// /// This class represents both real and synthetic profile counts. class ProfileCount { private: - uint64_t Count; - ProfileCountType PCT; - static ProfileCount Invalid; + uint64_t Count = 0; + ProfileCountType PCT = PCT_Real; public: - ProfileCount() : Count(-1), PCT(PCT_Invalid) {} ProfileCount(uint64_t Count, ProfileCountType PCT) : Count(Count), PCT(PCT) {} - bool hasValue() const { return PCT != PCT_Invalid; } uint64_t getCount() const { return Count; } ProfileCountType getType() const { return PCT; } bool isSynthetic() const { return PCT == PCT_Synthetic; } - explicit operator bool() { return hasValue(); } - bool operator!() const { return !hasValue(); } - // Update the count retaining the same profile count type. - ProfileCount &setCount(uint64_t C) { - Count = C; - return *this; - } - static ProfileCount getInvalid() { return ProfileCount(-1, PCT_Invalid); } }; /// Set the entry count for this function. @@ -330,7 +282,7 @@ public: /// /// Entry count is the number of times the function was executed. /// When AllowSynthetic is false, only pgo_data will be returned. - ProfileCount getEntryCount(bool AllowSynthetic = false) const; + Optional<ProfileCount> getEntryCount(bool AllowSynthetic = false) const; /// Return true if the function is annotated with profile data. /// @@ -351,43 +303,6 @@ public: /// Get the section prefix for this function. 
Optional<StringRef> getSectionPrefix() const; - /// Return true if the function has the attribute. - bool hasFnAttribute(Attribute::AttrKind Kind) const { - return AttributeSets.hasFnAttribute(Kind); - } - - /// Return true if the function has the attribute. - bool hasFnAttribute(StringRef Kind) const { - return AttributeSets.hasFnAttribute(Kind); - } - - /// Return the attribute for the given attribute kind. - Attribute getFnAttribute(Attribute::AttrKind Kind) const { - return getAttribute(AttributeList::FunctionIndex, Kind); - } - - /// Return the attribute for the given attribute kind. - Attribute getFnAttribute(StringRef Kind) const { - return getAttribute(AttributeList::FunctionIndex, Kind); - } - - /// Return the stack alignment for the function. - unsigned getFnStackAlignment() const { - if (!hasFnAttribute(Attribute::StackAlignment)) - return 0; - if (const auto MA = - AttributeSets.getStackAlignment(AttributeList::FunctionIndex)) - return MA->value(); - return 0; - } - - /// Return the stack alignment for the function. - MaybeAlign getFnStackAlign() const { - if (!hasFnAttribute(Attribute::StackAlignment)) - return None; - return AttributeSets.getStackAlignment(AttributeList::FunctionIndex); - } - /// hasGC/getGC/setGC/clearGC - The name of the garbage collection algorithm /// to use during code generation. bool hasGC() const { @@ -397,17 +312,36 @@ public: void setGC(std::string Str); void clearGC(); - /// Returns true if the function has ssp, sspstrong, or sspreq fn attrs. - bool hasStackProtectorFnAttr() const; + /// Return the attribute list for this Function. + AttributeList getAttributes() const { return AttributeSets; } - /// adds the attribute to the list of attributes. - void addAttribute(unsigned i, Attribute::AttrKind Kind); + /// Set the attribute list for this Function. + void setAttributes(AttributeList Attrs) { AttributeSets = Attrs; } + // TODO: remove non-AtIndex versions of these methods. /// adds the attribute to the list of attributes. 
- void addAttribute(unsigned i, Attribute Attr); + void addAttributeAtIndex(unsigned i, Attribute Attr); + + /// Add function attributes to this function. + void addFnAttr(Attribute::AttrKind Kind); + + /// Add function attributes to this function. + void addFnAttr(StringRef Kind, StringRef Val = StringRef()); + + /// Add function attributes to this function. + void addFnAttr(Attribute Attr); + + /// Add function attributes to this function. + void addFnAttrs(const AttrBuilder &Attrs); - /// adds the attributes to the list of attributes. - void addAttributes(unsigned i, const AttrBuilder &Attrs); + /// Add return value attributes to this function. + void addRetAttr(Attribute::AttrKind Kind); + + /// Add return value attributes to this function. + void addRetAttr(Attribute Attr); + + /// Add return value attributes to this function. + void addRetAttrs(const AttrBuilder &Attrs); /// adds the attribute to the list of attributes for the given arg. void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind); @@ -419,13 +353,27 @@ public: void addParamAttrs(unsigned ArgNo, const AttrBuilder &Attrs); /// removes the attribute from the list of attributes. - void removeAttribute(unsigned i, Attribute::AttrKind Kind); + void removeAttributeAtIndex(unsigned i, Attribute::AttrKind Kind); /// removes the attribute from the list of attributes. - void removeAttribute(unsigned i, StringRef Kind); + void removeAttributeAtIndex(unsigned i, StringRef Kind); + + /// Remove function attributes from this function. + void removeFnAttr(Attribute::AttrKind Kind); + + /// Remove function attribute from this function. + void removeFnAttr(StringRef Kind); + + void removeFnAttrs(const AttrBuilder &Attrs); - /// removes the attributes from the list of attributes. - void removeAttributes(unsigned i, const AttrBuilder &Attrs); + /// removes the attribute from the return value list of attributes. 
+ void removeRetAttr(Attribute::AttrKind Kind); + + /// removes the attribute from the return value list of attributes. + void removeRetAttr(StringRef Kind); + + /// removes the attributes from the return value list of attributes. + void removeRetAttrs(const AttrBuilder &Attrs); /// removes the attribute from the list of attributes. void removeParamAttr(unsigned ArgNo, Attribute::AttrKind Kind); @@ -436,54 +384,57 @@ public: /// removes the attribute from the list of attributes. void removeParamAttrs(unsigned ArgNo, const AttrBuilder &Attrs); - /// removes noundef and other attributes that imply undefined behavior if a - /// `undef` or `poison` value is passed from the list of attributes. - void removeParamUndefImplyingAttrs(unsigned ArgNo); + /// Return true if the function has the attribute. + bool hasFnAttribute(Attribute::AttrKind Kind) const; - /// check if an attributes is in the list of attributes. - bool hasAttribute(unsigned i, Attribute::AttrKind Kind) const { - return getAttributes().hasAttribute(i, Kind); - } + /// Return true if the function has the attribute. + bool hasFnAttribute(StringRef Kind) const; - /// check if an attributes is in the list of attributes. - bool hasParamAttribute(unsigned ArgNo, Attribute::AttrKind Kind) const { - return getAttributes().hasParamAttribute(ArgNo, Kind); - } + /// check if an attribute is in the list of attributes for the return value. + bool hasRetAttribute(Attribute::AttrKind Kind) const; - /// gets the specified attribute from the list of attributes. - Attribute getParamAttribute(unsigned ArgNo, Attribute::AttrKind Kind) const { - return getAttributes().getParamAttr(ArgNo, Kind); - } + /// check if an attributes is in the list of attributes. + bool hasParamAttribute(unsigned ArgNo, Attribute::AttrKind Kind) const; /// gets the attribute from the list of attributes. 
- Attribute getAttribute(unsigned i, Attribute::AttrKind Kind) const { - return AttributeSets.getAttribute(i, Kind); - } + Attribute getAttributeAtIndex(unsigned i, Attribute::AttrKind Kind) const; /// gets the attribute from the list of attributes. - Attribute getAttribute(unsigned i, StringRef Kind) const { - return AttributeSets.getAttribute(i, Kind); + Attribute getAttributeAtIndex(unsigned i, StringRef Kind) const; + + /// Return the attribute for the given attribute kind. + Attribute getFnAttribute(Attribute::AttrKind Kind) const; + + /// Return the attribute for the given attribute kind. + Attribute getFnAttribute(StringRef Kind) const; + + /// gets the specified attribute from the list of attributes. + Attribute getParamAttribute(unsigned ArgNo, Attribute::AttrKind Kind) const; + + /// removes noundef and other attributes that imply undefined behavior if a + /// `undef` or `poison` value is passed from the list of attributes. + void removeParamUndefImplyingAttrs(unsigned ArgNo); + + /// Return the stack alignment for the function. + MaybeAlign getFnStackAlign() const { + return AttributeSets.getFnStackAlignment(); } - /// adds the dereferenceable attribute to the list of attributes. - void addDereferenceableAttr(unsigned i, uint64_t Bytes); + /// Returns true if the function has ssp, sspstrong, or sspreq fn attrs. + bool hasStackProtectorFnAttr() const; /// adds the dereferenceable attribute to the list of attributes for /// the given arg. void addDereferenceableParamAttr(unsigned ArgNo, uint64_t Bytes); /// adds the dereferenceable_or_null attribute to the list of - /// attributes. - void addDereferenceableOrNullAttr(unsigned i, uint64_t Bytes); - - /// adds the dereferenceable_or_null attribute to the list of /// attributes for the given arg. void addDereferenceableOrNullParamAttr(unsigned ArgNo, uint64_t Bytes); /// Extract the alignment for a call or parameter (0=unknown). /// FIXME: Remove this function once transition to Align is over. 
/// Use getParamAlign() instead. - unsigned getParamAlignment(unsigned ArgNo) const { + uint64_t getParamAlignment(unsigned ArgNo) const { if (const auto MA = getParamAlign(ArgNo)) return MA->value(); return 0; @@ -517,11 +468,9 @@ public: return AttributeSets.getParamByRefType(ArgNo); } - /// Extract the number of dereferenceable bytes for a call or - /// parameter (0=unknown). - /// @param i AttributeList index, referring to a return value or argument. - uint64_t getDereferenceableBytes(unsigned i) const { - return AttributeSets.getDereferenceableBytes(i); + /// Extract the preallocated type for a parameter. + Type *getParamPreallocatedType(unsigned ArgNo) const { + return AttributeSets.getParamPreallocatedType(ArgNo); } /// Extract the number of dereferenceable bytes for a parameter. @@ -530,13 +479,6 @@ public: return AttributeSets.getParamDereferenceableBytes(ArgNo); } - /// Extract the number of dereferenceable_or_null bytes for a call or - /// parameter (0=unknown). - /// @param i AttributeList index, referring to a return value or argument. - uint64_t getDereferenceableOrNullBytes(unsigned i) const { - return AttributeSets.getDereferenceableOrNullBytes(i); - } - /// Extract the number of dereferenceable_or_null bytes for a /// parameter. /// @param ArgNo AttributeList ArgNo, referring to an argument. @@ -544,6 +486,12 @@ public: return AttributeSets.getParamDereferenceableOrNullBytes(ArgNo); } + /// A function will have the "coroutine.presplit" attribute if it's + /// a coroutine and has not gone through full CoroSplit pass. + bool isPresplitCoroutine() const { + return hasFnAttribute("coroutine.presplit"); + } + /// Determine if the function does not access memory. bool doesNotAccessMemory() const { return hasFnAttribute(Attribute::ReadNone); @@ -692,19 +640,16 @@ public: /// Determine if the function returns a structure through first /// or second pointer argument. 
bool hasStructRetAttr() const { - return AttributeSets.hasParamAttribute(0, Attribute::StructRet) || - AttributeSets.hasParamAttribute(1, Attribute::StructRet); + return AttributeSets.hasParamAttr(0, Attribute::StructRet) || + AttributeSets.hasParamAttr(1, Attribute::StructRet); } /// Determine if the parameter or return value is marked with NoAlias /// attribute. bool returnDoesNotAlias() const { - return AttributeSets.hasAttribute(AttributeList::ReturnIndex, - Attribute::NoAlias); - } - void setReturnDoesNotAlias() { - addAttribute(AttributeList::ReturnIndex, Attribute::NoAlias); + return AttributeSets.hasRetAttr(Attribute::NoAlias); } + void setReturnDoesNotAlias() { addRetAttr(Attribute::NoAlias); } /// Do not optimize this function (-O0). bool hasOptNone() const { return hasFnAttribute(Attribute::OptimizeNone); } @@ -904,13 +849,14 @@ public: /// hasAddressTaken - returns true if there are any uses of this function /// other than direct calls or invokes to it, or blockaddress expressions. /// Optionally passes back an offending user for diagnostic purposes, - /// ignores callback uses, assume like pointer annotation calls, and - /// references in llvm.used and llvm.compiler.used variables. - /// + /// ignores callback uses, assume like pointer annotation calls, references in + /// llvm.used and llvm.compiler.used variables, and operand bundle + /// "clang.arc.attachedcall". 
bool hasAddressTaken(const User ** = nullptr, bool IgnoreCallbackUses = false, bool IgnoreAssumeLikeCalls = true, - bool IngoreLLVMUsed = false) const; + bool IngoreLLVMUsed = false, + bool IgnoreARCAttachedCall = false) const; /// isDefTriviallyDead - Return true if it is trivially safe to remove /// this function definition from the module (because it isn't externally diff --git a/llvm/include/llvm/IR/GCStrategy.h b/llvm/include/llvm/IR/GCStrategy.h index a69958d596c6..4fa8e3a8dcf4 100644 --- a/llvm/include/llvm/IR/GCStrategy.h +++ b/llvm/include/llvm/IR/GCStrategy.h @@ -131,6 +131,9 @@ public: /// GCMetadataPrinterRegistery as well. using GCRegistry = Registry<GCStrategy>; +/// Lookup the GCStrategy object associated with the given gc name. +std::unique_ptr<GCStrategy> getGCStrategy(const StringRef Name); + } // end namespace llvm #endif // LLVM_IR_GCSTRATEGY_H diff --git a/llvm/include/llvm/IR/GlobalAlias.h b/llvm/include/llvm/IR/GlobalAlias.h index f2d9b9676ec9..01134448a8fa 100644 --- a/llvm/include/llvm/IR/GlobalAlias.h +++ b/llvm/include/llvm/IR/GlobalAlias.h @@ -15,7 +15,8 @@ #define LLVM_IR_GLOBALALIAS_H #include "llvm/ADT/ilist_node.h" -#include "llvm/IR/GlobalIndirectSymbol.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/OperandTraits.h" #include "llvm/IR/Value.h" namespace llvm { @@ -24,8 +25,7 @@ class Twine; class Module; template <typename ValueSubClass> class SymbolTableListTraits; -class GlobalAlias : public GlobalIndirectSymbol, - public ilist_node<GlobalAlias> { +class GlobalAlias : public GlobalValue, public ilist_node<GlobalAlias> { friend class SymbolTableListTraits<GlobalAlias>; GlobalAlias(Type *Ty, unsigned AddressSpace, LinkageTypes Linkage, @@ -58,6 +58,17 @@ public: // Linkage, Type, Parent and AddressSpace taken from the Aliasee. 
static GlobalAlias *create(const Twine &Name, GlobalValue *Aliasee); + // allocate space for exactly one operand + void *operator new(size_t S) { return User::operator new(S, 1); } + void operator delete(void *Ptr) { User::operator delete(Ptr); } + + /// Provide fast operand accessors + DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Constant); + + void copyAttributesFrom(const GlobalAlias *Src) { + GlobalValue::copyAttributesFrom(Src); + } + /// removeFromParent - This method unlinks 'this' from the containing module, /// but does not delete it. /// @@ -71,10 +82,14 @@ public: /// These methods retrieve and set alias target. void setAliasee(Constant *Aliasee); const Constant *getAliasee() const { - return getIndirectSymbol(); + return static_cast<Constant *>(Op<0>().get()); } - Constant *getAliasee() { - return getIndirectSymbol(); + Constant *getAliasee() { return static_cast<Constant *>(Op<0>().get()); } + + const GlobalObject *getAliaseeObject() const; + GlobalObject *getAliaseeObject() { + return const_cast<GlobalObject *>( + static_cast<const GlobalAlias *>(this)->getAliaseeObject()); } static bool isValidLinkage(LinkageTypes L) { @@ -88,6 +103,12 @@ public: } }; +template <> +struct OperandTraits<GlobalAlias> + : public FixedNumOperandTraits<GlobalAlias, 1> {}; + +DEFINE_TRANSPARENT_OPERAND_ACCESSORS(GlobalAlias, Constant) + } // end namespace llvm #endif // LLVM_IR_GLOBALALIAS_H diff --git a/llvm/include/llvm/IR/GlobalIFunc.h b/llvm/include/llvm/IR/GlobalIFunc.h index ddd29c8a4a19..10088ee2fff4 100644 --- a/llvm/include/llvm/IR/GlobalIFunc.h +++ b/llvm/include/llvm/IR/GlobalIFunc.h @@ -18,7 +18,9 @@ #define LLVM_IR_GLOBALIFUNC_H #include "llvm/ADT/ilist_node.h" -#include "llvm/IR/GlobalIndirectSymbol.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/GlobalObject.h" +#include "llvm/IR/OperandTraits.h" #include "llvm/IR/Value.h" namespace llvm { @@ -29,8 +31,7 @@ class Module; // Traits class for using GlobalIFunc in symbol table in Module. 
template <typename ValueSubClass> class SymbolTableListTraits; -class GlobalIFunc final : public GlobalIndirectSymbol, - public ilist_node<GlobalIFunc> { +class GlobalIFunc final : public GlobalObject, public ilist_node<GlobalIFunc> { friend class SymbolTableListTraits<GlobalIFunc>; GlobalIFunc(Type *Ty, unsigned AddressSpace, LinkageTypes Linkage, @@ -46,6 +47,17 @@ public: LinkageTypes Linkage, const Twine &Name, Constant *Resolver, Module *Parent); + // allocate space for exactly one operand + void *operator new(size_t S) { return User::operator new(S, 1); } + void operator delete(void *Ptr) { User::operator delete(Ptr); } + + /// Provide fast operand accessors + DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Constant); + + void copyAttributesFrom(const GlobalIFunc *Src) { + GlobalObject::copyAttributesFrom(Src); + } + /// This method unlinks 'this' from the containing module, but does not /// delete it. void removeFromParent(); @@ -54,14 +66,22 @@ public: void eraseFromParent(); /// These methods retrieve and set ifunc resolver function. - void setResolver(Constant *Resolver) { - setIndirectSymbol(Resolver); - } + void setResolver(Constant *Resolver) { Op<0>().set(Resolver); } const Constant *getResolver() const { - return getIndirectSymbol(); + return static_cast<Constant *>(Op<0>().get()); } - Constant *getResolver() { - return getIndirectSymbol(); + Constant *getResolver() { return static_cast<Constant *>(Op<0>().get()); } + + // Return the resolver function after peeling off potential ConstantExpr + // indirection. 
+ const Function *getResolverFunction() const; + Function *getResolverFunction() { + return const_cast<Function *>( + static_cast<const GlobalIFunc *>(this)->getResolverFunction()); + } + + static FunctionType *getResolverFunctionType(Type *IFuncValTy) { + return FunctionType::get(IFuncValTy->getPointerTo(), false); } // Methods for support type inquiry through isa, cast, and dyn_cast: @@ -70,6 +90,12 @@ public: } }; +template <> +struct OperandTraits<GlobalIFunc> + : public FixedNumOperandTraits<GlobalIFunc, 1> {}; + +DEFINE_TRANSPARENT_OPERAND_ACCESSORS(GlobalIFunc, Constant) + } // end namespace llvm #endif // LLVM_IR_GLOBALIFUNC_H diff --git a/llvm/include/llvm/IR/GlobalIndirectSymbol.h b/llvm/include/llvm/IR/GlobalIndirectSymbol.h deleted file mode 100644 index e45c7529885d..000000000000 --- a/llvm/include/llvm/IR/GlobalIndirectSymbol.h +++ /dev/null @@ -1,93 +0,0 @@ -//===- llvm/GlobalIndirectSymbol.h - GlobalIndirectSymbol class -*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file contains the declaration of the GlobalIndirectSymbol class, which -// is a base class for GlobalAlias and GlobalIFunc. It contains all common code -// for aliases and ifuncs. 
-// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_IR_GLOBALINDIRECTSYMBOL_H -#define LLVM_IR_GLOBALINDIRECTSYMBOL_H - -#include "llvm/IR/GlobalObject.h" -#include "llvm/IR/GlobalValue.h" -#include "llvm/IR/OperandTraits.h" -#include "llvm/IR/User.h" -#include "llvm/IR/Value.h" -#include "llvm/Support/Casting.h" -#include <cstddef> - -namespace llvm { - -class GlobalIndirectSymbol : public GlobalValue { -protected: - GlobalIndirectSymbol(Type *Ty, ValueTy VTy, unsigned AddressSpace, - LinkageTypes Linkage, const Twine &Name, Constant *Symbol); - -public: - GlobalIndirectSymbol(const GlobalIndirectSymbol &) = delete; - GlobalIndirectSymbol &operator=(const GlobalIndirectSymbol &) = delete; - - // allocate space for exactly one operand - void *operator new(size_t S) { return User::operator new(S, 1); } - void operator delete(void *Ptr) { User::operator delete(Ptr); } - - /// Provide fast operand accessors - DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Constant); - - void copyAttributesFrom(const GlobalValue *Src) { - GlobalValue::copyAttributesFrom(Src); - } - - /// These methods set and retrieve indirect symbol. 
- void setIndirectSymbol(Constant *Symbol) { - setOperand(0, Symbol); - } - const Constant *getIndirectSymbol() const { - return getOperand(0); - } - Constant *getIndirectSymbol() { - return const_cast<Constant *>( - static_cast<const GlobalIndirectSymbol *>(this)->getIndirectSymbol()); - } - - const GlobalObject *getBaseObject() const; - GlobalObject *getBaseObject() { - return const_cast<GlobalObject *>( - static_cast<const GlobalIndirectSymbol *>(this)->getBaseObject()); - } - - const GlobalObject *getBaseObject(const DataLayout &DL, APInt &Offset) const { - return dyn_cast<GlobalObject>( - getIndirectSymbol()->stripAndAccumulateInBoundsConstantOffsets(DL, - Offset)); - } - GlobalObject *getBaseObject(const DataLayout &DL, APInt &Offset) { - return const_cast<GlobalObject *>( - static_cast<const GlobalIndirectSymbol *>(this) - ->getBaseObject(DL, Offset)); - } - - // Methods for support type inquiry through isa, cast, and dyn_cast: - static bool classof(const Value *V) { - return V->getValueID() == Value::GlobalAliasVal || - V->getValueID() == Value::GlobalIFuncVal; - } -}; - -template <> -struct OperandTraits<GlobalIndirectSymbol> : - public FixedNumOperandTraits<GlobalIndirectSymbol, 1> { -}; - -DEFINE_TRANSPARENT_OPERAND_ACCESSORS(GlobalIndirectSymbol, Constant) - -} // end namespace llvm - -#endif // LLVM_IR_GLOBALINDIRECTSYMBOL_H diff --git a/llvm/include/llvm/IR/GlobalObject.h b/llvm/include/llvm/IR/GlobalObject.h index 341fbec66080..e15cf718bb10 100644 --- a/llvm/include/llvm/IR/GlobalObject.h +++ b/llvm/include/llvm/IR/GlobalObject.h @@ -51,7 +51,7 @@ protected: Comdat *ObjComdat; enum { - LastAlignmentBit = 4, + LastAlignmentBit = 5, HasSectionHashEntryBit, GlobalObjectBits, @@ -68,7 +68,7 @@ public: GlobalObject(const GlobalObject &) = delete; /// FIXME: Remove this function once transition to Align is over. - unsigned getAlignment() const { + uint64_t getAlignment() const { MaybeAlign Align = getAlign(); return Align ? 
Align->value() : 0; } @@ -153,7 +153,8 @@ public: // Methods for support type inquiry through isa, cast, and dyn_cast: static bool classof(const Value *V) { return V->getValueID() == Value::FunctionVal || - V->getValueID() == Value::GlobalVariableVal; + V->getValueID() == Value::GlobalVariableVal || + V->getValueID() == Value::GlobalIFuncVal; } private: diff --git a/llvm/include/llvm/IR/GlobalValue.h b/llvm/include/llvm/IR/GlobalValue.h index cf704d1f2374..1818f2a8f3cc 100644 --- a/llvm/include/llvm/IR/GlobalValue.h +++ b/llvm/include/llvm/IR/GlobalValue.h @@ -302,11 +302,14 @@ public: static bool isAvailableExternallyLinkage(LinkageTypes Linkage) { return Linkage == AvailableExternallyLinkage; } + static bool isLinkOnceAnyLinkage(LinkageTypes Linkage) { + return Linkage == LinkOnceAnyLinkage; + } static bool isLinkOnceODRLinkage(LinkageTypes Linkage) { return Linkage == LinkOnceODRLinkage; } static bool isLinkOnceLinkage(LinkageTypes Linkage) { - return Linkage == LinkOnceAnyLinkage || Linkage == LinkOnceODRLinkage; + return isLinkOnceAnyLinkage(Linkage) || isLinkOnceODRLinkage(Linkage); } static bool isWeakAnyLinkage(LinkageTypes Linkage) { return Linkage == WeakAnyLinkage; @@ -433,6 +436,9 @@ public: return isAvailableExternallyLinkage(getLinkage()); } bool hasLinkOnceLinkage() const { return isLinkOnceLinkage(getLinkage()); } + bool hasLinkOnceAnyLinkage() const { + return isLinkOnceAnyLinkage(getLinkage()); + } bool hasLinkOnceODRLinkage() const { return isLinkOnceODRLinkage(getLinkage()); } @@ -548,10 +554,10 @@ public: return !(isDeclarationForLinker() || isWeakForLinker()); } - const GlobalObject *getBaseObject() const; - GlobalObject *getBaseObject() { + const GlobalObject *getAliaseeObject() const; + GlobalObject *getAliaseeObject() { return const_cast<GlobalObject *>( - static_cast<const GlobalValue *>(this)->getBaseObject()); + static_cast<const GlobalValue *>(this)->getAliaseeObject()); } /// Returns whether this is a reference to an absolute symbol. 
diff --git a/llvm/include/llvm/IR/IRBuilder.h b/llvm/include/llvm/IR/IRBuilder.h index 8998ad0f94a9..b4e099e4ec20 100644 --- a/llvm/include/llvm/IR/IRBuilder.h +++ b/llvm/include/llvm/IR/IRBuilder.h @@ -316,7 +316,7 @@ public: /// Set the exception handling to be used with constrained floating point void setDefaultConstrainedExcept(fp::ExceptionBehavior NewExcept) { #ifndef NDEBUG - Optional<StringRef> ExceptStr = ExceptionBehaviorToStr(NewExcept); + Optional<StringRef> ExceptStr = convertExceptionBehaviorToStr(NewExcept); assert(ExceptStr.hasValue() && "Garbage strict exception behavior!"); #endif DefaultConstrainedExcept = NewExcept; @@ -325,7 +325,7 @@ public: /// Set the rounding mode handling to be used with constrained floating point void setDefaultConstrainedRounding(RoundingMode NewRounding) { #ifndef NDEBUG - Optional<StringRef> RoundingStr = RoundingModeToStr(NewRounding); + Optional<StringRef> RoundingStr = convertRoundingModeToStr(NewRounding); assert(RoundingStr.hasValue() && "Garbage strict rounding mode!"); #endif DefaultConstrainedRounding = NewRounding; @@ -351,7 +351,7 @@ public: } void setConstrainedFPCallAttr(CallBase *I) { - I->addAttribute(AttributeList::FunctionIndex, Attribute::StrictFP); + I->addFnAttr(Attribute::StrictFP); } void setDefaultOperandBundles(ArrayRef<OperandBundleDef> OpBundles) { @@ -697,12 +697,16 @@ public: MDNode *TBAAStructTag = nullptr, MDNode *ScopeTag = nullptr, MDNode *NoAliasTag = nullptr); - /// Create a vector fadd reduction intrinsic of the source vector. - /// The first parameter is a scalar accumulator value for ordered reductions. + /// Create a sequential vector fadd reduction intrinsic of the source vector. + /// The first parameter is a scalar accumulator value. An unordered reduction + /// can be created by adding the reassoc fast-math flag to the resulting + /// sequential reduction. CallInst *CreateFAddReduce(Value *Acc, Value *Src); - /// Create a vector fmul reduction intrinsic of the source vector. 
- /// The first parameter is a scalar accumulator value for ordered reductions. + /// Create a sequential vector fmul reduction intrinsic of the source vector. + /// The first parameter is a scalar accumulator value. An unordered reduction + /// can be created by adding the reassoc fast-math flag to the resulting + /// sequential reduction. CallInst *CreateFMulReduce(Value *Acc, Value *Src); /// Create a vector int add reduction intrinsic of the source vector. @@ -1172,7 +1176,7 @@ private: if (Rounding.hasValue()) UseRounding = Rounding.getValue(); - Optional<StringRef> RoundingStr = RoundingModeToStr(UseRounding); + Optional<StringRef> RoundingStr = convertRoundingModeToStr(UseRounding); assert(RoundingStr.hasValue() && "Garbage strict rounding mode!"); auto *RoundingMDS = MDString::get(Context, RoundingStr.getValue()); @@ -1185,7 +1189,7 @@ private: if (Except.hasValue()) UseExcept = Except.getValue(); - Optional<StringRef> ExceptStr = ExceptionBehaviorToStr(UseExcept); + Optional<StringRef> ExceptStr = convertExceptionBehaviorToStr(UseExcept); assert(ExceptStr.hasValue() && "Garbage strict exception behavior!"); auto *ExceptMDS = MDString::get(Context, ExceptStr.getValue()); @@ -2448,6 +2452,16 @@ public: return CreateExtractElement(Vec, getInt64(Idx), Name); } + Value *CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, + const Twine &Name = "") { + return CreateInsertElement(PoisonValue::get(VecTy), NewElt, Idx, Name); + } + + Value *CreateInsertElement(Type *VecTy, Value *NewElt, uint64_t Idx, + const Twine &Name = "") { + return CreateInsertElement(PoisonValue::get(VecTy), NewElt, Idx, Name); + } + Value *CreateInsertElement(Value *Vec, Value *NewElt, Value *Idx, const Twine &Name = "") { if (auto *VC = dyn_cast<Constant>(Vec)) diff --git a/llvm/include/llvm/IR/InstrTypes.h b/llvm/include/llvm/IR/InstrTypes.h index ef2c279ed455..143a87f4997d 100644 --- a/llvm/include/llvm/IR/InstrTypes.h +++ b/llvm/include/llvm/IR/InstrTypes.h @@ -19,6 +19,7 @@ 
#include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Sequence.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" @@ -755,6 +756,20 @@ public: using PredicateField = Bitfield::Element<Predicate, 0, 6, LAST_ICMP_PREDICATE>; + /// Returns the sequence of all FCmp predicates. + static auto FCmpPredicates() { + return enum_seq_inclusive(Predicate::FIRST_FCMP_PREDICATE, + Predicate::LAST_FCMP_PREDICATE, + force_iteration_on_noniterable_enum); + } + + /// Returns the sequence of all ICmp predicates. + static auto ICmpPredicates() { + return enum_seq_inclusive(Predicate::FIRST_ICMP_PREDICATE, + Predicate::LAST_ICMP_PREDICATE, + force_iteration_on_noniterable_enum); + } + protected: CmpInst(Type *ty, Instruction::OtherOps op, Predicate pred, Value *LHS, Value *RHS, const Twine &Name = "", @@ -1325,33 +1340,23 @@ public: bool arg_empty() const { return arg_end() == arg_begin(); } unsigned arg_size() const { return arg_end() - arg_begin(); } - // Legacy API names that duplicate the above and will be removed once users - // are migrated. - iterator_range<User::op_iterator> arg_operands() { - return make_range(arg_begin(), arg_end()); - } - iterator_range<User::const_op_iterator> arg_operands() const { - return make_range(arg_begin(), arg_end()); - } - unsigned getNumArgOperands() const { return arg_size(); } - Value *getArgOperand(unsigned i) const { - assert(i < getNumArgOperands() && "Out of bounds!"); + assert(i < arg_size() && "Out of bounds!"); return getOperand(i); } void setArgOperand(unsigned i, Value *v) { - assert(i < getNumArgOperands() && "Out of bounds!"); + assert(i < arg_size() && "Out of bounds!"); setOperand(i, v); } /// Wrappers for getting the \c Use of a call argument. 
const Use &getArgOperandUse(unsigned i) const { - assert(i < getNumArgOperands() && "Out of bounds!"); + assert(i < arg_size() && "Out of bounds!"); return User::getOperandUse(i); } Use &getArgOperandUse(unsigned i) { - assert(i < getNumArgOperands() && "Out of bounds!"); + assert(i < arg_size() && "Out of bounds!"); return User::getOperandUse(i); } @@ -1485,92 +1490,104 @@ public: /// the attribute is allowed for the call. bool hasFnAttr(StringRef Kind) const { return hasFnAttrImpl(Kind); } + // TODO: remove non-AtIndex versions of these methods. /// adds the attribute to the list of attributes. - void addAttribute(unsigned i, Attribute::AttrKind Kind) { - AttributeList PAL = getAttributes(); - PAL = PAL.addAttribute(getContext(), i, Kind); - setAttributes(PAL); + void addAttributeAtIndex(unsigned i, Attribute::AttrKind Kind) { + Attrs = Attrs.addAttributeAtIndex(getContext(), i, Kind); } /// adds the attribute to the list of attributes. - void addAttribute(unsigned i, Attribute Attr) { - AttributeList PAL = getAttributes(); - PAL = PAL.addAttribute(getContext(), i, Attr); - setAttributes(PAL); + void addAttributeAtIndex(unsigned i, Attribute Attr) { + Attrs = Attrs.addAttributeAtIndex(getContext(), i, Attr); + } + + /// Adds the attribute to the function. + void addFnAttr(Attribute::AttrKind Kind) { + Attrs = Attrs.addFnAttribute(getContext(), Kind); + } + + /// Adds the attribute to the function. + void addFnAttr(Attribute Attr) { + Attrs = Attrs.addFnAttribute(getContext(), Attr); + } + + /// Adds the attribute to the return value. + void addRetAttr(Attribute::AttrKind Kind) { + Attrs = Attrs.addRetAttribute(getContext(), Kind); + } + + /// Adds the attribute to the return value. 
+ void addRetAttr(Attribute Attr) { + Attrs = Attrs.addRetAttribute(getContext(), Attr); } /// Adds the attribute to the indicated argument void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind) { - assert(ArgNo < getNumArgOperands() && "Out of bounds"); - AttributeList PAL = getAttributes(); - PAL = PAL.addParamAttribute(getContext(), ArgNo, Kind); - setAttributes(PAL); + assert(ArgNo < arg_size() && "Out of bounds"); + Attrs = Attrs.addParamAttribute(getContext(), ArgNo, Kind); } /// Adds the attribute to the indicated argument void addParamAttr(unsigned ArgNo, Attribute Attr) { - assert(ArgNo < getNumArgOperands() && "Out of bounds"); - AttributeList PAL = getAttributes(); - PAL = PAL.addParamAttribute(getContext(), ArgNo, Attr); - setAttributes(PAL); + assert(ArgNo < arg_size() && "Out of bounds"); + Attrs = Attrs.addParamAttribute(getContext(), ArgNo, Attr); } /// removes the attribute from the list of attributes. - void removeAttribute(unsigned i, Attribute::AttrKind Kind) { - AttributeList PAL = getAttributes(); - PAL = PAL.removeAttribute(getContext(), i, Kind); - setAttributes(PAL); + void removeAttributeAtIndex(unsigned i, Attribute::AttrKind Kind) { + Attrs = Attrs.removeAttributeAtIndex(getContext(), i, Kind); } /// removes the attribute from the list of attributes. 
- void removeAttribute(unsigned i, StringRef Kind) { - AttributeList PAL = getAttributes(); - PAL = PAL.removeAttribute(getContext(), i, Kind); - setAttributes(PAL); + void removeAttributeAtIndex(unsigned i, StringRef Kind) { + Attrs = Attrs.removeAttributeAtIndex(getContext(), i, Kind); + } + + /// Removes the attributes from the function + void removeFnAttrs(const AttrBuilder &AttrsToRemove) { + Attrs = Attrs.removeFnAttributes(getContext(), AttrsToRemove); + } + + /// Removes the attribute from the function + void removeFnAttr(Attribute::AttrKind Kind) { + Attrs = Attrs.removeFnAttribute(getContext(), Kind); } - void removeAttributes(unsigned i, const AttrBuilder &Attrs) { - AttributeList PAL = getAttributes(); - PAL = PAL.removeAttributes(getContext(), i, Attrs); - setAttributes(PAL); + /// Removes the attribute from the return value + void removeRetAttr(Attribute::AttrKind Kind) { + Attrs = Attrs.removeRetAttribute(getContext(), Kind); + } + + /// Removes the attributes from the return value + void removeRetAttrs(const AttrBuilder &AttrsToRemove) { + Attrs = Attrs.removeRetAttributes(getContext(), AttrsToRemove); } /// Removes the attribute from the given argument void removeParamAttr(unsigned ArgNo, Attribute::AttrKind Kind) { - assert(ArgNo < getNumArgOperands() && "Out of bounds"); - AttributeList PAL = getAttributes(); - PAL = PAL.removeParamAttribute(getContext(), ArgNo, Kind); - setAttributes(PAL); + assert(ArgNo < arg_size() && "Out of bounds"); + Attrs = Attrs.removeParamAttribute(getContext(), ArgNo, Kind); } /// Removes the attribute from the given argument void removeParamAttr(unsigned ArgNo, StringRef Kind) { - assert(ArgNo < getNumArgOperands() && "Out of bounds"); - AttributeList PAL = getAttributes(); - PAL = PAL.removeParamAttribute(getContext(), ArgNo, Kind); - setAttributes(PAL); + assert(ArgNo < arg_size() && "Out of bounds"); + Attrs = Attrs.removeParamAttribute(getContext(), ArgNo, Kind); } /// Removes the attributes from the given 
argument - void removeParamAttrs(unsigned ArgNo, const AttrBuilder &Attrs) { - AttributeList PAL = getAttributes(); - PAL = PAL.removeParamAttributes(getContext(), ArgNo, Attrs); - setAttributes(PAL); + void removeParamAttrs(unsigned ArgNo, const AttrBuilder &AttrsToRemove) { + Attrs = Attrs.removeParamAttributes(getContext(), ArgNo, AttrsToRemove); } /// adds the dereferenceable attribute to the list of attributes. - void addDereferenceableAttr(unsigned i, uint64_t Bytes) { - AttributeList PAL = getAttributes(); - PAL = PAL.addDereferenceableAttr(getContext(), i, Bytes); - setAttributes(PAL); + void addDereferenceableParamAttr(unsigned i, uint64_t Bytes) { + Attrs = Attrs.addDereferenceableParamAttr(getContext(), i, Bytes); } - /// adds the dereferenceable_or_null attribute to the list of - /// attributes. - void addDereferenceableOrNullAttr(unsigned i, uint64_t Bytes) { - AttributeList PAL = getAttributes(); - PAL = PAL.addDereferenceableOrNullAttr(getContext(), i, Bytes); - setAttributes(PAL); + /// adds the dereferenceable attribute to the list of attributes. + void addDereferenceableRetAttr(uint64_t Bytes) { + Attrs = Attrs.addDereferenceableRetAttr(getContext(), Bytes); } /// Determine whether the return value has the given attribute. @@ -1584,24 +1601,34 @@ public: bool paramHasAttr(unsigned ArgNo, Attribute::AttrKind Kind) const; /// Get the attribute of a given kind at a position. - Attribute getAttribute(unsigned i, Attribute::AttrKind Kind) const { - return getAttributes().getAttribute(i, Kind); + Attribute getAttributeAtIndex(unsigned i, Attribute::AttrKind Kind) const { + return getAttributes().getAttributeAtIndex(i, Kind); } /// Get the attribute of a given kind at a position. 
- Attribute getAttribute(unsigned i, StringRef Kind) const { - return getAttributes().getAttribute(i, Kind); + Attribute getAttributeAtIndex(unsigned i, StringRef Kind) const { + return getAttributes().getAttributeAtIndex(i, Kind); + } + + /// Get the attribute of a given kind for the function. + Attribute getFnAttr(StringRef Kind) const { + return getAttributes().getFnAttr(Kind); + } + + /// Get the attribute of a given kind for the function. + Attribute getFnAttr(Attribute::AttrKind Kind) const { + return getAttributes().getFnAttr(Kind); } /// Get the attribute of a given kind from a given arg Attribute getParamAttr(unsigned ArgNo, Attribute::AttrKind Kind) const { - assert(ArgNo < getNumArgOperands() && "Out of bounds"); + assert(ArgNo < arg_size() && "Out of bounds"); return getAttributes().getParamAttr(ArgNo, Kind); } /// Get the attribute of a given kind from a given arg Attribute getParamAttr(unsigned ArgNo, StringRef Kind) const { - assert(ArgNo < getNumArgOperands() && "Out of bounds"); + assert(ArgNo < arg_size() && "Out of bounds"); return getAttributes().getParamAttr(ArgNo, Kind); } @@ -1609,42 +1636,35 @@ public: /// A. /// /// Data operands include call arguments and values used in operand bundles, - /// but does not include the callee operand. This routine dispatches to the - /// underlying AttributeList or the OperandBundleUser as appropriate. + /// but does not include the callee operand. /// /// The index \p i is interpreted as /// - /// \p i == Attribute::ReturnIndex -> the return value - /// \p i in [1, arg_size + 1) -> argument number (\p i - 1) - /// \p i in [arg_size + 1, data_operand_size + 1) -> bundle operand at index - /// (\p i - 1) in the operand list. + /// \p i in [0, arg_size) -> argument number (\p i) + /// \p i in [arg_size, data_operand_size) -> bundle operand at index + /// (\p i) in the operand list. 
bool dataOperandHasImpliedAttr(unsigned i, Attribute::AttrKind Kind) const { // Note that we have to add one because `i` isn't zero-indexed. - assert(i < (getNumArgOperands() + getNumTotalBundleOperands() + 1) && + assert(i < arg_size() + getNumTotalBundleOperands() && "Data operand index out of bounds!"); // The attribute A can either be directly specified, if the operand in // question is a call argument; or be indirectly implied by the kind of its // containing operand bundle, if the operand is a bundle operand. - if (i == AttributeList::ReturnIndex) - return hasRetAttr(Kind); - - // FIXME: Avoid these i - 1 calculations and update the API to use - // zero-based indices. - if (i < (getNumArgOperands() + 1)) - return paramHasAttr(i - 1, Kind); + if (i < arg_size()) + return paramHasAttr(i, Kind); - assert(hasOperandBundles() && i >= (getBundleOperandsStartIndex() + 1) && + assert(hasOperandBundles() && i >= getBundleOperandsStartIndex() && "Must be either a call argument or an operand bundle!"); - return bundleOperandHasAttr(i - 1, Kind); + return bundleOperandHasAttr(i, Kind); } /// Determine whether this data operand is not captured. // FIXME: Once this API is no longer duplicated in `CallSite`, rename this to // better indicate that this may return a conservative answer. bool doesNotCapture(unsigned OpNo) const { - return dataOperandHasImpliedAttr(OpNo + 1, Attribute::NoCapture); + return dataOperandHasImpliedAttr(OpNo, Attribute::NoCapture); } /// Determine whether this argument is passed by value. @@ -1685,21 +1705,21 @@ public: // FIXME: Once this API is no longer duplicated in `CallSite`, rename this to // better indicate that this may return a conservative answer. 
bool doesNotAccessMemory(unsigned OpNo) const { - return dataOperandHasImpliedAttr(OpNo + 1, Attribute::ReadNone); + return dataOperandHasImpliedAttr(OpNo, Attribute::ReadNone); } // FIXME: Once this API is no longer duplicated in `CallSite`, rename this to // better indicate that this may return a conservative answer. bool onlyReadsMemory(unsigned OpNo) const { - return dataOperandHasImpliedAttr(OpNo + 1, Attribute::ReadOnly) || - dataOperandHasImpliedAttr(OpNo + 1, Attribute::ReadNone); + return dataOperandHasImpliedAttr(OpNo, Attribute::ReadOnly) || + dataOperandHasImpliedAttr(OpNo, Attribute::ReadNone); } // FIXME: Once this API is no longer duplicated in `CallSite`, rename this to // better indicate that this may return a conservative answer. bool doesNotReadMemory(unsigned OpNo) const { - return dataOperandHasImpliedAttr(OpNo + 1, Attribute::WriteOnly) || - dataOperandHasImpliedAttr(OpNo + 1, Attribute::ReadNone); + return dataOperandHasImpliedAttr(OpNo, Attribute::WriteOnly) || + dataOperandHasImpliedAttr(OpNo, Attribute::ReadNone); } /// Extract the alignment of the return value. @@ -1743,14 +1763,26 @@ public: /// Extract the number of dereferenceable bytes for a call or /// parameter (0=unknown). - uint64_t getDereferenceableBytes(unsigned i) const { - return Attrs.getDereferenceableBytes(i); + uint64_t getRetDereferenceableBytes() const { + return Attrs.getRetDereferenceableBytes(); + } + + /// Extract the number of dereferenceable bytes for a call or + /// parameter (0=unknown). + uint64_t getParamDereferenceableBytes(unsigned i) const { + return Attrs.getParamDereferenceableBytes(i); } - /// Extract the number of dereferenceable_or_null bytes for a call or + /// Extract the number of dereferenceable_or_null bytes for a call + /// (0=unknown). + uint64_t getRetDereferenceableOrNullBytes() const { + return Attrs.getRetDereferenceableOrNullBytes(); + } + + /// Extract the number of dereferenceable_or_null bytes for a /// parameter (0=unknown). 
- uint64_t getDereferenceableOrNullBytes(unsigned i) const { - return Attrs.getDereferenceableOrNullBytes(i); + uint64_t getParamDereferenceableOrNullBytes(unsigned i) const { + return Attrs.getParamDereferenceableOrNullBytes(i); } /// Return true if the return value is known to be not null. @@ -1760,7 +1792,7 @@ public: /// Determine if the return value is marked with NoAlias attribute. bool returnDoesNotAlias() const { - return Attrs.hasAttribute(AttributeList::ReturnIndex, Attribute::NoAlias); + return Attrs.hasRetAttr(Attribute::NoAlias); } /// If one of the arguments has the 'returned' attribute, returns its @@ -1779,40 +1811,30 @@ public: /// Return true if the call should not be inlined. bool isNoInline() const { return hasFnAttr(Attribute::NoInline); } - void setIsNoInline() { - addAttribute(AttributeList::FunctionIndex, Attribute::NoInline); - } + void setIsNoInline() { addFnAttr(Attribute::NoInline); } /// Determine if the call does not access memory. bool doesNotAccessMemory() const { return hasFnAttr(Attribute::ReadNone); } - void setDoesNotAccessMemory() { - addAttribute(AttributeList::FunctionIndex, Attribute::ReadNone); - } + void setDoesNotAccessMemory() { addFnAttr(Attribute::ReadNone); } /// Determine if the call does not access or only reads memory. bool onlyReadsMemory() const { return doesNotAccessMemory() || hasFnAttr(Attribute::ReadOnly); } - void setOnlyReadsMemory() { - addAttribute(AttributeList::FunctionIndex, Attribute::ReadOnly); - } + void setOnlyReadsMemory() { addFnAttr(Attribute::ReadOnly); } /// Determine if the call does not access or only writes memory. bool doesNotReadMemory() const { return doesNotAccessMemory() || hasFnAttr(Attribute::WriteOnly); } - void setDoesNotReadMemory() { - addAttribute(AttributeList::FunctionIndex, Attribute::WriteOnly); - } + void setDoesNotReadMemory() { addFnAttr(Attribute::WriteOnly); } /// Determine if the call can access memmory only using pointers based /// on its arguments. 
bool onlyAccessesArgMemory() const { return hasFnAttr(Attribute::ArgMemOnly); } - void setOnlyAccessesArgMemory() { - addAttribute(AttributeList::FunctionIndex, Attribute::ArgMemOnly); - } + void setOnlyAccessesArgMemory() { addFnAttr(Attribute::ArgMemOnly); } /// Determine if the function may only access memory that is /// inaccessible from the IR. @@ -1820,7 +1842,7 @@ public: return hasFnAttr(Attribute::InaccessibleMemOnly); } void setOnlyAccessesInaccessibleMemory() { - addAttribute(AttributeList::FunctionIndex, Attribute::InaccessibleMemOnly); + addFnAttr(Attribute::InaccessibleMemOnly); } /// Determine if the function may only access memory that is @@ -1829,49 +1851,36 @@ public: return hasFnAttr(Attribute::InaccessibleMemOrArgMemOnly); } void setOnlyAccessesInaccessibleMemOrArgMem() { - addAttribute(AttributeList::FunctionIndex, - Attribute::InaccessibleMemOrArgMemOnly); + addFnAttr(Attribute::InaccessibleMemOrArgMemOnly); } /// Determine if the call cannot return. bool doesNotReturn() const { return hasFnAttr(Attribute::NoReturn); } - void setDoesNotReturn() { - addAttribute(AttributeList::FunctionIndex, Attribute::NoReturn); - } + void setDoesNotReturn() { addFnAttr(Attribute::NoReturn); } /// Determine if the call should not perform indirect branch tracking. bool doesNoCfCheck() const { return hasFnAttr(Attribute::NoCfCheck); } /// Determine if the call cannot unwind. bool doesNotThrow() const { return hasFnAttr(Attribute::NoUnwind); } - void setDoesNotThrow() { - addAttribute(AttributeList::FunctionIndex, Attribute::NoUnwind); - } + void setDoesNotThrow() { addFnAttr(Attribute::NoUnwind); } /// Determine if the invoke cannot be duplicated. bool cannotDuplicate() const { return hasFnAttr(Attribute::NoDuplicate); } - void setCannotDuplicate() { - addAttribute(AttributeList::FunctionIndex, Attribute::NoDuplicate); - } + void setCannotDuplicate() { addFnAttr(Attribute::NoDuplicate); } /// Determine if the call cannot be tail merged. 
bool cannotMerge() const { return hasFnAttr(Attribute::NoMerge); } - void setCannotMerge() { - addAttribute(AttributeList::FunctionIndex, Attribute::NoMerge); - } + void setCannotMerge() { addFnAttr(Attribute::NoMerge); } /// Determine if the invoke is convergent bool isConvergent() const { return hasFnAttr(Attribute::Convergent); } - void setConvergent() { - addAttribute(AttributeList::FunctionIndex, Attribute::Convergent); - } - void setNotConvergent() { - removeAttribute(AttributeList::FunctionIndex, Attribute::Convergent); - } + void setConvergent() { addFnAttr(Attribute::Convergent); } + void setNotConvergent() { removeFnAttr(Attribute::Convergent); } /// Determine if the call returns a structure through first /// pointer argument. bool hasStructRetAttr() const { - if (getNumArgOperands() == 0) + if (arg_empty()) return false; // Be friendly and also check the callee. @@ -1918,6 +1927,13 @@ public: Idx < getBundleOperandsEndIndex(); } + /// Return true if the operand at index \p Idx is a bundle operand that has + /// tag ID \p ID. + bool isOperandBundleOfType(uint32_t ID, unsigned Idx) const { + return isBundleOperand(Idx) && + getOperandBundleForOperand(Idx).getTagID() == ID; + } + /// Returns true if the use is a bundle operand. bool isBundleOperand(const Use *U) const { assert(this == U->getUser() && @@ -2258,7 +2274,7 @@ private: bool hasFnAttrOnCalledFunction(StringRef Kind) const; template <typename AttrKind> bool hasFnAttrImpl(AttrKind Kind) const { - if (Attrs.hasFnAttribute(Kind)) + if (Attrs.hasFnAttr(Kind)) return true; // Operand bundles override attributes on the called function, but don't @@ -2272,12 +2288,12 @@ private: /// Determine whether the return value has the given attribute. Supports /// Attribute::AttrKind and StringRef as \p AttrKind types. 
template <typename AttrKind> bool hasRetAttrImpl(AttrKind Kind) const { - if (Attrs.hasAttribute(AttributeList::ReturnIndex, Kind)) + if (Attrs.hasRetAttr(Kind)) return true; // Look at the callee, if available. if (const Function *F = getCalledFunction()) - return F->getAttributes().hasAttribute(AttributeList::ReturnIndex, Kind); + return F->getAttributes().hasRetAttr(Kind); return false; } }; diff --git a/llvm/include/llvm/IR/Instruction.h b/llvm/include/llvm/IR/Instruction.h index deb85cf277fe..9878082ffffa 100644 --- a/llvm/include/llvm/IR/Instruction.h +++ b/llvm/include/llvm/IR/Instruction.h @@ -59,11 +59,11 @@ protected: // Template alias so that all Instruction storing alignment use the same // definiton. // Valid alignments are powers of two from 2^0 to 2^MaxAlignmentExponent = - // 2^29. We store them as Log2(Alignment), so we need 5 bits to encode the 30 + // 2^32. We store them as Log2(Alignment), so we need 6 bits to encode the 33 // possible values. template <unsigned Offset> using AlignmentBitfieldElementT = - typename Bitfield::Element<unsigned, Offset, 5, + typename Bitfield::Element<unsigned, Offset, 6, Value::MaxAlignmentExponent>; template <unsigned Offset> @@ -307,11 +307,6 @@ public: Value::getAllMetadata(MDs); } - /// Fills the AAMDNodes structure with AA metadata from this instruction. - /// When Merge is true, the existing AA metadata is merged with that from this - /// instruction providing the most-general result. - void getAAMetadata(AAMDNodes &N, bool Merge = false) const; - /// Set the metadata of the specified kind to the specified node. This updates /// or replaces metadata if already present, or removes it if Node is null. void setMetadata(unsigned KindID, MDNode *Node); @@ -352,7 +347,10 @@ public: /// to the existing node. void addAnnotationMetadata(StringRef Annotation); - /// Sets the metadata on this instruction from the AAMDNodes structure. + /// Returns the AA metadata for this instruction. 
+ AAMDNodes getAAMetadata() const; + + /// Sets the AA metadata on this instruction from the AAMDNodes structure. void setAAMetadata(const AAMDNodes &N); /// Retrieve the raw weight values of a conditional branch or select. @@ -389,6 +387,10 @@ public: /// Determine whether the no signed wrap flag is set. bool hasNoSignedWrap() const; + /// Return true if this operator has flags which may cause this instruction + /// to evaluate to poison despite having non-poison inputs. + bool hasPoisonGeneratingFlags() const; + /// Drops flags that may cause this instruction to evaluate to poison despite /// having non-poison inputs. void dropPoisonGeneratingFlags(); diff --git a/llvm/include/llvm/IR/Instructions.h b/llvm/include/llvm/IR/Instructions.h index 0c43a56daa33..6d32a898b668 100644 --- a/llvm/include/llvm/IR/Instructions.h +++ b/llvm/include/llvm/IR/Instructions.h @@ -126,7 +126,7 @@ public: } // FIXME: Remove this one transition to Align is over. - unsigned getAlignment() const { return getAlign().value(); } + uint64_t getAlignment() const { return getAlign().value(); } /// Return true if this alloca is in the entry block of the function and is a /// constant size. If so, the code generator will fold it into the @@ -217,7 +217,7 @@ public: /// Return the alignment of the access that is being performed. /// FIXME: Remove this function once transition to Align is over. /// Use getAlign() instead. - unsigned getAlignment() const { return getAlign().value(); } + uint64_t getAlignment() const { return getAlign().value(); } /// Return the alignment of the access that is being performed. Align getAlign() const { @@ -348,7 +348,7 @@ public: /// Return the alignment of the access that is being performed /// FIXME: Remove this function once transition to Align is over. /// Use getAlign() instead. 
- unsigned getAlignment() const { return getAlign().value(); } + uint64_t getAlignment() const { return getAlign().value(); } Align getAlign() const { return Align(1ULL << (getSubclassData<AlignmentField>())); @@ -1339,6 +1339,10 @@ public: return P == ICMP_SLE || P == ICMP_ULE; } + /// Returns the sequence of all ICmp predicates. + /// + static auto predicates() { return ICmpPredicates(); } + /// Exchange the two operands to this instruction in such a way that it does /// not modify the semantics of the instruction. The predicate value may be /// changed to retain the same result if the predicate is order dependent @@ -1349,6 +1353,10 @@ public: Op<0>().swap(Op<1>()); } + /// Return result of `LHS Pred RHS` comparison. + static bool compare(const APInt &LHS, const APInt &RHS, + ICmpInst::Predicate Pred); + // Methods for support type inquiry through isa, cast, and dyn_cast: static bool classof(const Instruction *I) { return I->getOpcode() == Instruction::ICmp; @@ -1457,6 +1465,10 @@ public: Op<0>().swap(Op<1>()); } + /// Returns the sequence of all FCmp predicates. 
+ /// + static auto predicates() { return FCmpPredicates(); } + /// Methods for support type inquiry through isa, cast, and dyn_cast: static bool classof(const Instruction *I) { return I->getOpcode() == Instruction::FCmp; @@ -1685,9 +1697,7 @@ public: /// Return true if the call can return twice bool canReturnTwice() const { return hasFnAttr(Attribute::ReturnsTwice); } - void setCanReturnTwice() { - addAttribute(AttributeList::FunctionIndex, Attribute::ReturnsTwice); - } + void setCanReturnTwice() { addFnAttr(Attribute::ReturnsTwice); } // Methods for support type inquiry through isa, cast, and dyn_cast: static bool classof(const Instruction *I) { @@ -2019,6 +2029,14 @@ protected: ShuffleVectorInst *cloneImpl() const; public: + ShuffleVectorInst(Value *V1, Value *Mask, const Twine &NameStr = "", + Instruction *InsertBefore = nullptr); + ShuffleVectorInst(Value *V1, Value *Mask, const Twine &NameStr, + BasicBlock *InsertAtEnd); + ShuffleVectorInst(Value *V1, ArrayRef<int> Mask, const Twine &NameStr = "", + Instruction *InsertBefore = nullptr); + ShuffleVectorInst(Value *V1, ArrayRef<int> Mask, const Twine &NameStr, + BasicBlock *InsertAtEnd); ShuffleVectorInst(Value *V1, Value *V2, Value *Mask, const Twine &NameStr = "", Instruction *InsertBefor = nullptr); @@ -2306,6 +2324,57 @@ public: return isExtractSubvectorMask(ShuffleMask, NumSrcElts, Index); } + /// Return true if this shuffle mask is an insert subvector mask. + /// A valid insert subvector mask inserts the lowest elements of a second + /// source operand into an in-place first source operand operand. + /// Both the sub vector width and the insertion index is returned. 
+ static bool isInsertSubvectorMask(ArrayRef<int> Mask, int NumSrcElts, + int &NumSubElts, int &Index); + static bool isInsertSubvectorMask(const Constant *Mask, int NumSrcElts, + int &NumSubElts, int &Index) { + assert(Mask->getType()->isVectorTy() && "Shuffle needs vector constant."); + // Not possible to express a shuffle mask for a scalable vector for this + // case. + if (isa<ScalableVectorType>(Mask->getType())) + return false; + SmallVector<int, 16> MaskAsInts; + getShuffleMask(Mask, MaskAsInts); + return isInsertSubvectorMask(MaskAsInts, NumSrcElts, NumSubElts, Index); + } + + /// Return true if this shuffle mask is an insert subvector mask. + bool isInsertSubvectorMask(int &NumSubElts, int &Index) const { + // Not possible to express a shuffle mask for a scalable vector for this + // case. + if (isa<ScalableVectorType>(getType())) + return false; + + int NumSrcElts = + cast<FixedVectorType>(Op<0>()->getType())->getNumElements(); + return isInsertSubvectorMask(ShuffleMask, NumSrcElts, NumSubElts, Index); + } + + /// Return true if this shuffle mask replicates each of the \p VF elements + /// in a vector \p ReplicationFactor times. + /// For example, the mask for \p ReplicationFactor=3 and \p VF=4 is: + /// <0,0,0,1,1,1,2,2,2,3,3,3> + static bool isReplicationMask(ArrayRef<int> Mask, int &ReplicationFactor, + int &VF); + static bool isReplicationMask(const Constant *Mask, int &ReplicationFactor, + int &VF) { + assert(Mask->getType()->isVectorTy() && "Shuffle needs vector constant."); + // Not possible to express a shuffle mask for a scalable vector for this + // case. + if (isa<ScalableVectorType>(Mask->getType())) + return false; + SmallVector<int, 16> MaskAsInts; + getShuffleMask(Mask, MaskAsInts); + return isReplicationMask(MaskAsInts, ReplicationFactor, VF); + } + + /// Return true if this shuffle mask is a replication mask. 
+ bool isReplicationMask(int &ReplicationFactor, int &VF) const; + /// Change values in a shuffle permute mask assuming the two vector operands /// of length InVecNumElts have swapped position. static void commuteShuffleMask(MutableArrayRef<int> Mask, @@ -3281,14 +3350,14 @@ public: CaseHandle(SwitchInst *SI, ptrdiff_t Index) : CaseHandleImpl(SI, Index) {} /// Sets the new value for current case. - void setValue(ConstantInt *V) { + void setValue(ConstantInt *V) const { assert((unsigned)Index < SI->getNumCases() && "Index out the number of cases."); SI->setOperand(2 + Index*2, reinterpret_cast<Value*>(V)); } /// Sets the new successor for current case. - void setSuccessor(BasicBlock *S) { + void setSuccessor(BasicBlock *S) const { SI->setSuccessor(getSuccessorIndex(), S); } }; @@ -3297,7 +3366,7 @@ public: class CaseIteratorImpl : public iterator_facade_base<CaseIteratorImpl<CaseHandleT>, std::random_access_iterator_tag, - CaseHandleT> { + const CaseHandleT> { using SwitchInstT = typename CaseHandleT::SwitchInstType; CaseHandleT Case; @@ -3356,7 +3425,6 @@ public: assert(Case.SI == RHS.Case.SI && "Incompatible operators."); return Case.Index < RHS.Case.Index; } - CaseHandleT &operator*() { return Case; } const CaseHandleT &operator*() const { return Case; } }; @@ -3446,15 +3514,12 @@ public: /// default case iterator to indicate that it is handled by the default /// handler. 
CaseIt findCaseValue(const ConstantInt *C) { - CaseIt I = llvm::find_if( - cases(), [C](CaseHandle &Case) { return Case.getCaseValue() == C; }); - if (I != case_end()) - return I; - - return case_default(); + return CaseIt( + this, + const_cast<const SwitchInst *>(this)->findCaseValue(C)->getCaseIndex()); } ConstCaseIt findCaseValue(const ConstantInt *C) const { - ConstCaseIt I = llvm::find_if(cases(), [C](ConstCaseHandle &Case) { + ConstCaseIt I = llvm::find_if(cases(), [C](const ConstCaseHandle &Case) { return Case.getCaseValue() == C; }); if (I != case_end()) @@ -4069,14 +4134,12 @@ public: /// Value *getIndirectDestLabel(unsigned i) const { assert(i < getNumIndirectDests() && "Out of bounds!"); - return getOperand(i + getNumArgOperands() + getNumTotalBundleOperands() + - 1); + return getOperand(i + arg_size() + getNumTotalBundleOperands() + 1); } Value *getIndirectDestLabelUse(unsigned i) const { assert(i < getNumIndirectDests() && "Out of bounds!"); - return getOperandUse(i + getNumArgOperands() + getNumTotalBundleOperands() + - 1); + return getOperandUse(i + arg_size() + getNumTotalBundleOperands() + 1); } // Return the destination basic blocks... diff --git a/llvm/include/llvm/IR/IntrinsicInst.h b/llvm/include/llvm/IR/IntrinsicInst.h index 6b42cb949050..d186029db8cf 100644 --- a/llvm/include/llvm/IR/IntrinsicInst.h +++ b/llvm/include/llvm/IR/IntrinsicInst.h @@ -448,6 +448,28 @@ public: static Optional<unsigned> getFunctionalOpcodeForVP(Intrinsic::ID ID); }; +/// This represents vector predication reduction intrinsics. 
+class VPReductionIntrinsic : public VPIntrinsic { +public: + static bool isVPReduction(Intrinsic::ID ID); + + unsigned getStartParamPos() const; + unsigned getVectorParamPos() const; + + static Optional<unsigned> getStartParamPos(Intrinsic::ID ID); + static Optional<unsigned> getVectorParamPos(Intrinsic::ID ID); + + /// Methods for support type inquiry through isa, cast, and dyn_cast: + /// @{ + static bool classof(const IntrinsicInst *I) { + return VPReductionIntrinsic::isVPReduction(I->getIntrinsicID()); + } + static bool classof(const Value *V) { + return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V)); + } + /// @} +}; + /// This is the common base class for constrained floating point intrinsics. class ConstrainedFPIntrinsic : public IntrinsicInst { public: diff --git a/llvm/include/llvm/IR/Intrinsics.h b/llvm/include/llvm/IR/Intrinsics.h index 80a2f5a8cd3e..2ff48380ac28 100644 --- a/llvm/include/llvm/IR/Intrinsics.h +++ b/llvm/include/llvm/IR/Intrinsics.h @@ -140,7 +140,8 @@ namespace Intrinsic { Subdivide2Argument, Subdivide4Argument, VecOfBitcastsToInt, - AMX + AMX, + PPCQuad, } Kind; union { diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index 28fcc13266b1..637e6d8f6cf5 100644 --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -312,6 +312,8 @@ def llvm_v1i128_ty : LLVMType<v1i128>; // 1 x i128 def llvm_v2f16_ty : LLVMType<v2f16>; // 2 x half (__fp16) def llvm_v4f16_ty : LLVMType<v4f16>; // 4 x half (__fp16) def llvm_v8f16_ty : LLVMType<v8f16>; // 8 x half (__fp16) +def llvm_v16f16_ty : LLVMType<v16f16>; // 16 x half (__fp16) +def llvm_v32f16_ty : LLVMType<v32f16>; // 32 x half (__fp16) def llvm_v2bf16_ty : LLVMType<v2bf16>; // 2 x bfloat (__bf16) def llvm_v4bf16_ty : LLVMType<v4bf16>; // 4 x bfloat (__bf16) def llvm_v8bf16_ty : LLVMType<v8bf16>; // 8 x bfloat (__bf16) @@ -1329,10 +1331,10 @@ def int_donothing : DefaultAttrsIntrinsic<[], [], [IntrNoMem, IntrWillReturn]>; def 
int_sideeffect : DefaultAttrsIntrinsic<[], [], [IntrInaccessibleMemOnly, IntrWillReturn]>; // The pseudoprobe intrinsic works as a place holder to the block it probes. -// Like the sideeffect intrinsic defined above, this intrinsic is treated by the -// optimizer as having opaque side effects so that it won't be get rid of or moved +// Like the sideeffect intrinsic defined above, this intrinsic is treated by the +// optimizer as having opaque side effects so that it won't be get rid of or moved // out of the block it probes. -def int_pseudoprobe : Intrinsic<[], [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_i64_ty], +def int_pseudoprobe : DefaultAttrsIntrinsic<[], [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_i64_ty], [IntrInaccessibleMemOnly, IntrWillReturn]>; // Arithmetic fence intrinsic. @@ -1497,12 +1499,96 @@ let IntrProperties = LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_i32_ty]>; } +// Shuffles. +def int_vp_select : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ], + [ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + LLVMMatchType<0>, + LLVMMatchType<0>, + llvm_i32_ty]>; + +// Reductions +let IntrProperties = [IntrSpeculatable, IntrNoMem, IntrNoSync, IntrWillReturn] in { + def int_vp_reduce_fadd : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], + [LLVMVectorElementType<0>, + llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; + def int_vp_reduce_fmul : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], + [LLVMVectorElementType<0>, + llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; + def int_vp_reduce_add : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], + [LLVMVectorElementType<0>, + llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; + def int_vp_reduce_mul : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], + [LLVMVectorElementType<0>, + llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; + def int_vp_reduce_and : 
DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], + [LLVMVectorElementType<0>, + llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; + def int_vp_reduce_or : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], + [LLVMVectorElementType<0>, + llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; + def int_vp_reduce_xor : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], + [LLVMVectorElementType<0>, + llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; + def int_vp_reduce_smax : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], + [LLVMVectorElementType<0>, + llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; + def int_vp_reduce_smin : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], + [LLVMVectorElementType<0>, + llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; + def int_vp_reduce_umax : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], + [LLVMVectorElementType<0>, + llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; + def int_vp_reduce_umin : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], + [LLVMVectorElementType<0>, + llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; + def int_vp_reduce_fmax : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], + [LLVMVectorElementType<0>, + llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; + def int_vp_reduce_fmin : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], + [LLVMVectorElementType<0>, + llvm_anyvector_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty]>; +} def int_get_active_lane_mask: DefaultAttrsIntrinsic<[llvm_anyvector_ty], [llvm_anyint_ty, LLVMMatchType<1>], [IntrNoMem, IntrNoSync, IntrWillReturn]>; +def int_experimental_vp_splice: + DefaultAttrsIntrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, + LLVMMatchType<0>, + llvm_i32_ty, + 
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, ImmArg<ArgIndex<2>>]>; + //===-------------------------- Masked Intrinsics -------------------------===// // def int_masked_load: @@ -1558,12 +1644,15 @@ def int_icall_branch_funnel : DefaultAttrsIntrinsic<[], [llvm_vararg_ty], []>; def int_load_relative: DefaultAttrsIntrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_anyint_ty], [IntrReadMem, IntrArgMemOnly]>; +def int_asan_check_memaccess : + Intrinsic<[],[llvm_ptr_ty, llvm_i32_ty], [ImmArg<ArgIndex<1>>]>; + def int_hwasan_check_memaccess : Intrinsic<[], [llvm_ptr_ty, llvm_ptr_ty, llvm_i32_ty], - [IntrInaccessibleMemOnly, ImmArg<ArgIndex<2>>]>; + [ImmArg<ArgIndex<2>>]>; def int_hwasan_check_memaccess_shortgranules : Intrinsic<[], [llvm_ptr_ty, llvm_ptr_ty, llvm_i32_ty], - [IntrInaccessibleMemOnly, ImmArg<ArgIndex<2>>]>; + [ImmArg<ArgIndex<2>>]>; // Xray intrinsics //===----------------------------------------------------------------------===// @@ -1658,7 +1747,7 @@ def int_matrix_multiply def int_matrix_column_major_load : DefaultAttrsIntrinsic<[llvm_anyvector_ty], - [LLVMPointerToElt<0>, llvm_i64_ty, llvm_i1_ty, + [LLVMPointerToElt<0>, llvm_anyint_ty, llvm_i1_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoSync, IntrWillReturn, IntrArgMemOnly, IntrReadMem, NoCapture<ArgIndex<0>>, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>, @@ -1667,7 +1756,7 @@ def int_matrix_column_major_load def int_matrix_column_major_store : DefaultAttrsIntrinsic<[], [llvm_anyvector_ty, LLVMPointerToElt<0>, - llvm_i64_ty, llvm_i1_ty, llvm_i32_ty, llvm_i32_ty], + llvm_anyint_ty, llvm_i1_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoSync, IntrWillReturn, IntrArgMemOnly, IntrWriteMem, WriteOnly<ArgIndex<1>>, NoCapture<ArgIndex<1>>, ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>; @@ -1761,6 +1850,61 @@ def int_experimental_vector_splice : DefaultAttrsIntrinsic<[llvm_anyvector_ty], llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<2>>]>; + +//===----------------- Pointer 
Authentication Intrinsics ------------------===// +// + +// Sign an unauthenticated pointer using the specified key and discriminator, +// passed in that order. +// Returns the first argument, with some known bits replaced with a signature. +def int_ptrauth_sign : Intrinsic<[llvm_i64_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty], + [IntrNoMem, ImmArg<ArgIndex<1>>]>; + +// Authenticate a signed pointer, using the specified key and discriminator. +// Returns the first argument, with the signature bits removed. +// The signature must be valid. +def int_ptrauth_auth : Intrinsic<[llvm_i64_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty], + [IntrNoMem,ImmArg<ArgIndex<1>>]>; + +// Authenticate a signed pointer and resign it. +// The second (key) and third (discriminator) arguments specify the signing +// schema used for authenticating. +// The fourth and fifth arguments specify the schema used for signing. +// The signature must be valid. +// This is a combined form of @llvm.ptrauth.sign and @llvm.ptrauth.auth, with +// an additional integrity guarantee on the intermediate value. +def int_ptrauth_resign : Intrinsic<[llvm_i64_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty, + llvm_i32_ty, llvm_i64_ty], + [IntrNoMem, ImmArg<ArgIndex<1>>, + ImmArg<ArgIndex<3>>]>; + +// Strip the embedded signature out of a signed pointer. +// The second argument specifies the key. +// This behaves like @llvm.ptrauth.auth, but doesn't require the signature to +// be valid. +def int_ptrauth_strip : Intrinsic<[llvm_i64_ty], + [llvm_i64_ty, llvm_i32_ty], + [IntrNoMem, ImmArg<ArgIndex<1>>]>; + +// Blend a small integer discriminator with an address discriminator, producing +// a new discriminator value. +def int_ptrauth_blend : Intrinsic<[llvm_i64_ty], + [llvm_i64_ty, llvm_i64_ty], + [IntrNoMem]>; + +// Compute the signature of a value, using a given discriminator. 
+// This differs from @llvm.ptrauth.sign in that it doesn't embed the computed +// signature in the pointer, but instead returns the signature as a value. +// That allows it to be used to sign non-pointer data: in that sense, it is +// generic. There is no generic @llvm.ptrauth.auth: instead, the signature +// can be computed using @llvm.ptrauth.sign_generic, and compared with icmp. +def int_ptrauth_sign_generic : Intrinsic<[llvm_i64_ty], + [llvm_i64_ty, llvm_i64_ty], + [IntrNoMem]>; + //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td index 87e0f83f85b7..c586af45f34d 100644 --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -962,6 +962,25 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.". LLVMPointerToElt<0>], [IntrReadMem, IntrArgMemOnly]>; + class AdvSIMD_2Vec_PredLoad_Intrinsic + : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>], + [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + LLVMPointerToElt<0>], + [IntrReadMem, IntrArgMemOnly]>; + + class AdvSIMD_3Vec_PredLoad_Intrinsic + : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>], + [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + LLVMPointerToElt<0>], + [IntrReadMem, IntrArgMemOnly]>; + + class AdvSIMD_4Vec_PredLoad_Intrinsic + : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, + LLVMMatchType<0>], + [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + LLVMPointerToElt<0>], + [IntrReadMem, IntrArgMemOnly]>; + class AdvSIMD_1Vec_PredLoad_WriteFFR_Intrinsic : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, @@ -1365,7 +1384,7 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.". 
// This class of intrinsics are not intended to be useful within LLVM IR but // are instead here to support some of the more regid parts of the ACLE. - class Builtin_SVCVT<string name, LLVMType OUT, LLVMType PRED, LLVMType IN> + class Builtin_SVCVT<LLVMType OUT, LLVMType PRED, LLVMType IN> : DefaultAttrsIntrinsic<[OUT], [OUT, PRED, IN], [IntrNoMem]>; } @@ -1535,6 +1554,10 @@ def int_aarch64_sve_ld2 : AdvSIMD_ManyVec_PredLoad_Intrinsic; def int_aarch64_sve_ld3 : AdvSIMD_ManyVec_PredLoad_Intrinsic; def int_aarch64_sve_ld4 : AdvSIMD_ManyVec_PredLoad_Intrinsic; +def int_aarch64_sve_ld2_sret : AdvSIMD_2Vec_PredLoad_Intrinsic; +def int_aarch64_sve_ld3_sret : AdvSIMD_3Vec_PredLoad_Intrinsic; +def int_aarch64_sve_ld4_sret : AdvSIMD_4Vec_PredLoad_Intrinsic; + def int_aarch64_sve_ldnt1 : AdvSIMD_1Vec_PredLoad_Intrinsic; def int_aarch64_sve_ldnf1 : AdvSIMD_1Vec_PredLoad_WriteFFR_Intrinsic; def int_aarch64_sve_ldff1 : AdvSIMD_1Vec_PredLoad_WriteFFR_Intrinsic; @@ -1957,44 +1980,44 @@ def int_aarch64_sve_fcmpgt : AdvSIMD_SVE_Compare_Intrinsic; def int_aarch64_sve_fcmpne : AdvSIMD_SVE_Compare_Intrinsic; def int_aarch64_sve_fcmpuo : AdvSIMD_SVE_Compare_Intrinsic; -def int_aarch64_sve_fcvtzs_i32f16 : Builtin_SVCVT<"svcvt_s32_f16_m", llvm_nxv4i32_ty, llvm_nxv4i1_ty, llvm_nxv8f16_ty>; -def int_aarch64_sve_fcvtzs_i32f64 : Builtin_SVCVT<"svcvt_s32_f64_m", llvm_nxv4i32_ty, llvm_nxv2i1_ty, llvm_nxv2f64_ty>; -def int_aarch64_sve_fcvtzs_i64f16 : Builtin_SVCVT<"svcvt_s64_f16_m", llvm_nxv2i64_ty, llvm_nxv2i1_ty, llvm_nxv8f16_ty>; -def int_aarch64_sve_fcvtzs_i64f32 : Builtin_SVCVT<"svcvt_s64_f32_m", llvm_nxv2i64_ty, llvm_nxv2i1_ty, llvm_nxv4f32_ty>; +def int_aarch64_sve_fcvtzs_i32f16 : Builtin_SVCVT<llvm_nxv4i32_ty, llvm_nxv4i1_ty, llvm_nxv8f16_ty>; +def int_aarch64_sve_fcvtzs_i32f64 : Builtin_SVCVT<llvm_nxv4i32_ty, llvm_nxv2i1_ty, llvm_nxv2f64_ty>; +def int_aarch64_sve_fcvtzs_i64f16 : Builtin_SVCVT<llvm_nxv2i64_ty, llvm_nxv2i1_ty, llvm_nxv8f16_ty>; +def int_aarch64_sve_fcvtzs_i64f32 : 
Builtin_SVCVT<llvm_nxv2i64_ty, llvm_nxv2i1_ty, llvm_nxv4f32_ty>; -def int_aarch64_sve_fcvt_bf16f32 : Builtin_SVCVT<"svcvt_bf16_f32_m", llvm_nxv8bf16_ty, llvm_nxv8i1_ty, llvm_nxv4f32_ty>; -def int_aarch64_sve_fcvtnt_bf16f32 : Builtin_SVCVT<"svcvtnt_bf16_f32_m", llvm_nxv8bf16_ty, llvm_nxv8i1_ty, llvm_nxv4f32_ty>; +def int_aarch64_sve_fcvt_bf16f32 : Builtin_SVCVT<llvm_nxv8bf16_ty, llvm_nxv8i1_ty, llvm_nxv4f32_ty>; +def int_aarch64_sve_fcvtnt_bf16f32 : Builtin_SVCVT<llvm_nxv8bf16_ty, llvm_nxv8i1_ty, llvm_nxv4f32_ty>; -def int_aarch64_sve_fcvtzu_i32f16 : Builtin_SVCVT<"svcvt_u32_f16_m", llvm_nxv4i32_ty, llvm_nxv4i1_ty, llvm_nxv8f16_ty>; -def int_aarch64_sve_fcvtzu_i32f64 : Builtin_SVCVT<"svcvt_u32_f64_m", llvm_nxv4i32_ty, llvm_nxv2i1_ty, llvm_nxv2f64_ty>; -def int_aarch64_sve_fcvtzu_i64f16 : Builtin_SVCVT<"svcvt_u64_f16_m", llvm_nxv2i64_ty, llvm_nxv2i1_ty, llvm_nxv8f16_ty>; -def int_aarch64_sve_fcvtzu_i64f32 : Builtin_SVCVT<"svcvt_u64_f32_m", llvm_nxv2i64_ty, llvm_nxv2i1_ty, llvm_nxv4f32_ty>; +def int_aarch64_sve_fcvtzu_i32f16 : Builtin_SVCVT<llvm_nxv4i32_ty, llvm_nxv4i1_ty, llvm_nxv8f16_ty>; +def int_aarch64_sve_fcvtzu_i32f64 : Builtin_SVCVT<llvm_nxv4i32_ty, llvm_nxv2i1_ty, llvm_nxv2f64_ty>; +def int_aarch64_sve_fcvtzu_i64f16 : Builtin_SVCVT<llvm_nxv2i64_ty, llvm_nxv2i1_ty, llvm_nxv8f16_ty>; +def int_aarch64_sve_fcvtzu_i64f32 : Builtin_SVCVT<llvm_nxv2i64_ty, llvm_nxv2i1_ty, llvm_nxv4f32_ty>; -def int_aarch64_sve_fcvt_f16f32 : Builtin_SVCVT<"svcvt_f16_f32_m", llvm_nxv8f16_ty, llvm_nxv4i1_ty, llvm_nxv4f32_ty>; -def int_aarch64_sve_fcvt_f16f64 : Builtin_SVCVT<"svcvt_f16_f64_m", llvm_nxv8f16_ty, llvm_nxv2i1_ty, llvm_nxv2f64_ty>; -def int_aarch64_sve_fcvt_f32f64 : Builtin_SVCVT<"svcvt_f32_f64_m", llvm_nxv4f32_ty, llvm_nxv2i1_ty, llvm_nxv2f64_ty>; +def int_aarch64_sve_fcvt_f16f32 : Builtin_SVCVT<llvm_nxv8f16_ty, llvm_nxv4i1_ty, llvm_nxv4f32_ty>; +def int_aarch64_sve_fcvt_f16f64 : Builtin_SVCVT<llvm_nxv8f16_ty, llvm_nxv2i1_ty, llvm_nxv2f64_ty>; +def 
int_aarch64_sve_fcvt_f32f64 : Builtin_SVCVT<llvm_nxv4f32_ty, llvm_nxv2i1_ty, llvm_nxv2f64_ty>; -def int_aarch64_sve_fcvt_f32f16 : Builtin_SVCVT<"svcvt_f32_f16_m", llvm_nxv4f32_ty, llvm_nxv4i1_ty, llvm_nxv8f16_ty>; -def int_aarch64_sve_fcvt_f64f16 : Builtin_SVCVT<"svcvt_f64_f16_m", llvm_nxv2f64_ty, llvm_nxv2i1_ty, llvm_nxv8f16_ty>; -def int_aarch64_sve_fcvt_f64f32 : Builtin_SVCVT<"svcvt_f64_f32_m", llvm_nxv2f64_ty, llvm_nxv2i1_ty, llvm_nxv4f32_ty>; +def int_aarch64_sve_fcvt_f32f16 : Builtin_SVCVT<llvm_nxv4f32_ty, llvm_nxv4i1_ty, llvm_nxv8f16_ty>; +def int_aarch64_sve_fcvt_f64f16 : Builtin_SVCVT<llvm_nxv2f64_ty, llvm_nxv2i1_ty, llvm_nxv8f16_ty>; +def int_aarch64_sve_fcvt_f64f32 : Builtin_SVCVT<llvm_nxv2f64_ty, llvm_nxv2i1_ty, llvm_nxv4f32_ty>; -def int_aarch64_sve_fcvtlt_f32f16 : Builtin_SVCVT<"svcvtlt_f32_f16_m", llvm_nxv4f32_ty, llvm_nxv4i1_ty, llvm_nxv8f16_ty>; -def int_aarch64_sve_fcvtlt_f64f32 : Builtin_SVCVT<"svcvtlt_f64_f32_m", llvm_nxv2f64_ty, llvm_nxv2i1_ty, llvm_nxv4f32_ty>; -def int_aarch64_sve_fcvtnt_f16f32 : Builtin_SVCVT<"svcvtnt_f16_f32_m", llvm_nxv8f16_ty, llvm_nxv4i1_ty, llvm_nxv4f32_ty>; -def int_aarch64_sve_fcvtnt_f32f64 : Builtin_SVCVT<"svcvtnt_f32_f64_m", llvm_nxv4f32_ty, llvm_nxv2i1_ty, llvm_nxv2f64_ty>; +def int_aarch64_sve_fcvtlt_f32f16 : Builtin_SVCVT<llvm_nxv4f32_ty, llvm_nxv4i1_ty, llvm_nxv8f16_ty>; +def int_aarch64_sve_fcvtlt_f64f32 : Builtin_SVCVT<llvm_nxv2f64_ty, llvm_nxv2i1_ty, llvm_nxv4f32_ty>; +def int_aarch64_sve_fcvtnt_f16f32 : Builtin_SVCVT<llvm_nxv8f16_ty, llvm_nxv4i1_ty, llvm_nxv4f32_ty>; +def int_aarch64_sve_fcvtnt_f32f64 : Builtin_SVCVT<llvm_nxv4f32_ty, llvm_nxv2i1_ty, llvm_nxv2f64_ty>; -def int_aarch64_sve_fcvtx_f32f64 : Builtin_SVCVT<"svcvtx_f32_f64_m", llvm_nxv4f32_ty, llvm_nxv2i1_ty, llvm_nxv2f64_ty>; -def int_aarch64_sve_fcvtxnt_f32f64 : Builtin_SVCVT<"svcvtxnt_f32_f64_m", llvm_nxv4f32_ty, llvm_nxv2i1_ty, llvm_nxv2f64_ty>; +def int_aarch64_sve_fcvtx_f32f64 : Builtin_SVCVT<llvm_nxv4f32_ty, llvm_nxv2i1_ty, llvm_nxv2f64_ty>; 
+def int_aarch64_sve_fcvtxnt_f32f64 : Builtin_SVCVT<llvm_nxv4f32_ty, llvm_nxv2i1_ty, llvm_nxv2f64_ty>; -def int_aarch64_sve_scvtf_f16i32 : Builtin_SVCVT<"svcvt_f16_s32_m", llvm_nxv8f16_ty, llvm_nxv4i1_ty, llvm_nxv4i32_ty>; -def int_aarch64_sve_scvtf_f16i64 : Builtin_SVCVT<"svcvt_f16_s64_m", llvm_nxv8f16_ty, llvm_nxv2i1_ty, llvm_nxv2i64_ty>; -def int_aarch64_sve_scvtf_f32i64 : Builtin_SVCVT<"svcvt_f32_s64_m", llvm_nxv4f32_ty, llvm_nxv2i1_ty, llvm_nxv2i64_ty>; -def int_aarch64_sve_scvtf_f64i32 : Builtin_SVCVT<"svcvt_f64_s32_m", llvm_nxv2f64_ty, llvm_nxv2i1_ty, llvm_nxv4i32_ty>; +def int_aarch64_sve_scvtf_f16i32 : Builtin_SVCVT<llvm_nxv8f16_ty, llvm_nxv4i1_ty, llvm_nxv4i32_ty>; +def int_aarch64_sve_scvtf_f16i64 : Builtin_SVCVT<llvm_nxv8f16_ty, llvm_nxv2i1_ty, llvm_nxv2i64_ty>; +def int_aarch64_sve_scvtf_f32i64 : Builtin_SVCVT<llvm_nxv4f32_ty, llvm_nxv2i1_ty, llvm_nxv2i64_ty>; +def int_aarch64_sve_scvtf_f64i32 : Builtin_SVCVT<llvm_nxv2f64_ty, llvm_nxv2i1_ty, llvm_nxv4i32_ty>; -def int_aarch64_sve_ucvtf_f16i32 : Builtin_SVCVT<"svcvt_f16_u32_m", llvm_nxv8f16_ty, llvm_nxv4i1_ty, llvm_nxv4i32_ty>; -def int_aarch64_sve_ucvtf_f16i64 : Builtin_SVCVT<"svcvt_f16_u64_m", llvm_nxv8f16_ty, llvm_nxv2i1_ty, llvm_nxv2i64_ty>; -def int_aarch64_sve_ucvtf_f32i64 : Builtin_SVCVT<"svcvt_f32_u64_m", llvm_nxv4f32_ty, llvm_nxv2i1_ty, llvm_nxv2i64_ty>; -def int_aarch64_sve_ucvtf_f64i32 : Builtin_SVCVT<"svcvt_f64_u32_m", llvm_nxv2f64_ty, llvm_nxv2i1_ty, llvm_nxv4i32_ty>; +def int_aarch64_sve_ucvtf_f16i32 : Builtin_SVCVT<llvm_nxv8f16_ty, llvm_nxv4i1_ty, llvm_nxv4i32_ty>; +def int_aarch64_sve_ucvtf_f16i64 : Builtin_SVCVT<llvm_nxv8f16_ty, llvm_nxv2i1_ty, llvm_nxv2i64_ty>; +def int_aarch64_sve_ucvtf_f32i64 : Builtin_SVCVT<llvm_nxv4f32_ty, llvm_nxv2i1_ty, llvm_nxv2i64_ty>; +def int_aarch64_sve_ucvtf_f64i32 : Builtin_SVCVT<llvm_nxv2f64_ty, llvm_nxv2i1_ty, llvm_nxv4i32_ty>; // // Predicate creation diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index 
46a7aeb39c9a..0a44670de76e 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -684,7 +684,14 @@ class AMDGPUDimAtomicProfile<string opmod, let IsAtomic = true; } -class AMDGPUDimGetResInfoProfile<AMDGPUDimProps dim> : AMDGPUDimProfile<"GET_RESINFO", dim> { +class AMDGPUDimAtomicFloatProfile<string opmod, AMDGPUDimProps dim, + list<AMDGPUArg> dataargs> + : AMDGPUDimAtomicProfile<opmod, dim, dataargs> { + let RetTypes = [llvm_anyfloat_ty]; +} + +class AMDGPUDimGetResInfoProfile<AMDGPUDimProps dim> + : AMDGPUDimProfile<"GET_RESINFO", dim> { let RetTypes = [llvm_anyfloat_ty]; let DataArgs = []; let AddrArgs = [AMDGPUArg<llvm_anyint_ty, "mip">]; @@ -860,17 +867,24 @@ defset list<AMDGPUImageDimIntrinsic> AMDGPUImageDimIntrinsics = { // atomic intrinsics ////////////////////////////////////////////////////////////////////////// defset list<AMDGPUImageDimIntrinsic> AMDGPUImageDimAtomicIntrinsics = { - multiclass AMDGPUImageDimAtomicX<string opmod, list<AMDGPUArg> dataargs> { - foreach dim = AMDGPUDims.All in { - def !strconcat(NAME, "_", dim.Name) - : AMDGPUImageDimIntrinsic< - AMDGPUDimAtomicProfile<opmod, dim, dataargs>, - [], [SDNPMemOperand]>; - } + multiclass AMDGPUImageDimAtomicX<string opmod, list<AMDGPUArg> dataargs, + int isFloat = 0> { + foreach dim = AMDGPUDims.All in { + def !strconcat(NAME, "_", dim.Name): AMDGPUImageDimIntrinsic< + !if (isFloat, AMDGPUDimAtomicFloatProfile<opmod, dim, dataargs>, + AMDGPUDimAtomicProfile<opmod, dim, dataargs>), + [], [SDNPMemOperand]>; + } + } + + multiclass AMDGPUImageDimAtomic<string opmod, int isFloat = 0> { + defm "" + : AMDGPUImageDimAtomicX<opmod, [AMDGPUArg<LLVMMatchType<0>, "vdata">], + isFloat>; } - multiclass AMDGPUImageDimAtomic<string opmod> { - defm "" : AMDGPUImageDimAtomicX<opmod, [AMDGPUArg<LLVMMatchType<0>, "vdata">]>; + multiclass AMDGPUImageDimFloatAtomic<string opmod> { + defm "" : AMDGPUImageDimAtomic<opmod, 1 /*isFloat*/>; } defm 
int_amdgcn_image_atomic_swap : AMDGPUImageDimAtomic<"ATOMIC_SWAP">; @@ -878,8 +892,10 @@ defset list<AMDGPUImageDimIntrinsic> AMDGPUImageDimAtomicIntrinsics = { defm int_amdgcn_image_atomic_sub : AMDGPUImageDimAtomic<"ATOMIC_SUB">; defm int_amdgcn_image_atomic_smin : AMDGPUImageDimAtomic<"ATOMIC_SMIN">; defm int_amdgcn_image_atomic_umin : AMDGPUImageDimAtomic<"ATOMIC_UMIN">; + defm int_amdgcn_image_atomic_fmin : AMDGPUImageDimFloatAtomic<"ATOMIC_FMIN">; defm int_amdgcn_image_atomic_smax : AMDGPUImageDimAtomic<"ATOMIC_SMAX">; defm int_amdgcn_image_atomic_umax : AMDGPUImageDimAtomic<"ATOMIC_UMAX">; + defm int_amdgcn_image_atomic_fmax : AMDGPUImageDimFloatAtomic<"ATOMIC_FMAX">; defm int_amdgcn_image_atomic_and : AMDGPUImageDimAtomic<"ATOMIC_AND">; defm int_amdgcn_image_atomic_or : AMDGPUImageDimAtomic<"ATOMIC_OR">; defm int_amdgcn_image_atomic_xor : AMDGPUImageDimAtomic<"ATOMIC_XOR">; @@ -1015,8 +1031,10 @@ def int_amdgcn_raw_buffer_atomic_add : AMDGPURawBufferAtomic; def int_amdgcn_raw_buffer_atomic_sub : AMDGPURawBufferAtomic; def int_amdgcn_raw_buffer_atomic_smin : AMDGPURawBufferAtomic; def int_amdgcn_raw_buffer_atomic_umin : AMDGPURawBufferAtomic; +def int_amdgcn_raw_buffer_atomic_fmin : AMDGPURawBufferAtomic<llvm_anyfloat_ty>; def int_amdgcn_raw_buffer_atomic_smax : AMDGPURawBufferAtomic; def int_amdgcn_raw_buffer_atomic_umax : AMDGPURawBufferAtomic; +def int_amdgcn_raw_buffer_atomic_fmax : AMDGPURawBufferAtomic<llvm_anyfloat_ty>; def int_amdgcn_raw_buffer_atomic_and : AMDGPURawBufferAtomic; def int_amdgcn_raw_buffer_atomic_or : AMDGPURawBufferAtomic; def int_amdgcn_raw_buffer_atomic_xor : AMDGPURawBufferAtomic; @@ -1036,10 +1054,6 @@ def int_amdgcn_raw_buffer_atomic_cmpswap : Intrinsic< // gfx908 intrinsic def int_amdgcn_raw_buffer_atomic_fadd : AMDGPURawBufferAtomic<llvm_anyfloat_ty>; -// gfx90a intrinsics -def int_amdgcn_raw_buffer_atomic_fmin : AMDGPURawBufferAtomic<llvm_anyfloat_ty>; -def int_amdgcn_raw_buffer_atomic_fmax : 
AMDGPURawBufferAtomic<llvm_anyfloat_ty>; - class AMDGPUStructBufferAtomic<LLVMType data_ty = llvm_any_ty, bit NoRtn = false> : Intrinsic < !if(NoRtn, [], [data_ty]), [!if(NoRtn, data_ty, LLVMMatchType<0>), // vdata(VGPR) @@ -1521,6 +1535,16 @@ def int_amdgcn_mul_u24 : Intrinsic<[llvm_i32_ty], [IntrNoMem, IntrSpeculatable, IntrWillReturn] >; +def int_amdgcn_mulhi_i24 : Intrinsic<[llvm_i32_ty], + [llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, IntrSpeculatable, IntrWillReturn] +>; + +def int_amdgcn_mulhi_u24 : Intrinsic<[llvm_i32_ty], + [llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, IntrSpeculatable, IntrWillReturn] +>; + // llvm.amdgcn.ds.gws.init(i32 bar_val, i32 resource_id) // // bar_val is the total number of waves that will wait on this diff --git a/llvm/include/llvm/IR/IntrinsicsBPF.td b/llvm/include/llvm/IR/IntrinsicsBPF.td index 4b4dd94b1599..a6bd6f841aab 100644 --- a/llvm/include/llvm/IR/IntrinsicsBPF.td +++ b/llvm/include/llvm/IR/IntrinsicsBPF.td @@ -34,4 +34,7 @@ let TargetPrefix = "bpf" in { // All intrinsics start with "llvm.bpf." [IntrNoMem]>; def int_bpf_passthrough : GCCBuiltin<"__builtin_bpf_passthrough">, Intrinsic<[llvm_any_ty], [llvm_i32_ty, llvm_any_ty], [IntrNoMem]>; + def int_bpf_compare : GCCBuiltin<"__builtin_bpf_compare">, + Intrinsic<[llvm_i1_ty], [llvm_i32_ty, llvm_anyint_ty, llvm_anyint_ty], + [IntrNoMem]>; } diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td index cc43d23bec1c..6f55d1ef730e 100644 --- a/llvm/include/llvm/IR/IntrinsicsNVVM.td +++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td @@ -43,7 +43,7 @@ def llvm_shared_i64ptr_ty : LLVMQualPointerType<llvm_i64_ty, 3>; // (shared)i64* // Helper class that represents a 'fragment' of an NVPTX *MMA instruction. // Geom: m<M>n<N>k<K>. E.g. m8n32k16 -// Frag: [abcd] +// Frag: [a|b|c|d] ([x1|x2|x4] for ldmatrix) // PtxEltType: PTX type for the element. 
class WMMA_REGS<string Geom, string Frag, string PtxEltType> { string geom = Geom; @@ -190,6 +190,11 @@ class WMMA_REGS<string Geom, string Frag, string PtxEltType> { !eq(gft,"m16n8k256:b:b1") : !listsplat(llvm_i32_ty, 2), !eq(gft,"m16n8k256:c:s32") : !listsplat(llvm_i32_ty, 4), !eq(gft,"m16n8k256:d:s32") : !listsplat(llvm_i32_ty, 4), + + // ldmatrix b16 -> s32 @ m8n8 + !eq(gft,"m8n8:x1:b16") : !listsplat(llvm_i32_ty, 1), + !eq(gft,"m8n8:x2:b16") : !listsplat(llvm_i32_ty, 2), + !eq(gft,"m8n8:x4:b16") : !listsplat(llvm_i32_ty, 4), ); } @@ -256,6 +261,17 @@ class MMA_NAME<string ALayout, string BLayout, int Satfinite, string b1op, !subst("llvm.", "int_", llvm)); } +class LDMATRIX_NAME<WMMA_REGS Frag, int Trans> { + string intr = "llvm.nvvm.ldmatrix.sync.aligned" + # "." # Frag.geom + # "." # Frag.frag + # !if(Trans, ".trans", "") + # "." # Frag.ptx_elt_type + ; + string record = !subst(".", "_", + !subst("llvm.", "int_", intr)); +} + // Generates list of 4-tuples of WMMA_REGS representing a valid MMA op. // Geom: list of supported geometries. // TypeN: PTX type of the corresponding fragment's element. @@ -286,9 +302,19 @@ class MMA_LDST_OPS<list<string> Geom, list<string> Frags, list<string> Types> { list<string> ops = !foreach(x, ret, x.gft); } -// Creates list of valid combinations of fragments. This is the master list that +class LDMATRIX_OPS<list<string> Geom, list<string> Frags, list<string> Types> { + list<WMMA_REGS> ret = + !foldl([]<WMMA_REGS>, Geom, t1, geom, !listconcat(t1, + !foldl([]<WMMA_REGS>, Frags, t2, frag, !listconcat(t2, + !foldl([]<WMMA_REGS>, Types, t3, type, !listconcat(t3, + [WMMA_REGS<geom, frag, type>])))))); + // Debugging aid for readable representation of the list above. + list<string> ops = !foreach(x, ret, x.gft); +} + +// Creates list of valid combinations of fragments. This is the main list that // drives generation of corresponding intrinsics and instructions. 
-class NVVM_MMA_OPS<int _ = 0> { +class NVVM_MMA_OPS { list<list<WMMA_REGS>> tf32_wmma_ops = MMA_OPS< ["m16n16k8"], ["tf32"], [], ["f32"], []>.ret; @@ -370,11 +396,14 @@ class NVVM_MMA_OPS<int _ = 0> { // Separate A/B/C fragments (loads) from D (stores). list<WMMA_REGS> all_ld_ops = !filter(op, all_ldst_ops, !ne(op.frag, "d")); list<WMMA_REGS> all_st_ops = !filter(op, all_ldst_ops, !eq(op.frag, "d")); + + list<WMMA_REGS> ldmatrix_b16_ops = LDMATRIX_OPS< + ["m8n8"], ["x1", "x2", "x4"], ["b16"]>.ret; + list<WMMA_REGS> all_ldmatrix_ops = ldmatrix_b16_ops; } def NVVM_MMA_OPS : NVVM_MMA_OPS; - // Returns true if this combination of fragment and layout for WMMA load/store // ops is supported; false otherwise. // E.g. @@ -489,6 +518,23 @@ class NVVM_MMA_SUPPORTED<list<WMMA_REGS> frags, string layout_a, string layout_b ); } +// Returns true if the fragment is valid for ldmatrix ops is supported; +// false otherwise. +// E.g. +// if NVVM_LDMATRIX_SUPPORTED<...>.ret then +// def : FOO<>; // The record will only be defined for supported ops. 
+// +class NVVM_LDMATRIX_SUPPORTED<WMMA_REGS frag> { + string g = frag.geom; + string t = frag.ptx_elt_type; + + bit ret = !cond( + // Only currently support m8n8 and b16 + !and(!eq(g, "m8n8"), !eq(t, "b16")): true, + true: false + ); +} + class SHFL_INFO<bit sync, string mode, string type, bit return_pred> { string Suffix = !if(sync, "sync_", "") # mode # "_" @@ -511,7 +557,7 @@ class SHFL_INFO<bit sync, string mode, string type, bit return_pred> { let TargetPrefix = "nvvm" in { def int_nvvm_prmt : GCCBuiltin<"__nvvm_prmt">, - Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem, Commutative]>; // @@ -519,150 +565,150 @@ let TargetPrefix = "nvvm" in { // def int_nvvm_fmin_f : GCCBuiltin<"__nvvm_fmin_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], - [IntrNoMem, Commutative]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], + [IntrNoMem, IntrSpeculatable, Commutative]>; def int_nvvm_fmin_ftz_f : GCCBuiltin<"__nvvm_fmin_ftz_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], - [IntrNoMem, Commutative]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], + [IntrNoMem, IntrSpeculatable, Commutative]>; def int_nvvm_fmax_f : GCCBuiltin<"__nvvm_fmax_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty] - , [IntrNoMem, Commutative]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty] + , [IntrNoMem, IntrSpeculatable, Commutative]>; def int_nvvm_fmax_ftz_f : GCCBuiltin<"__nvvm_fmax_ftz_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], - [IntrNoMem, Commutative]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], + [IntrNoMem, IntrSpeculatable, Commutative]>; def int_nvvm_fmin_d : GCCBuiltin<"__nvvm_fmin_d">, - Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty], - [IntrNoMem, Commutative]>; + 
DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty], + [IntrNoMem, IntrSpeculatable, Commutative]>; def int_nvvm_fmax_d : GCCBuiltin<"__nvvm_fmax_d">, - Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty], - [IntrNoMem, Commutative]>; + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty], + [IntrNoMem, IntrSpeculatable, Commutative]>; // // Multiplication // def int_nvvm_mulhi_i : GCCBuiltin<"__nvvm_mulhi_i">, - Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, Commutative]>; + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, IntrSpeculatable, Commutative]>; def int_nvvm_mulhi_ui : GCCBuiltin<"__nvvm_mulhi_ui">, - Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, Commutative]>; + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, IntrSpeculatable, Commutative]>; def int_nvvm_mulhi_ll : GCCBuiltin<"__nvvm_mulhi_ll">, - Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], - [IntrNoMem, Commutative]>; + DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], + [IntrNoMem, IntrSpeculatable, Commutative]>; def int_nvvm_mulhi_ull : GCCBuiltin<"__nvvm_mulhi_ull">, - Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], - [IntrNoMem, Commutative]>; + DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], + [IntrNoMem, IntrSpeculatable, Commutative]>; def int_nvvm_mul_rn_ftz_f : GCCBuiltin<"__nvvm_mul_rn_ftz_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], - [IntrNoMem, Commutative]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], + [IntrNoMem, IntrSpeculatable, Commutative]>; def int_nvvm_mul_rn_f : GCCBuiltin<"__nvvm_mul_rn_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], - [IntrNoMem, Commutative]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], + [IntrNoMem, IntrSpeculatable, Commutative]>; def int_nvvm_mul_rz_ftz_f : 
GCCBuiltin<"__nvvm_mul_rz_ftz_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], - [IntrNoMem, Commutative]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], + [IntrNoMem, IntrSpeculatable, Commutative]>; def int_nvvm_mul_rz_f : GCCBuiltin<"__nvvm_mul_rz_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], - [IntrNoMem, Commutative]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], + [IntrNoMem, IntrSpeculatable, Commutative]>; def int_nvvm_mul_rm_ftz_f : GCCBuiltin<"__nvvm_mul_rm_ftz_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], - [IntrNoMem, Commutative]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], + [IntrNoMem, IntrSpeculatable, Commutative]>; def int_nvvm_mul_rm_f : GCCBuiltin<"__nvvm_mul_rm_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], - [IntrNoMem, Commutative]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], + [IntrNoMem, IntrSpeculatable, Commutative]>; def int_nvvm_mul_rp_ftz_f : GCCBuiltin<"__nvvm_mul_rp_ftz_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], - [IntrNoMem, Commutative]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], + [IntrNoMem, IntrSpeculatable, Commutative]>; def int_nvvm_mul_rp_f : GCCBuiltin<"__nvvm_mul_rp_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], - [IntrNoMem, Commutative]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], + [IntrNoMem, IntrSpeculatable, Commutative]>; def int_nvvm_mul_rn_d : GCCBuiltin<"__nvvm_mul_rn_d">, - Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty], - [IntrNoMem, Commutative]>; + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty], + [IntrNoMem, IntrSpeculatable, Commutative]>; def int_nvvm_mul_rz_d : GCCBuiltin<"__nvvm_mul_rz_d">, - Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty], - [IntrNoMem, 
Commutative]>; + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty], + [IntrNoMem, IntrSpeculatable, Commutative]>; def int_nvvm_mul_rm_d : GCCBuiltin<"__nvvm_mul_rm_d">, - Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty], - [IntrNoMem, Commutative]>; + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty], + [IntrNoMem, IntrSpeculatable, Commutative]>; def int_nvvm_mul_rp_d : GCCBuiltin<"__nvvm_mul_rp_d">, - Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty], - [IntrNoMem, Commutative]>; + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty], + [IntrNoMem, IntrSpeculatable, Commutative]>; def int_nvvm_mul24_i : GCCBuiltin<"__nvvm_mul24_i">, - Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, Commutative]>; + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, IntrSpeculatable, Commutative]>; def int_nvvm_mul24_ui : GCCBuiltin<"__nvvm_mul24_ui">, - Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, Commutative]>; + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, IntrSpeculatable, Commutative]>; // // Div // def int_nvvm_div_approx_ftz_f : GCCBuiltin<"__nvvm_div_approx_ftz_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], - [IntrNoMem, Commutative]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], + [IntrNoMem]>; def int_nvvm_div_approx_f : GCCBuiltin<"__nvvm_div_approx_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], - [IntrNoMem, Commutative]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], + [IntrNoMem]>; def int_nvvm_div_rn_ftz_f : GCCBuiltin<"__nvvm_div_rn_ftz_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], - [IntrNoMem, Commutative]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], + [IntrNoMem]>; def int_nvvm_div_rn_f : GCCBuiltin<"__nvvm_div_rn_f">, - 
Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], - [IntrNoMem, Commutative]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], + [IntrNoMem]>; def int_nvvm_div_rz_ftz_f : GCCBuiltin<"__nvvm_div_rz_ftz_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], - [IntrNoMem, Commutative]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], + [IntrNoMem]>; def int_nvvm_div_rz_f : GCCBuiltin<"__nvvm_div_rz_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], - [IntrNoMem, Commutative]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], + [IntrNoMem]>; def int_nvvm_div_rm_ftz_f : GCCBuiltin<"__nvvm_div_rm_ftz_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], - [IntrNoMem, Commutative]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], + [IntrNoMem]>; def int_nvvm_div_rm_f : GCCBuiltin<"__nvvm_div_rm_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], - [IntrNoMem, Commutative]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], + [IntrNoMem]>; def int_nvvm_div_rp_ftz_f : GCCBuiltin<"__nvvm_div_rp_ftz_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], - [IntrNoMem, Commutative]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], + [IntrNoMem]>; def int_nvvm_div_rp_f : GCCBuiltin<"__nvvm_div_rp_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], - [IntrNoMem, Commutative]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], + [IntrNoMem]>; def int_nvvm_div_rn_d : GCCBuiltin<"__nvvm_div_rn_d">, - Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty], - [IntrNoMem, Commutative]>; + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty], + [IntrNoMem]>; def int_nvvm_div_rz_d : GCCBuiltin<"__nvvm_div_rz_d">, - Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty], - [IntrNoMem, Commutative]>; + 
DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty], + [IntrNoMem]>; def int_nvvm_div_rm_d : GCCBuiltin<"__nvvm_div_rm_d">, - Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty], - [IntrNoMem, Commutative]>; + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty], + [IntrNoMem]>; def int_nvvm_div_rp_d : GCCBuiltin<"__nvvm_div_rp_d">, - Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty], - [IntrNoMem, Commutative]>; + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty], + [IntrNoMem]>; // // Sad // def int_nvvm_sad_i : GCCBuiltin<"__nvvm_sad_i">, - Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem, Commutative]>; def int_nvvm_sad_ui : GCCBuiltin<"__nvvm_sad_ui">, - Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem, Commutative]>; // @@ -670,493 +716,493 @@ let TargetPrefix = "nvvm" in { // def int_nvvm_floor_ftz_f : GCCBuiltin<"__nvvm_floor_ftz_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_floor_f : GCCBuiltin<"__nvvm_floor_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_floor_d : GCCBuiltin<"__nvvm_floor_d">, - Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_ceil_ftz_f : GCCBuiltin<"__nvvm_ceil_ftz_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_ceil_f : GCCBuiltin<"__nvvm_ceil_f">, - 
Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_ceil_d : GCCBuiltin<"__nvvm_ceil_d">, - Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; // // Abs // def int_nvvm_fabs_ftz_f : GCCBuiltin<"__nvvm_fabs_ftz_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_fabs_f : GCCBuiltin<"__nvvm_fabs_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_fabs_d : GCCBuiltin<"__nvvm_fabs_d">, - Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; // // Round // def int_nvvm_round_ftz_f : GCCBuiltin<"__nvvm_round_ftz_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_round_f : GCCBuiltin<"__nvvm_round_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_round_d : GCCBuiltin<"__nvvm_round_d">, - Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; // // Trunc // def int_nvvm_trunc_ftz_f : GCCBuiltin<"__nvvm_trunc_ftz_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_trunc_f : GCCBuiltin<"__nvvm_trunc_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], 
[llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_trunc_d : GCCBuiltin<"__nvvm_trunc_d">, - Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; // // Saturate // def int_nvvm_saturate_ftz_f : GCCBuiltin<"__nvvm_saturate_ftz_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_saturate_f : GCCBuiltin<"__nvvm_saturate_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_saturate_d : GCCBuiltin<"__nvvm_saturate_d">, - Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; // // Exp2 Log2 // def int_nvvm_ex2_approx_ftz_f : GCCBuiltin<"__nvvm_ex2_approx_ftz_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; def int_nvvm_ex2_approx_f : GCCBuiltin<"__nvvm_ex2_approx_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; def int_nvvm_ex2_approx_d : GCCBuiltin<"__nvvm_ex2_approx_d">, - Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>; def int_nvvm_lg2_approx_ftz_f : GCCBuiltin<"__nvvm_lg2_approx_ftz_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; def int_nvvm_lg2_approx_f : GCCBuiltin<"__nvvm_lg2_approx_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; def int_nvvm_lg2_approx_d : GCCBuiltin<"__nvvm_lg2_approx_d">, - 
Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>; // // Sin Cos // def int_nvvm_sin_approx_ftz_f : GCCBuiltin<"__nvvm_sin_approx_ftz_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; def int_nvvm_sin_approx_f : GCCBuiltin<"__nvvm_sin_approx_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; def int_nvvm_cos_approx_ftz_f : GCCBuiltin<"__nvvm_cos_approx_ftz_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; def int_nvvm_cos_approx_f : GCCBuiltin<"__nvvm_cos_approx_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; // // Fma // def int_nvvm_fma_rn_ftz_f : GCCBuiltin<"__nvvm_fma_rn_ftz_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], - [IntrNoMem, Commutative]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], + [IntrNoMem, IntrSpeculatable]>; def int_nvvm_fma_rn_f : GCCBuiltin<"__nvvm_fma_rn_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], - [IntrNoMem, Commutative]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], + [IntrNoMem, IntrSpeculatable]>; def int_nvvm_fma_rz_ftz_f : GCCBuiltin<"__nvvm_fma_rz_ftz_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], - [IntrNoMem, Commutative]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], + [IntrNoMem, IntrSpeculatable]>; def int_nvvm_fma_rz_f : GCCBuiltin<"__nvvm_fma_rz_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], - [IntrNoMem, Commutative]>; + 
DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], + [IntrNoMem, IntrSpeculatable]>; def int_nvvm_fma_rm_ftz_f : GCCBuiltin<"__nvvm_fma_rm_ftz_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], - [IntrNoMem, Commutative]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], + [IntrNoMem, IntrSpeculatable]>; def int_nvvm_fma_rm_f : GCCBuiltin<"__nvvm_fma_rm_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], - [IntrNoMem, Commutative]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], + [IntrNoMem, IntrSpeculatable]>; def int_nvvm_fma_rp_ftz_f : GCCBuiltin<"__nvvm_fma_rp_ftz_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], - [IntrNoMem, Commutative]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], + [IntrNoMem, IntrSpeculatable]>; def int_nvvm_fma_rp_f : GCCBuiltin<"__nvvm_fma_rp_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], - [IntrNoMem, Commutative]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], + [IntrNoMem, IntrSpeculatable]>; def int_nvvm_fma_rn_d : GCCBuiltin<"__nvvm_fma_rn_d">, - Intrinsic<[llvm_double_ty], + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty, llvm_double_ty], - [IntrNoMem, Commutative]>; + [IntrNoMem, IntrSpeculatable]>; def int_nvvm_fma_rz_d : GCCBuiltin<"__nvvm_fma_rz_d">, - Intrinsic<[llvm_double_ty], + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty, llvm_double_ty], - [IntrNoMem, Commutative]>; + [IntrNoMem, IntrSpeculatable]>; def int_nvvm_fma_rm_d : GCCBuiltin<"__nvvm_fma_rm_d">, - Intrinsic<[llvm_double_ty], + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty, llvm_double_ty], - [IntrNoMem, Commutative]>; + [IntrNoMem, IntrSpeculatable]>; def 
int_nvvm_fma_rp_d : GCCBuiltin<"__nvvm_fma_rp_d">, - Intrinsic<[llvm_double_ty], + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty, llvm_double_ty], - [IntrNoMem, Commutative]>; + [IntrNoMem, IntrSpeculatable]>; // // Rcp // def int_nvvm_rcp_rn_ftz_f : GCCBuiltin<"__nvvm_rcp_rn_ftz_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; def int_nvvm_rcp_rn_f : GCCBuiltin<"__nvvm_rcp_rn_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; def int_nvvm_rcp_rz_ftz_f : GCCBuiltin<"__nvvm_rcp_rz_ftz_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; def int_nvvm_rcp_rz_f : GCCBuiltin<"__nvvm_rcp_rz_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; def int_nvvm_rcp_rm_ftz_f : GCCBuiltin<"__nvvm_rcp_rm_ftz_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; def int_nvvm_rcp_rm_f : GCCBuiltin<"__nvvm_rcp_rm_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; def int_nvvm_rcp_rp_ftz_f : GCCBuiltin<"__nvvm_rcp_rp_ftz_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; def int_nvvm_rcp_rp_f : GCCBuiltin<"__nvvm_rcp_rp_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; def int_nvvm_rcp_rn_d : GCCBuiltin<"__nvvm_rcp_rn_d">, - Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>; def int_nvvm_rcp_rz_d : 
GCCBuiltin<"__nvvm_rcp_rz_d">, - Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>; def int_nvvm_rcp_rm_d : GCCBuiltin<"__nvvm_rcp_rm_d">, - Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>; def int_nvvm_rcp_rp_d : GCCBuiltin<"__nvvm_rcp_rp_d">, - Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>; def int_nvvm_rcp_approx_ftz_d : GCCBuiltin<"__nvvm_rcp_approx_ftz_d">, - Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>; // // Sqrt // def int_nvvm_sqrt_f : GCCBuiltin<"__nvvm_sqrt_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; def int_nvvm_sqrt_rn_ftz_f : GCCBuiltin<"__nvvm_sqrt_rn_ftz_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; def int_nvvm_sqrt_rn_f : GCCBuiltin<"__nvvm_sqrt_rn_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; def int_nvvm_sqrt_rz_ftz_f : GCCBuiltin<"__nvvm_sqrt_rz_ftz_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; def int_nvvm_sqrt_rz_f : GCCBuiltin<"__nvvm_sqrt_rz_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; def int_nvvm_sqrt_rm_ftz_f : GCCBuiltin<"__nvvm_sqrt_rm_ftz_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; def int_nvvm_sqrt_rm_f : GCCBuiltin<"__nvvm_sqrt_rm_f">, - 
Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; def int_nvvm_sqrt_rp_ftz_f : GCCBuiltin<"__nvvm_sqrt_rp_ftz_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; def int_nvvm_sqrt_rp_f : GCCBuiltin<"__nvvm_sqrt_rp_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; def int_nvvm_sqrt_approx_ftz_f : GCCBuiltin<"__nvvm_sqrt_approx_ftz_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; def int_nvvm_sqrt_approx_f : GCCBuiltin<"__nvvm_sqrt_approx_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; def int_nvvm_sqrt_rn_d : GCCBuiltin<"__nvvm_sqrt_rn_d">, - Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>; def int_nvvm_sqrt_rz_d : GCCBuiltin<"__nvvm_sqrt_rz_d">, - Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>; def int_nvvm_sqrt_rm_d : GCCBuiltin<"__nvvm_sqrt_rm_d">, - Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>; def int_nvvm_sqrt_rp_d : GCCBuiltin<"__nvvm_sqrt_rp_d">, - Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>; // // Rsqrt // def int_nvvm_rsqrt_approx_ftz_f : GCCBuiltin<"__nvvm_rsqrt_approx_ftz_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; def int_nvvm_rsqrt_approx_f : GCCBuiltin<"__nvvm_rsqrt_approx_f">, - 
Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; def int_nvvm_rsqrt_approx_d : GCCBuiltin<"__nvvm_rsqrt_approx_d">, - Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>; // // Add // def int_nvvm_add_rn_ftz_f : GCCBuiltin<"__nvvm_add_rn_ftz_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], - [IntrNoMem, Commutative]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], + [IntrNoMem, IntrSpeculatable, Commutative]>; def int_nvvm_add_rn_f : GCCBuiltin<"__nvvm_add_rn_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], - [IntrNoMem, Commutative]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], + [IntrNoMem, IntrSpeculatable, Commutative]>; def int_nvvm_add_rz_ftz_f : GCCBuiltin<"__nvvm_add_rz_ftz_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], - [IntrNoMem, Commutative]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], + [IntrNoMem, IntrSpeculatable, Commutative]>; def int_nvvm_add_rz_f : GCCBuiltin<"__nvvm_add_rz_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], - [IntrNoMem, Commutative]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], + [IntrNoMem, IntrSpeculatable, Commutative]>; def int_nvvm_add_rm_ftz_f : GCCBuiltin<"__nvvm_add_rm_ftz_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], - [IntrNoMem, Commutative]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], + [IntrNoMem, IntrSpeculatable, Commutative]>; def int_nvvm_add_rm_f : GCCBuiltin<"__nvvm_add_rm_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], - [IntrNoMem, Commutative]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], + [IntrNoMem, IntrSpeculatable, Commutative]>; def int_nvvm_add_rp_ftz_f : 
GCCBuiltin<"__nvvm_add_rp_ftz_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], - [IntrNoMem, Commutative]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], + [IntrNoMem, IntrSpeculatable, Commutative]>; def int_nvvm_add_rp_f : GCCBuiltin<"__nvvm_add_rp_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], - [IntrNoMem, Commutative]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], + [IntrNoMem, IntrSpeculatable, Commutative]>; def int_nvvm_add_rn_d : GCCBuiltin<"__nvvm_add_rn_d">, - Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty], - [IntrNoMem, Commutative]>; + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty], + [IntrNoMem, IntrSpeculatable, Commutative]>; def int_nvvm_add_rz_d : GCCBuiltin<"__nvvm_add_rz_d">, - Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty], - [IntrNoMem, Commutative]>; + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty], + [IntrNoMem, IntrSpeculatable, Commutative]>; def int_nvvm_add_rm_d : GCCBuiltin<"__nvvm_add_rm_d">, - Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty], - [IntrNoMem, Commutative]>; + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty], + [IntrNoMem, IntrSpeculatable, Commutative]>; def int_nvvm_add_rp_d : GCCBuiltin<"__nvvm_add_rp_d">, - Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty], - [IntrNoMem, Commutative]>; + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty], + [IntrNoMem, IntrSpeculatable, Commutative]>; // // Convert // def int_nvvm_d2f_rn_ftz : GCCBuiltin<"__nvvm_d2f_rn_ftz">, - Intrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_d2f_rn : GCCBuiltin<"__nvvm_d2f_rn">, - Intrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_double_ty], 
[IntrNoMem, IntrSpeculatable]>; def int_nvvm_d2f_rz_ftz : GCCBuiltin<"__nvvm_d2f_rz_ftz">, - Intrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_d2f_rz : GCCBuiltin<"__nvvm_d2f_rz">, - Intrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_d2f_rm_ftz : GCCBuiltin<"__nvvm_d2f_rm_ftz">, - Intrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_d2f_rm : GCCBuiltin<"__nvvm_d2f_rm">, - Intrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_d2f_rp_ftz : GCCBuiltin<"__nvvm_d2f_rp_ftz">, - Intrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_d2f_rp : GCCBuiltin<"__nvvm_d2f_rp">, - Intrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_d2i_rn : GCCBuiltin<"__nvvm_d2i_rn">, - Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_d2i_rz : GCCBuiltin<"__nvvm_d2i_rz">, - Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_d2i_rm : GCCBuiltin<"__nvvm_d2i_rm">, - Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_d2i_rp : GCCBuiltin<"__nvvm_d2i_rp">, - Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>; + 
DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_d2ui_rn : GCCBuiltin<"__nvvm_d2ui_rn">, - Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_d2ui_rz : GCCBuiltin<"__nvvm_d2ui_rz">, - Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_d2ui_rm : GCCBuiltin<"__nvvm_d2ui_rm">, - Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_d2ui_rp : GCCBuiltin<"__nvvm_d2ui_rp">, - Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_i2d_rn : GCCBuiltin<"__nvvm_i2d_rn">, - Intrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_i2d_rz : GCCBuiltin<"__nvvm_i2d_rz">, - Intrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_i2d_rm : GCCBuiltin<"__nvvm_i2d_rm">, - Intrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_i2d_rp : GCCBuiltin<"__nvvm_i2d_rp">, - Intrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_ui2d_rn : GCCBuiltin<"__nvvm_ui2d_rn">, - Intrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_ui2d_rz : GCCBuiltin<"__nvvm_ui2d_rz">, - Intrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem]>; 
+ DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_ui2d_rm : GCCBuiltin<"__nvvm_ui2d_rm">, - Intrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_ui2d_rp : GCCBuiltin<"__nvvm_ui2d_rp">, - Intrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_f2i_rn_ftz : GCCBuiltin<"__nvvm_f2i_rn_ftz">, - Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_f2i_rn : GCCBuiltin<"__nvvm_f2i_rn">, - Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_f2i_rz_ftz : GCCBuiltin<"__nvvm_f2i_rz_ftz">, - Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_f2i_rz : GCCBuiltin<"__nvvm_f2i_rz">, - Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_f2i_rm_ftz : GCCBuiltin<"__nvvm_f2i_rm_ftz">, - Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_f2i_rm : GCCBuiltin<"__nvvm_f2i_rm">, - Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_f2i_rp_ftz : GCCBuiltin<"__nvvm_f2i_rp_ftz">, - Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_f2i_rp : GCCBuiltin<"__nvvm_f2i_rp">, - Intrinsic<[llvm_i32_ty], [llvm_float_ty], 
[IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_f2ui_rn_ftz : GCCBuiltin<"__nvvm_f2ui_rn_ftz">, - Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_f2ui_rn : GCCBuiltin<"__nvvm_f2ui_rn">, - Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_f2ui_rz_ftz : GCCBuiltin<"__nvvm_f2ui_rz_ftz">, - Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_f2ui_rz : GCCBuiltin<"__nvvm_f2ui_rz">, - Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_f2ui_rm_ftz : GCCBuiltin<"__nvvm_f2ui_rm_ftz">, - Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_f2ui_rm : GCCBuiltin<"__nvvm_f2ui_rm">, - Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_f2ui_rp_ftz : GCCBuiltin<"__nvvm_f2ui_rp_ftz">, - Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_f2ui_rp : GCCBuiltin<"__nvvm_f2ui_rp">, - Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_i2f_rn : GCCBuiltin<"__nvvm_i2f_rn">, - Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_i2f_rz : GCCBuiltin<"__nvvm_i2f_rz">, - 
Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_i2f_rm : GCCBuiltin<"__nvvm_i2f_rm">, - Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_i2f_rp : GCCBuiltin<"__nvvm_i2f_rp">, - Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_ui2f_rn : GCCBuiltin<"__nvvm_ui2f_rn">, - Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_ui2f_rz : GCCBuiltin<"__nvvm_ui2f_rz">, - Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_ui2f_rm : GCCBuiltin<"__nvvm_ui2f_rm">, - Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_ui2f_rp : GCCBuiltin<"__nvvm_ui2f_rp">, - Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_lohi_i2d : GCCBuiltin<"__nvvm_lohi_i2d">, - Intrinsic<[llvm_double_ty], [llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, Commutative]>; + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, IntrSpeculatable, Commutative]>; def int_nvvm_d2i_lo : GCCBuiltin<"__nvvm_d2i_lo">, - Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_d2i_hi : GCCBuiltin<"__nvvm_d2i_hi">, - Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; def 
int_nvvm_f2ll_rn_ftz : GCCBuiltin<"__nvvm_f2ll_rn_ftz">, - Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_f2ll_rn : GCCBuiltin<"__nvvm_f2ll_rn">, - Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_f2ll_rz_ftz : GCCBuiltin<"__nvvm_f2ll_rz_ftz">, - Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_f2ll_rz : GCCBuiltin<"__nvvm_f2ll_rz">, - Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_f2ll_rm_ftz : GCCBuiltin<"__nvvm_f2ll_rm_ftz">, - Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_f2ll_rm : GCCBuiltin<"__nvvm_f2ll_rm">, - Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_f2ll_rp_ftz : GCCBuiltin<"__nvvm_f2ll_rp_ftz">, - Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_f2ll_rp : GCCBuiltin<"__nvvm_f2ll_rp">, - Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_f2ull_rn_ftz : GCCBuiltin<"__nvvm_f2ull_rn_ftz">, - Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_f2ull_rn : GCCBuiltin<"__nvvm_f2ull_rn">, - Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], 
[IntrNoMem, IntrSpeculatable]>; def int_nvvm_f2ull_rz_ftz : GCCBuiltin<"__nvvm_f2ull_rz_ftz">, - Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_f2ull_rz : GCCBuiltin<"__nvvm_f2ull_rz">, - Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_f2ull_rm_ftz : GCCBuiltin<"__nvvm_f2ull_rm_ftz">, - Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_f2ull_rm : GCCBuiltin<"__nvvm_f2ull_rm">, - Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_f2ull_rp_ftz : GCCBuiltin<"__nvvm_f2ull_rp_ftz">, - Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_f2ull_rp : GCCBuiltin<"__nvvm_f2ull_rp">, - Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_d2ll_rn : GCCBuiltin<"__nvvm_d2ll_rn">, - Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_d2ll_rz : GCCBuiltin<"__nvvm_d2ll_rz">, - Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_d2ll_rm : GCCBuiltin<"__nvvm_d2ll_rm">, - Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_d2ll_rp : GCCBuiltin<"__nvvm_d2ll_rp">, - Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem]>; + 
DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_d2ull_rn : GCCBuiltin<"__nvvm_d2ull_rn">, - Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_d2ull_rz : GCCBuiltin<"__nvvm_d2ull_rz">, - Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_d2ull_rm : GCCBuiltin<"__nvvm_d2ull_rm">, - Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_d2ull_rp : GCCBuiltin<"__nvvm_d2ull_rp">, - Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_ll2f_rn : GCCBuiltin<"__nvvm_ll2f_rn">, - Intrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_ll2f_rz : GCCBuiltin<"__nvvm_ll2f_rz">, - Intrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_ll2f_rm : GCCBuiltin<"__nvvm_ll2f_rm">, - Intrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_ll2f_rp : GCCBuiltin<"__nvvm_ll2f_rp">, - Intrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_ull2f_rn : GCCBuiltin<"__nvvm_ull2f_rn">, - Intrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_ull2f_rz : GCCBuiltin<"__nvvm_ull2f_rz">, - Intrinsic<[llvm_float_ty], [llvm_i64_ty], 
[IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_ull2f_rm : GCCBuiltin<"__nvvm_ull2f_rm">, - Intrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_ull2f_rp : GCCBuiltin<"__nvvm_ull2f_rp">, - Intrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_ll2d_rn : GCCBuiltin<"__nvvm_ll2d_rn">, - Intrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_ll2d_rz : GCCBuiltin<"__nvvm_ll2d_rz">, - Intrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_ll2d_rm : GCCBuiltin<"__nvvm_ll2d_rm">, - Intrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_ll2d_rp : GCCBuiltin<"__nvvm_ll2d_rp">, - Intrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_ull2d_rn : GCCBuiltin<"__nvvm_ull2d_rn">, - Intrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_ull2d_rz : GCCBuiltin<"__nvvm_ull2d_rz">, - Intrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_ull2d_rm : GCCBuiltin<"__nvvm_ull2d_rm">, - Intrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_ull2d_rp : GCCBuiltin<"__nvvm_ull2d_rp">, - 
Intrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_f2h_rn_ftz : GCCBuiltin<"__nvvm_f2h_rn_ftz">, - Intrinsic<[llvm_i16_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_f2h_rn : GCCBuiltin<"__nvvm_f2h_rn">, - Intrinsic<[llvm_i16_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; // // Bitcast // def int_nvvm_bitcast_f2i : GCCBuiltin<"__nvvm_bitcast_f2i">, - Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_bitcast_i2f : GCCBuiltin<"__nvvm_bitcast_i2f">, - Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_bitcast_ll2d : GCCBuiltin<"__nvvm_bitcast_ll2d">, - Intrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_bitcast_d2ll : GCCBuiltin<"__nvvm_bitcast_d2ll">, - Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; // FNS def int_nvvm_fns : GCCBuiltin<"__nvvm_fns">, - Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; // Atomics not available as llvm intrinsics. @@ -1385,37 +1431,37 @@ def int_nvvm_ldg_global_p : Intrinsic<[llvm_anyptr_ty], // - This complements the llvm bitcast, which can be used to cast one type // of pointer to another type of pointer, while the address space remains // the same. 
-def int_nvvm_ptr_local_to_gen: Intrinsic<[llvm_anyptr_ty], - [llvm_anyptr_ty], [IntrNoMem], +def int_nvvm_ptr_local_to_gen: DefaultAttrsIntrinsic<[llvm_anyptr_ty], + [llvm_anyptr_ty], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.ptr.local.to.gen">; -def int_nvvm_ptr_shared_to_gen: Intrinsic<[llvm_anyptr_ty], - [llvm_anyptr_ty], [IntrNoMem], +def int_nvvm_ptr_shared_to_gen: DefaultAttrsIntrinsic<[llvm_anyptr_ty], + [llvm_anyptr_ty], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.ptr.shared.to.gen">; -def int_nvvm_ptr_global_to_gen: Intrinsic<[llvm_anyptr_ty], - [llvm_anyptr_ty], [IntrNoMem], +def int_nvvm_ptr_global_to_gen: DefaultAttrsIntrinsic<[llvm_anyptr_ty], + [llvm_anyptr_ty], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.ptr.global.to.gen">; -def int_nvvm_ptr_constant_to_gen: Intrinsic<[llvm_anyptr_ty], - [llvm_anyptr_ty], [IntrNoMem], +def int_nvvm_ptr_constant_to_gen: DefaultAttrsIntrinsic<[llvm_anyptr_ty], + [llvm_anyptr_ty], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.ptr.constant.to.gen">; -def int_nvvm_ptr_gen_to_global: Intrinsic<[llvm_anyptr_ty], - [llvm_anyptr_ty], [IntrNoMem], +def int_nvvm_ptr_gen_to_global: DefaultAttrsIntrinsic<[llvm_anyptr_ty], + [llvm_anyptr_ty], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.ptr.gen.to.global">; -def int_nvvm_ptr_gen_to_shared: Intrinsic<[llvm_anyptr_ty], - [llvm_anyptr_ty], [IntrNoMem], +def int_nvvm_ptr_gen_to_shared: DefaultAttrsIntrinsic<[llvm_anyptr_ty], + [llvm_anyptr_ty], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.ptr.gen.to.shared">; -def int_nvvm_ptr_gen_to_local: Intrinsic<[llvm_anyptr_ty], - [llvm_anyptr_ty], [IntrNoMem], +def int_nvvm_ptr_gen_to_local: DefaultAttrsIntrinsic<[llvm_anyptr_ty], + [llvm_anyptr_ty], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.ptr.gen.to.local">; -def int_nvvm_ptr_gen_to_constant: Intrinsic<[llvm_anyptr_ty], - [llvm_anyptr_ty], [IntrNoMem], +def int_nvvm_ptr_gen_to_constant: DefaultAttrsIntrinsic<[llvm_anyptr_ty], + [llvm_anyptr_ty], [IntrNoMem, IntrSpeculatable], 
"llvm.nvvm.ptr.gen.to.constant">; // Used in nvvm internally to help address space opt and ptx code generation // This is for params that are passed to kernel functions by pointer by-val. def int_nvvm_ptr_gen_to_param: Intrinsic<[llvm_anyptr_ty], [llvm_anyptr_ty], - [IntrNoMem], + [IntrNoMem, IntrSpeculatable], "llvm.nvvm.ptr.gen.to.param">; // Move intrinsics, used in nvvm internally @@ -1453,149 +1499,149 @@ def int_nvvm_reflect : // isspacep.{const, global, local, shared} def int_nvvm_isspacep_const - : Intrinsic<[llvm_i1_ty], [llvm_ptr_ty], [IntrNoMem], + : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_ptr_ty], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.isspacep.const">, GCCBuiltin<"__nvvm_isspacep_const">; def int_nvvm_isspacep_global - : Intrinsic<[llvm_i1_ty], [llvm_ptr_ty], [IntrNoMem], + : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_ptr_ty], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.isspacep.global">, GCCBuiltin<"__nvvm_isspacep_global">; def int_nvvm_isspacep_local - : Intrinsic<[llvm_i1_ty], [llvm_ptr_ty], [IntrNoMem], + : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_ptr_ty], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.isspacep.local">, GCCBuiltin<"__nvvm_isspacep_local">; def int_nvvm_isspacep_shared - : Intrinsic<[llvm_i1_ty], [llvm_ptr_ty], [IntrNoMem], + : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_ptr_ty], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.isspacep.shared">, GCCBuiltin<"__nvvm_isspacep_shared">; // Environment register read def int_nvvm_read_ptx_sreg_envreg0 - : Intrinsic<[llvm_i32_ty], [], [IntrNoMem], + : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.read.ptx.sreg.envreg0">, GCCBuiltin<"__nvvm_read_ptx_sreg_envreg0">; def int_nvvm_read_ptx_sreg_envreg1 - : Intrinsic<[llvm_i32_ty], [], [IntrNoMem], + : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.read.ptx.sreg.envreg1">, GCCBuiltin<"__nvvm_read_ptx_sreg_envreg1">; def int_nvvm_read_ptx_sreg_envreg2 - : Intrinsic<[llvm_i32_ty], 
[], [IntrNoMem], + : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.read.ptx.sreg.envreg2">, GCCBuiltin<"__nvvm_read_ptx_sreg_envreg2">; def int_nvvm_read_ptx_sreg_envreg3 - : Intrinsic<[llvm_i32_ty], [], [IntrNoMem], + : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.read.ptx.sreg.envreg3">, GCCBuiltin<"__nvvm_read_ptx_sreg_envreg3">; def int_nvvm_read_ptx_sreg_envreg4 - : Intrinsic<[llvm_i32_ty], [], [IntrNoMem], + : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.read.ptx.sreg.envreg4">, GCCBuiltin<"__nvvm_read_ptx_sreg_envreg4">; def int_nvvm_read_ptx_sreg_envreg5 - : Intrinsic<[llvm_i32_ty], [], [IntrNoMem], + : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.read.ptx.sreg.envreg5">, GCCBuiltin<"__nvvm_read_ptx_sreg_envreg5">; def int_nvvm_read_ptx_sreg_envreg6 - : Intrinsic<[llvm_i32_ty], [], [IntrNoMem], + : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.read.ptx.sreg.envreg6">, GCCBuiltin<"__nvvm_read_ptx_sreg_envreg6">; def int_nvvm_read_ptx_sreg_envreg7 - : Intrinsic<[llvm_i32_ty], [], [IntrNoMem], + : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.read.ptx.sreg.envreg7">, GCCBuiltin<"__nvvm_read_ptx_sreg_envreg7">; def int_nvvm_read_ptx_sreg_envreg8 - : Intrinsic<[llvm_i32_ty], [], [IntrNoMem], + : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.read.ptx.sreg.envreg8">, GCCBuiltin<"__nvvm_read_ptx_sreg_envreg8">; def int_nvvm_read_ptx_sreg_envreg9 - : Intrinsic<[llvm_i32_ty], [], [IntrNoMem], + : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.read.ptx.sreg.envreg9">, GCCBuiltin<"__nvvm_read_ptx_sreg_envreg9">; def int_nvvm_read_ptx_sreg_envreg10 - : Intrinsic<[llvm_i32_ty], [], [IntrNoMem], + : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable], 
"llvm.nvvm.read.ptx.sreg.envreg10">, GCCBuiltin<"__nvvm_read_ptx_sreg_envreg10">; def int_nvvm_read_ptx_sreg_envreg11 - : Intrinsic<[llvm_i32_ty], [], [IntrNoMem], + : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.read.ptx.sreg.envreg11">, GCCBuiltin<"__nvvm_read_ptx_sreg_envreg11">; def int_nvvm_read_ptx_sreg_envreg12 - : Intrinsic<[llvm_i32_ty], [], [IntrNoMem], + : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.read.ptx.sreg.envreg12">, GCCBuiltin<"__nvvm_read_ptx_sreg_envreg12">; def int_nvvm_read_ptx_sreg_envreg13 - : Intrinsic<[llvm_i32_ty], [], [IntrNoMem], + : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.read.ptx.sreg.envreg13">, GCCBuiltin<"__nvvm_read_ptx_sreg_envreg13">; def int_nvvm_read_ptx_sreg_envreg14 - : Intrinsic<[llvm_i32_ty], [], [IntrNoMem], + : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.read.ptx.sreg.envreg14">, GCCBuiltin<"__nvvm_read_ptx_sreg_envreg14">; def int_nvvm_read_ptx_sreg_envreg15 - : Intrinsic<[llvm_i32_ty], [], [IntrNoMem], + : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.read.ptx.sreg.envreg15">, GCCBuiltin<"__nvvm_read_ptx_sreg_envreg15">; def int_nvvm_read_ptx_sreg_envreg16 - : Intrinsic<[llvm_i32_ty], [], [IntrNoMem], + : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.read.ptx.sreg.envreg16">, GCCBuiltin<"__nvvm_read_ptx_sreg_envreg16">; def int_nvvm_read_ptx_sreg_envreg17 - : Intrinsic<[llvm_i32_ty], [], [IntrNoMem], + : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.read.ptx.sreg.envreg17">, GCCBuiltin<"__nvvm_read_ptx_sreg_envreg17">; def int_nvvm_read_ptx_sreg_envreg18 - : Intrinsic<[llvm_i32_ty], [], [IntrNoMem], + : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.read.ptx.sreg.envreg18">, 
GCCBuiltin<"__nvvm_read_ptx_sreg_envreg18">; def int_nvvm_read_ptx_sreg_envreg19 - : Intrinsic<[llvm_i32_ty], [], [IntrNoMem], + : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.read.ptx.sreg.envreg19">, GCCBuiltin<"__nvvm_read_ptx_sreg_envreg19">; def int_nvvm_read_ptx_sreg_envreg20 - : Intrinsic<[llvm_i32_ty], [], [IntrNoMem], + : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.read.ptx.sreg.envreg20">, GCCBuiltin<"__nvvm_read_ptx_sreg_envreg20">; def int_nvvm_read_ptx_sreg_envreg21 - : Intrinsic<[llvm_i32_ty], [], [IntrNoMem], + : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.read.ptx.sreg.envreg21">, GCCBuiltin<"__nvvm_read_ptx_sreg_envreg21">; def int_nvvm_read_ptx_sreg_envreg22 - : Intrinsic<[llvm_i32_ty], [], [IntrNoMem], + : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.read.ptx.sreg.envreg22">, GCCBuiltin<"__nvvm_read_ptx_sreg_envreg22">; def int_nvvm_read_ptx_sreg_envreg23 - : Intrinsic<[llvm_i32_ty], [], [IntrNoMem], + : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.read.ptx.sreg.envreg23">, GCCBuiltin<"__nvvm_read_ptx_sreg_envreg23">; def int_nvvm_read_ptx_sreg_envreg24 - : Intrinsic<[llvm_i32_ty], [], [IntrNoMem], + : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.read.ptx.sreg.envreg24">, GCCBuiltin<"__nvvm_read_ptx_sreg_envreg24">; def int_nvvm_read_ptx_sreg_envreg25 - : Intrinsic<[llvm_i32_ty], [], [IntrNoMem], + : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.read.ptx.sreg.envreg25">, GCCBuiltin<"__nvvm_read_ptx_sreg_envreg25">; def int_nvvm_read_ptx_sreg_envreg26 - : Intrinsic<[llvm_i32_ty], [], [IntrNoMem], + : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.read.ptx.sreg.envreg26">, GCCBuiltin<"__nvvm_read_ptx_sreg_envreg26">; def 
int_nvvm_read_ptx_sreg_envreg27 - : Intrinsic<[llvm_i32_ty], [], [IntrNoMem], + : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.read.ptx.sreg.envreg27">, GCCBuiltin<"__nvvm_read_ptx_sreg_envreg27">; def int_nvvm_read_ptx_sreg_envreg28 - : Intrinsic<[llvm_i32_ty], [], [IntrNoMem], + : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.read.ptx.sreg.envreg28">, GCCBuiltin<"__nvvm_read_ptx_sreg_envreg28">; def int_nvvm_read_ptx_sreg_envreg29 - : Intrinsic<[llvm_i32_ty], [], [IntrNoMem], + : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.read.ptx.sreg.envreg29">, GCCBuiltin<"__nvvm_read_ptx_sreg_envreg29">; def int_nvvm_read_ptx_sreg_envreg30 - : Intrinsic<[llvm_i32_ty], [], [IntrNoMem], + : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.read.ptx.sreg.envreg30">, GCCBuiltin<"__nvvm_read_ptx_sreg_envreg30">; def int_nvvm_read_ptx_sreg_envreg31 - : Intrinsic<[llvm_i32_ty], [], [IntrNoMem], + : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.read.ptx.sreg.envreg31">, GCCBuiltin<"__nvvm_read_ptx_sreg_envreg31">; @@ -4200,49 +4246,49 @@ def int_nvvm_sust_p_3d_v4i32_trap def int_nvvm_rotate_b32 - : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], - [IntrNoMem], "llvm.nvvm.rotate.b32">, + : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, IntrSpeculatable], "llvm.nvvm.rotate.b32">, GCCBuiltin<"__nvvm_rotate_b32">; def int_nvvm_rotate_b64 - :Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty], - [IntrNoMem], "llvm.nvvm.rotate.b64">, + : DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty], + [IntrNoMem, IntrSpeculatable], "llvm.nvvm.rotate.b64">, GCCBuiltin<"__nvvm_rotate_b64">; def int_nvvm_rotate_right_b64 - : Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty], - [IntrNoMem], "llvm.nvvm.rotate.right.b64">, + : DefaultAttrsIntrinsic<[llvm_i64_ty], 
[llvm_i64_ty, llvm_i32_ty], + [IntrNoMem, IntrSpeculatable], "llvm.nvvm.rotate.right.b64">, GCCBuiltin<"__nvvm_rotate_right_b64">; def int_nvvm_swap_lo_hi_b64 - : Intrinsic<[llvm_i64_ty], [llvm_i64_ty], - [IntrNoMem], "llvm.nvvm.swap.lo.hi.b64">, + : DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty], + [IntrNoMem, IntrSpeculatable], "llvm.nvvm.swap.lo.hi.b64">, GCCBuiltin<"__nvvm_swap_lo_hi_b64">; // Accessing special registers. multiclass PTXReadSRegIntrinsic_v4i32<string regname> { // FIXME: Do we need the 128-bit integer type version? -// def _r64 : Intrinsic<[llvm_i128_ty], [], [IntrNoMem]>; +// def _r64 : Intrinsic<[llvm_i128_ty], [], [IntrNoMem, IntrSpeculatable]>; // FIXME: Enable this once v4i32 support is enabled in back-end. -// def _v4i16 : Intrinsic<[llvm_v4i32_ty], [], [IntrNoMem]>; +// def _v4i16 : Intrinsic<[llvm_v4i32_ty], [], [IntrNoMem, IntrSpeculatable]>; - def _x : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>, + def _x : Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>, GCCBuiltin<"__nvvm_read_ptx_sreg_" # regname # "_x">; - def _y : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>, + def _y : Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>, GCCBuiltin<"__nvvm_read_ptx_sreg_" # regname # "_y">; - def _z : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>, + def _z : Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>, GCCBuiltin<"__nvvm_read_ptx_sreg_" # regname # "_z">; - def _w : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>, + def _w : Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>, GCCBuiltin<"__nvvm_read_ptx_sreg_" # regname # "_w">; } class PTXReadSRegIntrinsic_r32<string name> - : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>, + : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>, GCCBuiltin<"__nvvm_read_ptx_sreg_" # name>; class PTXReadSRegIntrinsic_r64<string name> - : Intrinsic<[llvm_i64_ty], [], [IntrNoMem]>, + : DefaultAttrsIntrinsic<[llvm_i64_ty], [], [IntrNoMem, IntrSpeculatable]>, 
GCCBuiltin<"__nvvm_read_ptx_sreg_" # name>; // Intrinsics to read registers with non-constant values. E.g. the values that @@ -4519,4 +4565,20 @@ foreach layout_a = ["row", "col"] in { } // layout_b } // layout_a +// LDMATRIX +class NVVM_LDMATRIX<WMMA_REGS Frag, int Transposed> + : Intrinsic<Frag.regs, [llvm_anyptr_ty], + [IntrReadMem, IntrArgMemOnly, ReadOnly<ArgIndex<0>>, + NoCapture<ArgIndex<0>>], + LDMATRIX_NAME<Frag, Transposed>.intr>; + +foreach transposed = [0, 1] in { + foreach frag = NVVM_MMA_OPS.all_ldmatrix_ops in { + if NVVM_LDMATRIX_SUPPORTED<frag>.ret then { + def LDMATRIX_NAME<frag, transposed>.record + : NVVM_LDMATRIX<frag, transposed>; + } + } +} + } // let TargetPrefix = "nvvm" diff --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td index 92d3bdea37ed..8290342c0d51 100644 --- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td +++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td @@ -31,10 +31,12 @@ let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.". // Get content from current FPSCR register def int_ppc_readflm : GCCBuiltin<"__builtin_readflm">, - Intrinsic<[llvm_double_ty], [], [IntrNoMem]>; + Intrinsic<[llvm_double_ty], [], + [IntrNoMerge, IntrHasSideEffects]>; // Set FPSCR register, and return previous content def int_ppc_setflm : GCCBuiltin<"__builtin_setflm">, - Intrinsic<[llvm_double_ty], [llvm_double_ty], []>; + Intrinsic<[llvm_double_ty], [llvm_double_ty], + [IntrHasSideEffects]>; // Intrinsics for [double]word extended forms of divide instructions def int_ppc_divwe : GCCBuiltin<"__builtin_divwe">, @@ -50,6 +52,15 @@ let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.". 
Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>; + def int_ppc_unpack_longdouble : GCCBuiltin<"__builtin_unpack_longdouble">, + Intrinsic<[llvm_double_ty], + [llvm_ppcf128_ty, llvm_i32_ty], + [IntrNoMem]>; + def int_ppc_pack_longdouble : GCCBuiltin<"__builtin_pack_longdouble">, + Intrinsic<[llvm_ppcf128_ty], + [llvm_double_ty, llvm_double_ty], + [IntrNoMem]>; + // Generate a random number def int_ppc_darn : GCCBuiltin<"__builtin_darn">, Intrinsic<[llvm_i64_ty], [], [IntrNoMem]>; @@ -1042,6 +1053,9 @@ let TargetPrefix = "ppc" in { // All PPC intrinsics start with "llvm.ppc.". def int_ppc_altivec_vbpermq : GCCBuiltin<"__builtin_altivec_vbpermq">, Intrinsic<[llvm_v2i64_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; + def int_ppc_altivec_vbpermd : GCCBuiltin<"__builtin_altivec_vbpermd">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v16i8_ty], + [IntrNoMem]>; } def int_ppc_altivec_vexptefp : PowerPC_Vec_FF_Intrinsic<"vexptefp">; @@ -1626,8 +1640,7 @@ let TargetPrefix = "ppc" in { Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>; // load def int_ppc_load2r - : GCCBuiltin<"__builtin_ppc_load2r">, - Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], [IntrReadMem, IntrArgMemOnly]>; + : Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], [IntrReadMem, IntrArgMemOnly]>; def int_ppc_load4r : GCCBuiltin<"__builtin_ppc_load4r">, Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], [IntrReadMem, IntrArgMemOnly]>; @@ -1706,7 +1719,10 @@ let TargetPrefix = "ppc" in { def int_ppc_fres : GCCBuiltin<"__builtin_ppc_fres">, Intrinsic <[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; - + def int_ppc_addex + : GCCBuiltin<"__builtin_ppc_addex">, + Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty], + [IntrNoMem, IntrHasSideEffects, ImmArg<ArgIndex<2>>]>; def int_ppc_fsel : GCCBuiltin<"__builtin_ppc_fsel">, Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty, llvm_double_ty], [IntrNoMem]>; @@ -1717,6 +1733,33 @@ let TargetPrefix = "ppc" in { 
Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>; def int_ppc_frsqrtes : GCCBuiltin<"__builtin_ppc_frsqrtes">, Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; + def int_ppc_compare_exp_uo : GCCBuiltin<"__builtin_ppc_compare_exp_uo">, + Intrinsic<[llvm_i32_ty], + [llvm_double_ty, llvm_double_ty], + [IntrNoMem]>; + def int_ppc_compare_exp_lt : GCCBuiltin<"__builtin_ppc_compare_exp_lt">, + Intrinsic<[llvm_i32_ty], + [llvm_double_ty, llvm_double_ty], + [IntrNoMem]>; + def int_ppc_compare_exp_gt : GCCBuiltin<"__builtin_ppc_compare_exp_gt">, + Intrinsic<[llvm_i32_ty], + [llvm_double_ty, llvm_double_ty], + [IntrNoMem]>; + def int_ppc_compare_exp_eq : GCCBuiltin<"__builtin_ppc_compare_exp_eq">, + Intrinsic<[llvm_i32_ty], + [llvm_double_ty, llvm_double_ty], + [IntrNoMem]>; + def int_ppc_test_data_class_d : Intrinsic<[llvm_i32_ty], + [llvm_double_ty, llvm_i32_ty], + [IntrNoMem, ImmArg<ArgIndex<1>>]>; + def int_ppc_test_data_class_f : Intrinsic<[llvm_i32_ty], + [llvm_float_ty, llvm_i32_ty], + [IntrNoMem, ImmArg<ArgIndex<1>>]>; + + def int_ppc_convert_f128_to_ppcf128 + : Intrinsic<[llvm_ppcf128_ty], [llvm_f128_ty], [IntrNoMem]>; + def int_ppc_convert_ppcf128_to_f128 + : Intrinsic<[llvm_f128_ty], [llvm_ppcf128_ty], [IntrNoMem]>; } //===----------------------------------------------------------------------===// @@ -1738,4 +1781,11 @@ let TargetPrefix = "ppc" in { llvm_i64_ty, llvm_i64_ty, llvm_i64_ty, llvm_i64_ty], [IntrArgMemOnly, NoCapture<ArgIndex<0>>]>; + def int_ppc_atomic_load_i128 : + Intrinsic<[llvm_i64_ty, llvm_i64_ty], + [llvm_ptr_ty], + [IntrArgMemOnly, IntrReadMem, NoCapture<ArgIndex<0>>]>; + def int_ppc_atomic_store_i128 : + Intrinsic<[], [llvm_i64_ty, llvm_i64_ty, llvm_ptr_ty], + [IntrArgMemOnly, IntrWriteMem, NoCapture<ArgIndex<2>>]>; } diff --git a/llvm/include/llvm/IR/IntrinsicsRISCV.td b/llvm/include/llvm/IR/IntrinsicsRISCV.td index a46709bf09d1..3ceb347e97bf 100644 --- a/llvm/include/llvm/IR/IntrinsicsRISCV.td +++ 
b/llvm/include/llvm/IR/IntrinsicsRISCV.td @@ -159,16 +159,17 @@ let TargetPrefix = "riscv" in { [NoCapture<ArgIndex<0>>]>, RISCVVIntrinsic; // For unit stride load with mask - // Input: (maskedoff, pointer, mask, vl) + // Input: (maskedoff, pointer, mask, vl, ta) class RISCVUSLoadMask : Intrinsic<[llvm_anyvector_ty ], [LLVMMatchType<0>, LLVMPointerType<LLVMMatchType<0>>, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_anyint_ty], - [NoCapture<ArgIndex<1>>, IntrReadMem]>, RISCVVIntrinsic; + llvm_anyint_ty, LLVMMatchType<1>], + [NoCapture<ArgIndex<1>>, ImmArg<ArgIndex<4>>, IntrReadMem]>, + RISCVVIntrinsic; // For unit stride fault-only-first load with mask - // Input: (maskedoff, pointer, mask, vl) + // Input: (maskedoff, pointer, mask, vl, ta) // Output: (data, vl) // NOTE: We model this with default memory properties since we model writing // VL as a side effect. IntrReadMem, IntrHasSideEffects does not work. @@ -177,8 +178,8 @@ let TargetPrefix = "riscv" in { [LLVMMatchType<0>, LLVMPointerType<LLVMMatchType<0>>, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - LLVMMatchType<1>], - [NoCapture<ArgIndex<1>>]>, RISCVVIntrinsic; + LLVMMatchType<1>, LLVMMatchType<1>], + [NoCapture<ArgIndex<1>>, ImmArg<ArgIndex<4>>]>, RISCVVIntrinsic; // For strided load // Input: (pointer, stride, vl) class RISCVSLoad @@ -187,13 +188,15 @@ let TargetPrefix = "riscv" in { llvm_anyint_ty, LLVMMatchType<1>], [NoCapture<ArgIndex<0>>, IntrReadMem]>, RISCVVIntrinsic; // For strided load with mask - // Input: (maskedoff, pointer, stride, mask, vl) + // Input: (maskedoff, pointer, stride, mask, vl, ta) class RISCVSLoadMask : Intrinsic<[llvm_anyvector_ty ], [LLVMMatchType<0>, LLVMPointerType<LLVMMatchType<0>>, llvm_anyint_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMMatchType<1>], - [NoCapture<ArgIndex<1>>, IntrReadMem]>, RISCVVIntrinsic; + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMMatchType<1>, + LLVMMatchType<1>], + [NoCapture<ArgIndex<1>>, ImmArg<ArgIndex<5>>, IntrReadMem]>, + 
RISCVVIntrinsic; // For indexed load // Input: (pointer, index, vl) class RISCVILoad @@ -202,13 +205,15 @@ let TargetPrefix = "riscv" in { llvm_anyvector_ty, llvm_anyint_ty], [NoCapture<ArgIndex<0>>, IntrReadMem]>, RISCVVIntrinsic; // For indexed load with mask - // Input: (maskedoff, pointer, index, mask, vl) + // Input: (maskedoff, pointer, index, mask, vl, ta) class RISCVILoadMask : Intrinsic<[llvm_anyvector_ty ], [LLVMMatchType<0>, LLVMPointerType<LLVMMatchType<0>>, llvm_anyvector_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty], - [NoCapture<ArgIndex<1>>, IntrReadMem]>, RISCVVIntrinsic; + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty, + LLVMMatchType<2>], + [NoCapture<ArgIndex<1>>, ImmArg<ArgIndex<5>>, IntrReadMem]>, + RISCVVIntrinsic; // For unit stride store // Input: (vector_in, pointer, vl) class RISCVUSStore @@ -265,10 +270,16 @@ let TargetPrefix = "riscv" in { [LLVMMatchType<0>, llvm_anyint_ty], [IntrNoMem]>, RISCVVIntrinsic; // For destination vector type is the same as first source vector (with mask). - // Input: (vector_in, mask, vl) + // Input: (vector_in, mask, vl, ta) class RISCVUnaryAAMask : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMMatchType<0>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty, + LLVMMatchType<1>], + [ImmArg<ArgIndex<4>>, IntrNoMem]>, RISCVVIntrinsic; + class RISCVUnaryAAMaskNoTA + : Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty], [IntrNoMem]>, RISCVVIntrinsic; // For destination vector type is the same as first and second source vector. @@ -284,12 +295,13 @@ let TargetPrefix = "riscv" in { [LLVMMatchType<0>, LLVMVectorOfBitcastsToInt<0>, llvm_anyint_ty], [IntrNoMem]>, RISCVVIntrinsic; // For destination vector type is the same as first and second source vector. 
- // Input: (vector_in, vector_in, int_vector_in, vl) + // Input: (vector_in, vector_in, int_vector_in, vl, ta) class RISCVRGatherVVMask : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMVectorOfBitcastsToInt<0>, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty], - [IntrNoMem]>, RISCVVIntrinsic; + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty, + LLVMMatchType<1>], + [ImmArg<ArgIndex<5>>, IntrNoMem]>, RISCVVIntrinsic; // Input: (vector_in, int16_vector_in, vl) class RISCVRGatherEI16VVNoMask : Intrinsic<[llvm_anyvector_ty], @@ -297,13 +309,14 @@ let TargetPrefix = "riscv" in { llvm_anyint_ty], [IntrNoMem]>, RISCVVIntrinsic; // For destination vector type is the same as first and second source vector. - // Input: (vector_in, vector_in, int16_vector_in, vl) + // Input: (vector_in, vector_in, int16_vector_in, vl, ta) class RISCVRGatherEI16VVMask : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMScalarOrSameVectorWidth<0, llvm_i16_ty>, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty], - [IntrNoMem]>, RISCVVIntrinsic; + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty, + LLVMMatchType<1>], + [ImmArg<ArgIndex<5>>, IntrNoMem]>, RISCVVIntrinsic; // For destination vector type is the same as first source vector, and the // second operand is XLen. // Input: (vector_in, xlen_in, vl) @@ -314,12 +327,13 @@ let TargetPrefix = "riscv" in { } // For destination vector type is the same as first source vector (with mask). // Second operand is XLen. 
- // Input: (maskedoff, vector_in, xlen_in, mask, vl) + // Input: (maskedoff, vector_in, xlen_in, mask, vl, ta) class RISCVGatherVXMask : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyint_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMMatchType<1>], - [IntrNoMem]>, RISCVVIntrinsic { + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMMatchType<1>, + LLVMMatchType<1>], + [ImmArg<ArgIndex<5>>, IntrNoMem]>, RISCVVIntrinsic { } // For destination vector type is the same as first source vector. // Input: (vector_in, vector_in/scalar_in, vl) @@ -330,12 +344,13 @@ let TargetPrefix = "riscv" in { let SplatOperand = 2; } // For destination vector type is the same as first source vector (with mask). - // Input: (maskedoff, vector_in, vector_in/scalar_in, mask, vl) + // Input: (maskedoff, vector_in, vector_in/scalar_in, mask, vl, ta) class RISCVBinaryAAXMask : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMMatchType<0>, llvm_any_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty], - [IntrNoMem]>, RISCVVIntrinsic { + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty, + LLVMMatchType<2>], + [ImmArg<ArgIndex<5>>, IntrNoMem]>, RISCVVIntrinsic { let SplatOperand = 3; } // For destination vector type is the same as first source vector. The @@ -347,12 +362,13 @@ let TargetPrefix = "riscv" in { [IntrNoMem]>, RISCVVIntrinsic; // For destination vector type is the same as first source vector (with mask). // The second source operand must match the destination type or be an XLen scalar. 
- // Input: (maskedoff, vector_in, vector_in/scalar_in, mask, vl) + // Input: (maskedoff, vector_in, vector_in/scalar_in, mask, vl, ta) class RISCVBinaryAAShiftMask : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMMatchType<0>, llvm_any_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty], - [IntrNoMem]>, RISCVVIntrinsic; + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty, + LLVMMatchType<2>], + [ImmArg<ArgIndex<5>>, IntrNoMem]>, RISCVVIntrinsic; // For destination vector type is NOT the same as first source vector. // Input: (vector_in, vector_in/scalar_in, vl) class RISCVBinaryABXNoMask @@ -362,12 +378,13 @@ let TargetPrefix = "riscv" in { let SplatOperand = 2; } // For destination vector type is NOT the same as first source vector (with mask). - // Input: (maskedoff, vector_in, vector_in/scalar_in, mask, vl) + // Input: (maskedoff, vector_in, vector_in/scalar_in, mask, vl, ta) class RISCVBinaryABXMask : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, llvm_anyvector_ty, llvm_any_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty], - [IntrNoMem]>, RISCVVIntrinsic { + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty, + LLVMMatchType<3>], + [ImmArg<ArgIndex<5>>, IntrNoMem]>, RISCVVIntrinsic { let SplatOperand = 3; } // For destination vector type is NOT the same as first source vector. The @@ -379,12 +396,13 @@ let TargetPrefix = "riscv" in { [IntrNoMem]>, RISCVVIntrinsic; // For destination vector type is NOT the same as first source vector (with mask). // The second source operand must match the destination type or be an XLen scalar. 
- // Input: (maskedoff, vector_in, vector_in/scalar_in, mask, vl) + // Input: (maskedoff, vector_in, vector_in/scalar_in, mask, vl, ta) class RISCVBinaryABShiftMask : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, llvm_anyvector_ty, llvm_any_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty], - [IntrNoMem]>, RISCVVIntrinsic; + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty, + LLVMMatchType<3>], + [ImmArg<ArgIndex<5>>, IntrNoMem]>, RISCVVIntrinsic; // For binary operations with V0 as input. // Input: (vector_in, vector_in/scalar_in, V0, vl) class RISCVBinaryWithV0 @@ -461,12 +479,13 @@ let TargetPrefix = "riscv" in { } // For Saturating binary operations with mask. // The destination vector type is the same as first source vector. - // Input: (maskedoff, vector_in, vector_in/scalar_in, mask, vl) + // Input: (maskedoff, vector_in, vector_in/scalar_in, mask, vl, ta) class RISCVSaturatingBinaryAAXMask : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMMatchType<0>, llvm_any_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty], - [IntrNoMem, IntrHasSideEffects]>, RISCVVIntrinsic { + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty, + LLVMMatchType<2>], + [ImmArg<ArgIndex<5>>, IntrNoMem, IntrHasSideEffects]>, RISCVVIntrinsic { let SplatOperand = 3; } // For Saturating binary operations. @@ -480,12 +499,13 @@ let TargetPrefix = "riscv" in { // For Saturating binary operations with mask. // The destination vector type is the same as first source vector. // The second source operand matches the destination type or is an XLen scalar. 
- // Input: (maskedoff, vector_in, vector_in/scalar_in, mask, vl) + // Input: (maskedoff, vector_in, vector_in/scalar_in, mask, vl, ta) class RISCVSaturatingBinaryAAShiftMask : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMMatchType<0>, llvm_any_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty], - [IntrNoMem, IntrHasSideEffects]>, RISCVVIntrinsic; + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty, + LLVMMatchType<2>], + [ImmArg<ArgIndex<5>>, IntrNoMem, IntrHasSideEffects]>, RISCVVIntrinsic; // For Saturating binary operations. // The destination vector type is NOT the same as first source vector. // The second source operand matches the destination type or is an XLen scalar. @@ -497,12 +517,13 @@ let TargetPrefix = "riscv" in { // For Saturating binary operations with mask. // The destination vector type is NOT the same as first source vector (with mask). // The second source operand matches the destination type or is an XLen scalar. - // Input: (maskedoff, vector_in, vector_in/scalar_in, mask, vl) + // Input: (maskedoff, vector_in, vector_in/scalar_in, mask, vl, ta) class RISCVSaturatingBinaryABShiftMask : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, llvm_anyvector_ty, llvm_any_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty], - [IntrNoMem, IntrHasSideEffects]>, RISCVVIntrinsic; + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty, + LLVMMatchType<3>], + [ImmArg<ArgIndex<5>>, IntrNoMem, IntrHasSideEffects]>, RISCVVIntrinsic; class RISCVTernaryAAAXNoMask : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyint_ty, @@ -579,13 +600,13 @@ let TargetPrefix = "riscv" in { [llvm_anyvector_ty, llvm_anyint_ty], [IntrNoMem]>, RISCVVIntrinsic; // For destination vector type is NOT the same as source vector (with mask). 
- // Input: (maskedoff, vector_in, mask, vl) + // Input: (maskedoff, vector_in, mask, vl, ta) class RISCVUnaryABMask : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, llvm_anyvector_ty, LLVMScalarOrSameVectorWidth<1, llvm_i1_ty>, - llvm_anyint_ty], - [IntrNoMem]>, RISCVVIntrinsic; + llvm_anyint_ty, LLVMMatchType<2>], + [ImmArg<ArgIndex<4>>, IntrNoMem]>, RISCVVIntrinsic; // For unary operations with the same vector type in/out without mask // Output: (vector) // Input: (vector_in, vl) @@ -614,12 +635,13 @@ let TargetPrefix = "riscv" in { [llvm_anyvector_ty, llvm_anyint_ty], [IntrNoMem]>, RISCVVIntrinsic; // For Conversion unary operations with mask. - // Input: (maskedoff, vector_in, mask, vl) + // Input: (maskedoff, vector_in, mask, vl, ta) class RISCVConversionMask : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, llvm_anyvector_ty, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty], - [IntrNoMem]>, RISCVVIntrinsic; + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty, + LLVMMatchType<2>], + [ImmArg<ArgIndex<4>>, IntrNoMem]>, RISCVVIntrinsic; // For atomic operations without mask // Input: (base, index, value, vl) class RISCVAMONoMask @@ -643,15 +665,16 @@ let TargetPrefix = "riscv" in { [LLVMPointerToElt<0>, llvm_anyint_ty], [NoCapture<ArgIndex<0>>, IntrReadMem]>, RISCVVIntrinsic; // For unit stride segment load with mask - // Input: (maskedoff, pointer, mask, vl) + // Input: (maskedoff, pointer, mask, vl, ta) class RISCVUSSegLoadMask<int nf> : Intrinsic<!listconcat([llvm_anyvector_ty], !listsplat(LLVMMatchType<0>, !add(nf, -1))), !listconcat(!listsplat(LLVMMatchType<0>, nf), [LLVMPointerToElt<0>, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_anyint_ty]), - [NoCapture<ArgIndex<nf>>, IntrReadMem]>, RISCVVIntrinsic; + llvm_anyint_ty, LLVMMatchType<1>]), + [ImmArg<ArgIndex<!add(nf, 3)>>, NoCapture<ArgIndex<nf>>, IntrReadMem]>, + RISCVVIntrinsic; // For unit stride fault-only-first segment load // Input: (pointer, vl) @@ -664,7 +687,7 @@ 
let TargetPrefix = "riscv" in { [LLVMPointerToElt<0>, LLVMMatchType<1>], [NoCapture<ArgIndex<0>>]>, RISCVVIntrinsic; // For unit stride fault-only-first segment load with mask - // Input: (maskedoff, pointer, mask, vl) + // Input: (maskedoff, pointer, mask, vl, ta) // Output: (data, vl) // NOTE: We model this with default memory properties since we model writing // VL as a side effect. IntrReadMem, IntrHasSideEffects does not work. @@ -674,8 +697,9 @@ let TargetPrefix = "riscv" in { !listconcat(!listsplat(LLVMMatchType<0>, nf), [LLVMPointerToElt<0>, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - LLVMMatchType<1>]), - [NoCapture<ArgIndex<nf>>]>, RISCVVIntrinsic; + LLVMMatchType<1>, LLVMMatchType<1>]), + [ImmArg<ArgIndex<!add(nf, 3)>>, NoCapture<ArgIndex<nf>>]>, + RISCVVIntrinsic; // For stride segment load // Input: (pointer, offset, vl) @@ -685,7 +709,7 @@ let TargetPrefix = "riscv" in { [LLVMPointerToElt<0>, llvm_anyint_ty, LLVMMatchType<1>], [NoCapture<ArgIndex<0>>, IntrReadMem]>, RISCVVIntrinsic; // For stride segment load with mask - // Input: (maskedoff, pointer, offset, mask, vl) + // Input: (maskedoff, pointer, offset, mask, vl, ta) class RISCVSSegLoadMask<int nf> : Intrinsic<!listconcat([llvm_anyvector_ty], !listsplat(LLVMMatchType<0>, !add(nf, -1))), @@ -693,8 +717,9 @@ let TargetPrefix = "riscv" in { [LLVMPointerToElt<0>, llvm_anyint_ty, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - LLVMMatchType<1>]), - [NoCapture<ArgIndex<nf>>, IntrReadMem]>, RISCVVIntrinsic; + LLVMMatchType<1>, LLVMMatchType<1>]), + [ImmArg<ArgIndex<!add(nf, 4)>>, NoCapture<ArgIndex<nf>>, IntrReadMem]>, + RISCVVIntrinsic; // For indexed segment load // Input: (pointer, index, vl) @@ -704,7 +729,7 @@ let TargetPrefix = "riscv" in { [LLVMPointerToElt<0>, llvm_anyvector_ty, llvm_anyint_ty], [NoCapture<ArgIndex<0>>, IntrReadMem]>, RISCVVIntrinsic; // For indexed segment load with mask - // Input: (maskedoff, pointer, index, mask, vl) + // Input: (maskedoff, pointer, index, mask, vl, ta) 
class RISCVISegLoadMask<int nf> : Intrinsic<!listconcat([llvm_anyvector_ty], !listsplat(LLVMMatchType<0>, !add(nf, -1))), @@ -712,8 +737,9 @@ let TargetPrefix = "riscv" in { [LLVMPointerToElt<0>, llvm_anyvector_ty, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_anyint_ty]), - [NoCapture<ArgIndex<nf>>, IntrReadMem]>, RISCVVIntrinsic; + llvm_anyint_ty, LLVMMatchType<2>]), + [ImmArg<ArgIndex<!add(nf, 4)>>, NoCapture<ArgIndex<nf>>, IntrReadMem]>, + RISCVVIntrinsic; // For unit stride segment store // Input: (value, pointer, vl) @@ -947,8 +973,8 @@ let TargetPrefix = "riscv" in { defm vsoxei : RISCVIStore; defm vsuxei : RISCVIStore; - def int_riscv_vle1 : RISCVUSLoad; - def int_riscv_vse1 : RISCVUSStore; + def int_riscv_vlm : RISCVUSLoad; + def int_riscv_vsm : RISCVUSStore; defm vamoswap : RISCVAMO; defm vamoadd : RISCVAMO; @@ -1049,7 +1075,7 @@ let TargetPrefix = "riscv" in { defm vssubu : RISCVSaturatingBinaryAAX; defm vssub : RISCVSaturatingBinaryAAX; - def int_riscv_vmerge : RISCVBinaryWithV0; + defm vmerge : RISCVBinaryWithV0; def int_riscv_vmv_v_v : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, llvm_anyint_ty], @@ -1124,7 +1150,7 @@ let TargetPrefix = "riscv" in { defm vrgather_vx : RISCVRGatherVX; defm vrgatherei16_vv : RISCVRGatherEI16VV; - def "int_riscv_vcompress" : RISCVUnaryAAMask; + def "int_riscv_vcompress" : RISCVUnaryAAMaskNoTA; defm vaaddu : RISCVSaturatingBinaryAAX; defm vaadd : RISCVSaturatingBinaryAAX; @@ -1159,25 +1185,25 @@ let TargetPrefix = "riscv" in { defm vwredsum : RISCVReduction; defm vfredosum : RISCVReduction; - defm vfredsum : RISCVReduction; + defm vfredusum : RISCVReduction; defm vfredmin : RISCVReduction; defm vfredmax : RISCVReduction; - defm vfwredsum : RISCVReduction; + defm vfwredusum : RISCVReduction; defm vfwredosum : RISCVReduction; def int_riscv_vmand: RISCVBinaryAAANoMask; def int_riscv_vmnand: RISCVBinaryAAANoMask; - def int_riscv_vmandnot: RISCVBinaryAAANoMask; + def int_riscv_vmandn: RISCVBinaryAAANoMask; def 
int_riscv_vmxor: RISCVBinaryAAANoMask; def int_riscv_vmor: RISCVBinaryAAANoMask; def int_riscv_vmnor: RISCVBinaryAAANoMask; - def int_riscv_vmornot: RISCVBinaryAAANoMask; + def int_riscv_vmorn: RISCVBinaryAAANoMask; def int_riscv_vmxnor: RISCVBinaryAAANoMask; def int_riscv_vmclr : RISCVNullaryIntrinsic; def int_riscv_vmset : RISCVNullaryIntrinsic; - defm vpopc : RISCVMaskUnarySOut; + defm vcpop : RISCVMaskUnarySOut; defm vfirst : RISCVMaskUnarySOut; defm vmsbf : RISCVMaskUnaryMOut; defm vmsof : RISCVMaskUnaryMOut; @@ -1245,4 +1271,15 @@ let TargetPrefix = "riscv" in { defm vsuxseg # nf : RISCVISegStore<nf>; } + // Strided loads/stores for fixed vectors. + def int_riscv_masked_strided_load + : Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, llvm_anyptr_ty, + llvm_anyint_ty, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>], + [NoCapture<ArgIndex<1>>, IntrReadMem]>; + def int_riscv_masked_strided_store + : Intrinsic<[], + [llvm_anyvector_ty, llvm_anyptr_ty, + llvm_anyint_ty, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>], + [NoCapture<ArgIndex<1>>, IntrWriteMem]>; } // TargetPrefix = "riscv" diff --git a/llvm/include/llvm/IR/IntrinsicsSystemZ.td b/llvm/include/llvm/IR/IntrinsicsSystemZ.td index 81435e98bea0..a149b571072c 100644 --- a/llvm/include/llvm/IR/IntrinsicsSystemZ.td +++ b/llvm/include/llvm/IR/IntrinsicsSystemZ.td @@ -144,7 +144,7 @@ multiclass SystemZBinaryCCBHF { def fs : SystemZBinaryCC<llvm_v4i32_ty>; } -multiclass SystemZCompareBHFG<string name> { +multiclass SystemZCompareBHFG { def bs : SystemZBinaryCC<llvm_v16i8_ty>; def hs : SystemZBinaryCC<llvm_v8i16_ty>; def fs : SystemZBinaryCC<llvm_v4i32_ty>; @@ -341,9 +341,9 @@ let TargetPrefix = "s390" in { def int_s390_vtm : SystemZBinaryConv<"vtm", llvm_i32_ty, llvm_v16i8_ty>; - defm int_s390_vceq : SystemZCompareBHFG<"vceq">; - defm int_s390_vch : SystemZCompareBHFG<"vch">; - defm int_s390_vchl : SystemZCompareBHFG<"vchl">; + defm int_s390_vceq : SystemZCompareBHFG; + defm int_s390_vch : SystemZCompareBHFG; + 
defm int_s390_vchl : SystemZCompareBHFG; defm int_s390_vfae : SystemZTernaryIntBHF<"vfae">; defm int_s390_vfae : SystemZTernaryIntCCBHF; diff --git a/llvm/include/llvm/IR/IntrinsicsWebAssembly.td b/llvm/include/llvm/IR/IntrinsicsWebAssembly.td index 11990554037d..6a8e6c797f85 100644 --- a/llvm/include/llvm/IR/IntrinsicsWebAssembly.td +++ b/llvm/include/llvm/IR/IntrinsicsWebAssembly.td @@ -50,7 +50,8 @@ def int_wasm_trunc_saturate_unsigned : Intrinsic<[llvm_anyint_ty], //===----------------------------------------------------------------------===// // throw / rethrow -// The immediate argument is an index to a tag, which is 0 for C++. +// The first immediate argument is an index to a tag, which is 0 for C++ +// exception. The second argument is the thrown exception pointer. def int_wasm_throw : Intrinsic<[], [llvm_i32_ty, llvm_ptr_ty], [Throws, IntrNoReturn, ImmArg<ArgIndex<0>>]>; def int_wasm_rethrow : Intrinsic<[], [], [Throws, IntrNoReturn]>; @@ -63,8 +64,9 @@ def int_wasm_get_ehselector : Intrinsic<[llvm_i32_ty], [llvm_token_ty], [IntrHasSideEffects]>; // wasm.catch returns the pointer to the exception object caught by wasm 'catch' -// instruction. This returns a single pointer, which is sufficient for C++ -// support. The immediate argument is an index to for a tag, which is 0 for C++. +// instruction. This returns a single pointer, which is the case for C++ +// exceptions. The immediate argument is an index to for a tag, which is 0 for +// C++ exceptions. 
def int_wasm_catch : Intrinsic<[llvm_ptr_ty], [llvm_i32_ty], [IntrHasSideEffects, ImmArg<ArgIndex<0>>]>; @@ -162,6 +164,15 @@ def int_wasm_q15mulr_sat_signed : [llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem, IntrSpeculatable]>; +def int_wasm_pmin : + Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>], + [IntrNoMem, IntrSpeculatable]>; +def int_wasm_pmax : + Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>], + [IntrNoMem, IntrSpeculatable]>; + def int_wasm_extadd_pairwise_signed : Intrinsic<[llvm_anyvector_ty], [LLVMSubdivide2VectorType<0>], @@ -172,6 +183,59 @@ def int_wasm_extadd_pairwise_unsigned : [IntrNoMem, IntrSpeculatable]>; //===----------------------------------------------------------------------===// +// Relaxed SIMD intrinsics (experimental) +//===----------------------------------------------------------------------===// + +def int_wasm_fma : + Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], + [IntrNoMem, IntrSpeculatable]>; +def int_wasm_fms : + Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], + [IntrNoMem, IntrSpeculatable]>; + +def int_wasm_laneselect : + Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], + [IntrNoMem, IntrSpeculatable]>; + +def int_wasm_relaxed_swizzle : + Intrinsic<[llvm_v16i8_ty], + [llvm_v16i8_ty, llvm_v16i8_ty], + [IntrNoMem, IntrSpeculatable]>; + +def int_wasm_relaxed_min : + Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>], + [IntrNoMem, IntrSpeculatable]>; +def int_wasm_relaxed_max : + Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>], + [IntrNoMem, IntrSpeculatable]>; + +def int_wasm_relaxed_trunc_signed: + Intrinsic<[llvm_v4i32_ty], + [llvm_v4f32_ty], + [IntrNoMem, IntrSpeculatable]>; + +def int_wasm_relaxed_trunc_unsigned: + Intrinsic<[llvm_v4i32_ty], + [llvm_v4f32_ty], + [IntrNoMem, IntrSpeculatable]>; + +def 
int_wasm_relaxed_trunc_zero_signed: + Intrinsic<[llvm_v4i32_ty], + [llvm_v2f64_ty], + [IntrNoMem, IntrSpeculatable]>; + +def int_wasm_relaxed_trunc_zero_unsigned: + Intrinsic<[llvm_v4i32_ty], + [llvm_v2f64_ty], + [IntrNoMem, IntrSpeculatable]>; + + +//===----------------------------------------------------------------------===// // Thread-local storage intrinsics //===----------------------------------------------------------------------===// diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td index 5848356b5b1a..8de737a1c7a5 100644 --- a/llvm/include/llvm/IR/IntrinsicsX86.td +++ b/llvm/include/llvm/IR/IntrinsicsX86.td @@ -792,7 +792,7 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_sse41_mpsadbw : GCCBuiltin<"__builtin_ia32_mpsadbw128">, Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty,llvm_i8_ty], - [IntrNoMem, Commutative, ImmArg<ArgIndex<2>>]>; + [IntrNoMem, ImmArg<ArgIndex<2>>]>; } // Test instruction with bitwise comparison. @@ -1779,7 +1779,7 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
llvm_v32i8_ty], [IntrNoMem]>; def int_x86_avx2_mpsadbw : GCCBuiltin<"__builtin_ia32_mpsadbw256">, Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty, - llvm_i8_ty], [IntrNoMem, Commutative, ImmArg<ArgIndex<2>>]>; + llvm_i8_ty], [IntrNoMem, ImmArg<ArgIndex<2>>]>; } //===----------------------------------------------------------------------===// @@ -5093,6 +5093,10 @@ let TargetPrefix = "x86" in { [llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_x86amx_ty, llvm_x86amx_ty, llvm_x86amx_ty], []>; + def int_x86_cast_vector_to_tile: + Intrinsic<[llvm_x86amx_ty], [llvm_anyvector_ty], [IntrNoMem]>; + def int_x86_cast_tile_to_vector: + Intrinsic<[llvm_anyvector_ty], [llvm_x86amx_ty], [IntrNoMem]>; } //===----------------------------------------------------------------------===// @@ -5108,3 +5112,757 @@ let TargetPrefix = "x86" in { def int_x86_senduipi : GCCBuiltin<"__builtin_ia32_senduipi">, Intrinsic<[], [llvm_i64_ty], []>; } + +//===----------------------------------------------------------------------===// +// avx512_fp16: vaddph +let TargetPrefix = "x86" in { + def int_x86_avx512fp16_add_ph_512 + : GCCBuiltin<"__builtin_ia32_addph512">, + Intrinsic<[ llvm_v32f16_ty ], + [ llvm_v32f16_ty, llvm_v32f16_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg<ArgIndex<2>> ]>; + def int_x86_avx512fp16_sub_ph_512 + : GCCBuiltin<"__builtin_ia32_subph512">, + Intrinsic<[ llvm_v32f16_ty ], + [ llvm_v32f16_ty, llvm_v32f16_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg<ArgIndex<2>> ]>; + def int_x86_avx512fp16_mul_ph_512 + : GCCBuiltin<"__builtin_ia32_mulph512">, + Intrinsic<[ llvm_v32f16_ty ], + [ llvm_v32f16_ty, llvm_v32f16_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg<ArgIndex<2>> ]>; + def int_x86_avx512fp16_div_ph_512 + : GCCBuiltin<"__builtin_ia32_divph512">, + Intrinsic<[ llvm_v32f16_ty ], + [ llvm_v32f16_ty, llvm_v32f16_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg<ArgIndex<2>> ]>; + def int_x86_avx512fp16_max_ph_128 + : GCCBuiltin<"__builtin_ia32_maxph128">, + Intrinsic<[ llvm_v8f16_ty ], + [ 
llvm_v8f16_ty, llvm_v8f16_ty ], [ IntrNoMem ]>; + def int_x86_avx512fp16_max_ph_256 + : GCCBuiltin<"__builtin_ia32_maxph256">, + Intrinsic<[ llvm_v16f16_ty ], + [ llvm_v16f16_ty, llvm_v16f16_ty ], [ IntrNoMem ]>; + def int_x86_avx512fp16_max_ph_512 + : GCCBuiltin<"__builtin_ia32_maxph512">, + Intrinsic<[ llvm_v32f16_ty ], + [ llvm_v32f16_ty, llvm_v32f16_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg<ArgIndex<2>> ]>; + def int_x86_avx512fp16_min_ph_128 + : GCCBuiltin<"__builtin_ia32_minph128">, + Intrinsic<[ llvm_v8f16_ty ], + [ llvm_v8f16_ty, llvm_v8f16_ty ], [ IntrNoMem ]>; + def int_x86_avx512fp16_min_ph_256 + : GCCBuiltin<"__builtin_ia32_minph256">, + Intrinsic<[ llvm_v16f16_ty ], + [ llvm_v16f16_ty, llvm_v16f16_ty ], [ IntrNoMem ]>; + def int_x86_avx512fp16_min_ph_512 + : GCCBuiltin<"__builtin_ia32_minph512">, + Intrinsic<[ llvm_v32f16_ty ], + [ llvm_v32f16_ty, llvm_v32f16_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg<ArgIndex<2>> ]>; + + def int_x86_avx512fp16_mask_cmp_ph_512 + : Intrinsic<[ llvm_v32i1_ty ], + [ llvm_v32f16_ty, llvm_v32f16_ty, llvm_i32_ty, llvm_v32i1_ty, + llvm_i32_ty ], + [ IntrNoMem, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<4>> ]>; + def int_x86_avx512fp16_mask_cmp_ph_256 + : Intrinsic<[ llvm_v16i1_ty ], + [ llvm_v16f16_ty, llvm_v16f16_ty, llvm_i32_ty, llvm_v16i1_ty ], + [ IntrNoMem, ImmArg<ArgIndex<2>> ]>; + def int_x86_avx512fp16_mask_cmp_ph_128 + : Intrinsic<[ llvm_v8i1_ty ], + [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_i32_ty, llvm_v8i1_ty ], + [ IntrNoMem, ImmArg<ArgIndex<2>> ]>; + + def int_x86_avx512fp16_mask_add_sh_round + : GCCBuiltin<"__builtin_ia32_addsh_round_mask">, + Intrinsic<[ llvm_v8f16_ty ], + [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty, + llvm_i32_ty ], + [ IntrNoMem, ImmArg<ArgIndex<4>> ]>; + def int_x86_avx512fp16_mask_sub_sh_round + : GCCBuiltin<"__builtin_ia32_subsh_round_mask">, + Intrinsic<[ llvm_v8f16_ty ], + [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty, + llvm_i32_ty ], + [ IntrNoMem, ImmArg<ArgIndex<4>> ]>; 
+ def int_x86_avx512fp16_mask_mul_sh_round + : GCCBuiltin<"__builtin_ia32_mulsh_round_mask">, + Intrinsic<[ llvm_v8f16_ty ], + [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty, + llvm_i32_ty ], + [ IntrNoMem, ImmArg<ArgIndex<4>> ]>; + def int_x86_avx512fp16_mask_div_sh_round + : GCCBuiltin<"__builtin_ia32_divsh_round_mask">, + Intrinsic<[ llvm_v8f16_ty ], + [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty, + llvm_i32_ty ], + [ IntrNoMem, ImmArg<ArgIndex<4>> ]>; + def int_x86_avx512fp16_mask_min_sh_round + : GCCBuiltin<"__builtin_ia32_minsh_round_mask">, + Intrinsic<[ llvm_v8f16_ty ], + [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty, + llvm_i32_ty ], + [ IntrNoMem, ImmArg<ArgIndex<4>> ]>; + def int_x86_avx512fp16_mask_max_sh_round + : GCCBuiltin<"__builtin_ia32_maxsh_round_mask">, + Intrinsic<[ llvm_v8f16_ty ], + [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty, + llvm_i32_ty ], + [ IntrNoMem, ImmArg<ArgIndex<4>> ]>; + def int_x86_avx512fp16_mask_cmp_sh + : GCCBuiltin<"__builtin_ia32_cmpsh_mask">, + Intrinsic<[ llvm_i8_ty ], + [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_i32_ty, llvm_i8_ty, + llvm_i32_ty ], + [ IntrNoMem, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<4>> ]>; + def int_x86_avx512fp16_vcomi_sh + : GCCBuiltin<"__builtin_ia32_vcomish">, + Intrinsic<[ llvm_i32_ty ], + [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_i32_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>> ]>; + + def int_x86_avx512fp16_mask_vcvtph2psx_128 + : GCCBuiltin<"__builtin_ia32_vcvtph2psx128_mask">, + Intrinsic<[ llvm_v4f32_ty ], + [ llvm_v8f16_ty, llvm_v4f32_ty, llvm_i8_ty ], [ IntrNoMem ]>; + def int_x86_avx512fp16_mask_vcvtph2psx_256 + : GCCBuiltin<"__builtin_ia32_vcvtph2psx256_mask">, + Intrinsic<[ llvm_v8f32_ty ], + [ llvm_v8f16_ty, llvm_v8f32_ty, llvm_i8_ty ], [ IntrNoMem ]>; + def int_x86_avx512fp16_mask_vcvtph2psx_512 + : GCCBuiltin<"__builtin_ia32_vcvtph2psx512_mask">, + Intrinsic<[ llvm_v16f32_ty ], + [ llvm_v16f16_ty, llvm_v16f32_ty, 
llvm_i16_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg<ArgIndex<3>> ]>; + def int_x86_avx512fp16_mask_vcvtps2phx_128 + : GCCBuiltin<"__builtin_ia32_vcvtps2phx128_mask">, + Intrinsic<[ llvm_v8f16_ty ], + [ llvm_v4f32_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>; + def int_x86_avx512fp16_mask_vcvtps2phx_256 + : GCCBuiltin<"__builtin_ia32_vcvtps2phx256_mask">, + Intrinsic<[ llvm_v8f16_ty ], + [ llvm_v8f32_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>; + def int_x86_avx512fp16_mask_vcvtps2phx_512 + : GCCBuiltin<"__builtin_ia32_vcvtps2phx512_mask">, + Intrinsic<[ llvm_v16f16_ty ], + [ llvm_v16f32_ty, llvm_v16f16_ty, llvm_i16_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg<ArgIndex<3>> ]>; + def int_x86_avx512fp16_mask_vcvtpd2ph_128 + : GCCBuiltin<"__builtin_ia32_vcvtpd2ph128_mask">, + Intrinsic<[ llvm_v8f16_ty ], + [ llvm_v2f64_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>; + def int_x86_avx512fp16_mask_vcvtpd2ph_256 + : GCCBuiltin<"__builtin_ia32_vcvtpd2ph256_mask">, + Intrinsic<[ llvm_v8f16_ty ], + [ llvm_v4f64_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>; + def int_x86_avx512fp16_mask_vcvtpd2ph_512 + : GCCBuiltin<"__builtin_ia32_vcvtpd2ph512_mask">, + Intrinsic<[ llvm_v8f16_ty ], + [ llvm_v8f64_ty, llvm_v8f16_ty, llvm_i8_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg<ArgIndex<3>> ]>; + def int_x86_avx512fp16_mask_vcvtph2pd_128 + : GCCBuiltin<"__builtin_ia32_vcvtph2pd128_mask">, + Intrinsic<[ llvm_v2f64_ty ], + [ llvm_v8f16_ty, llvm_v2f64_ty, llvm_i8_ty ], [ IntrNoMem ]>; + def int_x86_avx512fp16_mask_vcvtph2pd_256 + : GCCBuiltin<"__builtin_ia32_vcvtph2pd256_mask">, + Intrinsic<[ llvm_v4f64_ty ], + [ llvm_v8f16_ty, llvm_v4f64_ty, llvm_i8_ty ], [ IntrNoMem ]>; + def int_x86_avx512fp16_mask_vcvtph2pd_512 + : GCCBuiltin<"__builtin_ia32_vcvtph2pd512_mask">, + Intrinsic<[ llvm_v8f64_ty ], + [ llvm_v8f16_ty, llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg<ArgIndex<3>> ]>; + def int_x86_avx512fp16_mask_vcvtsh2ss_round + : 
GCCBuiltin<"__builtin_ia32_vcvtsh2ss_round_mask">, + Intrinsic<[ llvm_v4f32_ty ], + [ llvm_v4f32_ty, llvm_v8f16_ty, llvm_v4f32_ty, llvm_i8_ty, + llvm_i32_ty ], + [ IntrNoMem, ImmArg<ArgIndex<4>> ]>; + def int_x86_avx512fp16_mask_vcvtss2sh_round + : GCCBuiltin<"__builtin_ia32_vcvtss2sh_round_mask">, + Intrinsic<[ llvm_v8f16_ty ], + [ llvm_v8f16_ty, llvm_v4f32_ty, llvm_v8f16_ty, llvm_i8_ty, + llvm_i32_ty ], + [ IntrNoMem, ImmArg<ArgIndex<4>> ]>; + def int_x86_avx512fp16_mask_vcvtsd2sh_round + : GCCBuiltin<"__builtin_ia32_vcvtsd2sh_round_mask">, + Intrinsic<[ llvm_v8f16_ty ], + [ llvm_v8f16_ty, llvm_v2f64_ty, llvm_v8f16_ty, llvm_i8_ty, + llvm_i32_ty ], + [ IntrNoMem, ImmArg<ArgIndex<4>> ]>; + def int_x86_avx512fp16_mask_vcvtsh2sd_round + : GCCBuiltin<"__builtin_ia32_vcvtsh2sd_round_mask">, + Intrinsic<[ llvm_v2f64_ty ], + [ llvm_v2f64_ty, llvm_v8f16_ty, llvm_v2f64_ty, llvm_i8_ty, + llvm_i32_ty ], + [ IntrNoMem, ImmArg<ArgIndex<4>> ]>; + + def int_x86_avx512fp16_mask_vcvtph2w_128 + : GCCBuiltin<"__builtin_ia32_vcvtph2w128_mask">, + Intrinsic<[ llvm_v8i16_ty ], + [ llvm_v8f16_ty, llvm_v8i16_ty, llvm_i8_ty ], [ IntrNoMem ]>; + def int_x86_avx512fp16_mask_vcvtph2w_256 + : GCCBuiltin<"__builtin_ia32_vcvtph2w256_mask">, + Intrinsic<[ llvm_v16i16_ty ], + [ llvm_v16f16_ty, llvm_v16i16_ty, llvm_i16_ty ], + [ IntrNoMem ]>; + def int_x86_avx512fp16_mask_vcvtph2w_512 + : GCCBuiltin<"__builtin_ia32_vcvtph2w512_mask">, + Intrinsic<[ llvm_v32i16_ty ], + [ llvm_v32f16_ty, llvm_v32i16_ty, llvm_i32_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg<ArgIndex<3>> ]>; + def int_x86_avx512fp16_mask_vcvttph2w_128 + : GCCBuiltin<"__builtin_ia32_vcvttph2w128_mask">, + Intrinsic<[ llvm_v8i16_ty ], + [ llvm_v8f16_ty, llvm_v8i16_ty, llvm_i8_ty ], [ IntrNoMem ]>; + def int_x86_avx512fp16_mask_vcvttph2w_256 + : GCCBuiltin<"__builtin_ia32_vcvttph2w256_mask">, + Intrinsic<[ llvm_v16i16_ty ], + [ llvm_v16f16_ty, llvm_v16i16_ty, llvm_i16_ty ], + [ IntrNoMem ]>; + def int_x86_avx512fp16_mask_vcvttph2w_512 + : 
GCCBuiltin<"__builtin_ia32_vcvttph2w512_mask">, + Intrinsic<[ llvm_v32i16_ty ], + [ llvm_v32f16_ty, llvm_v32i16_ty, llvm_i32_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg<ArgIndex<3>> ]>; + def int_x86_avx512fp16_mask_vcvtph2uw_128 + : GCCBuiltin<"__builtin_ia32_vcvtph2uw128_mask">, + Intrinsic<[ llvm_v8i16_ty ], + [ llvm_v8f16_ty, llvm_v8i16_ty, llvm_i8_ty ], [ IntrNoMem ]>; + def int_x86_avx512fp16_mask_vcvtph2uw_256 + : GCCBuiltin<"__builtin_ia32_vcvtph2uw256_mask">, + Intrinsic<[ llvm_v16i16_ty ], + [ llvm_v16f16_ty, llvm_v16i16_ty, llvm_i16_ty ], + [ IntrNoMem ]>; + def int_x86_avx512fp16_mask_vcvtph2uw_512 + : GCCBuiltin<"__builtin_ia32_vcvtph2uw512_mask">, + Intrinsic<[ llvm_v32i16_ty ], + [ llvm_v32f16_ty, llvm_v32i16_ty, llvm_i32_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg<ArgIndex<3>> ]>; + def int_x86_avx512fp16_mask_vcvttph2uw_128 + : GCCBuiltin<"__builtin_ia32_vcvttph2uw128_mask">, + Intrinsic<[ llvm_v8i16_ty ], + [ llvm_v8f16_ty, llvm_v8i16_ty, llvm_i8_ty ], [ IntrNoMem ]>; + def int_x86_avx512fp16_mask_vcvttph2uw_256 + : GCCBuiltin<"__builtin_ia32_vcvttph2uw256_mask">, + Intrinsic<[ llvm_v16i16_ty ], + [ llvm_v16f16_ty, llvm_v16i16_ty, llvm_i16_ty ], + [ IntrNoMem ]>; + def int_x86_avx512fp16_mask_vcvttph2uw_512 + : GCCBuiltin<"__builtin_ia32_vcvttph2uw512_mask">, + Intrinsic<[ llvm_v32i16_ty ], + [ llvm_v32f16_ty, llvm_v32i16_ty, llvm_i32_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg<ArgIndex<3>> ]>; + + def int_x86_avx512fp16_mask_vcvtph2dq_128 + : GCCBuiltin<"__builtin_ia32_vcvtph2dq128_mask">, + Intrinsic<[ llvm_v4i32_ty ], + [ llvm_v8f16_ty, llvm_v4i32_ty, llvm_i8_ty ], [ IntrNoMem ]>; + def int_x86_avx512fp16_mask_vcvtph2dq_256 + : GCCBuiltin<"__builtin_ia32_vcvtph2dq256_mask">, + Intrinsic<[ llvm_v8i32_ty ], + [ llvm_v8f16_ty, llvm_v8i32_ty, llvm_i8_ty ], [ IntrNoMem ]>; + def int_x86_avx512fp16_mask_vcvtph2dq_512 + : GCCBuiltin<"__builtin_ia32_vcvtph2dq512_mask">, + Intrinsic<[ llvm_v16i32_ty ], + [ llvm_v16f16_ty, llvm_v16i32_ty, llvm_i16_ty, llvm_i32_ty 
], + [ IntrNoMem, ImmArg<ArgIndex<3>> ]>; + def int_x86_avx512fp16_mask_vcvtph2udq_128 + : GCCBuiltin<"__builtin_ia32_vcvtph2udq128_mask">, + Intrinsic<[ llvm_v4i32_ty ], + [ llvm_v8f16_ty, llvm_v4i32_ty, llvm_i8_ty ], [ IntrNoMem ]>; + def int_x86_avx512fp16_mask_vcvtph2udq_256 + : GCCBuiltin<"__builtin_ia32_vcvtph2udq256_mask">, + Intrinsic<[ llvm_v8i32_ty ], + [ llvm_v8f16_ty, llvm_v8i32_ty, llvm_i8_ty ], [ IntrNoMem ]>; + def int_x86_avx512fp16_mask_vcvtph2udq_512 + : GCCBuiltin<"__builtin_ia32_vcvtph2udq512_mask">, + Intrinsic<[ llvm_v16i32_ty ], + [ llvm_v16f16_ty, llvm_v16i32_ty, llvm_i16_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg<ArgIndex<3>> ]>; + def int_x86_avx512fp16_mask_vcvtdq2ph_128 + : GCCBuiltin<"__builtin_ia32_vcvtdq2ph128_mask">, + Intrinsic<[ llvm_v8f16_ty ], + [ llvm_v4i32_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>; + def int_x86_avx512fp16_mask_vcvtudq2ph_128 + : GCCBuiltin<"__builtin_ia32_vcvtudq2ph128_mask">, + Intrinsic<[ llvm_v8f16_ty ], + [ llvm_v4i32_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>; + def int_x86_avx512fp16_mask_vcvttph2dq_128 + : GCCBuiltin<"__builtin_ia32_vcvttph2dq128_mask">, + Intrinsic<[ llvm_v4i32_ty ], + [ llvm_v8f16_ty, llvm_v4i32_ty, llvm_i8_ty ], [ IntrNoMem ]>; + def int_x86_avx512fp16_mask_vcvttph2dq_256 + : GCCBuiltin<"__builtin_ia32_vcvttph2dq256_mask">, + Intrinsic<[ llvm_v8i32_ty ], + [ llvm_v8f16_ty, llvm_v8i32_ty, llvm_i8_ty ], [ IntrNoMem ]>; + def int_x86_avx512fp16_mask_vcvttph2dq_512 + : GCCBuiltin<"__builtin_ia32_vcvttph2dq512_mask">, + Intrinsic<[ llvm_v16i32_ty ], + [ llvm_v16f16_ty, llvm_v16i32_ty, llvm_i16_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg<ArgIndex<3>> ]>; + def int_x86_avx512fp16_mask_vcvttph2udq_128 + : GCCBuiltin<"__builtin_ia32_vcvttph2udq128_mask">, + Intrinsic<[ llvm_v4i32_ty ], + [ llvm_v8f16_ty, llvm_v4i32_ty, llvm_i8_ty ], [ IntrNoMem ]>; + def int_x86_avx512fp16_mask_vcvttph2udq_256 + : GCCBuiltin<"__builtin_ia32_vcvttph2udq256_mask">, + Intrinsic<[ llvm_v8i32_ty ], + [ 
llvm_v8f16_ty, llvm_v8i32_ty, llvm_i8_ty ], [ IntrNoMem ]>; + def int_x86_avx512fp16_mask_vcvttph2udq_512 + : GCCBuiltin<"__builtin_ia32_vcvttph2udq512_mask">, + Intrinsic<[ llvm_v16i32_ty ], + [ llvm_v16f16_ty, llvm_v16i32_ty, llvm_i16_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg<ArgIndex<3>> ]>; + + def int_x86_avx512fp16_mask_vcvtqq2ph_128 + : GCCBuiltin<"__builtin_ia32_vcvtqq2ph128_mask">, + Intrinsic<[ llvm_v8f16_ty ], + [ llvm_v2i64_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>; + def int_x86_avx512fp16_mask_vcvtqq2ph_256 + : GCCBuiltin<"__builtin_ia32_vcvtqq2ph256_mask">, + Intrinsic<[ llvm_v8f16_ty ], + [ llvm_v4i64_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>; + def int_x86_avx512fp16_mask_vcvtph2qq_128 + : GCCBuiltin<"__builtin_ia32_vcvtph2qq128_mask">, + Intrinsic<[ llvm_v2i64_ty ], + [ llvm_v8f16_ty, llvm_v2i64_ty, llvm_i8_ty ], [ IntrNoMem ]>; + def int_x86_avx512fp16_mask_vcvtph2qq_256 + : GCCBuiltin<"__builtin_ia32_vcvtph2qq256_mask">, + Intrinsic<[ llvm_v4i64_ty ], + [ llvm_v8f16_ty, llvm_v4i64_ty, llvm_i8_ty ], [ IntrNoMem ]>; + def int_x86_avx512fp16_mask_vcvtph2qq_512 + : GCCBuiltin<"__builtin_ia32_vcvtph2qq512_mask">, + Intrinsic<[ llvm_v8i64_ty ], + [ llvm_v8f16_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg<ArgIndex<3>> ]>; + def int_x86_avx512fp16_mask_vcvtuqq2ph_128 + : GCCBuiltin<"__builtin_ia32_vcvtuqq2ph128_mask">, + Intrinsic<[ llvm_v8f16_ty ], + [ llvm_v2i64_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>; + def int_x86_avx512fp16_mask_vcvtuqq2ph_256 + : GCCBuiltin<"__builtin_ia32_vcvtuqq2ph256_mask">, + Intrinsic<[ llvm_v8f16_ty ], + [ llvm_v4i64_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>; + def int_x86_avx512fp16_mask_vcvtph2uqq_128 + : GCCBuiltin<"__builtin_ia32_vcvtph2uqq128_mask">, + Intrinsic<[ llvm_v2i64_ty ], + [ llvm_v8f16_ty, llvm_v2i64_ty, llvm_i8_ty ], [ IntrNoMem ]>; + def int_x86_avx512fp16_mask_vcvtph2uqq_256 + : GCCBuiltin<"__builtin_ia32_vcvtph2uqq256_mask">, + Intrinsic<[ llvm_v4i64_ty ], + [ 
llvm_v8f16_ty, llvm_v4i64_ty, llvm_i8_ty ], [ IntrNoMem ]>; + def int_x86_avx512fp16_mask_vcvtph2uqq_512 + : GCCBuiltin<"__builtin_ia32_vcvtph2uqq512_mask">, + Intrinsic<[ llvm_v8i64_ty ], + [ llvm_v8f16_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg<ArgIndex<3>> ]>; + def int_x86_avx512fp16_mask_vcvttph2qq_128 + : GCCBuiltin<"__builtin_ia32_vcvttph2qq128_mask">, + Intrinsic<[ llvm_v2i64_ty ], + [ llvm_v8f16_ty, llvm_v2i64_ty, llvm_i8_ty ], [ IntrNoMem ]>; + def int_x86_avx512fp16_mask_vcvttph2qq_256 + : GCCBuiltin<"__builtin_ia32_vcvttph2qq256_mask">, + Intrinsic<[ llvm_v4i64_ty ], + [ llvm_v8f16_ty, llvm_v4i64_ty, llvm_i8_ty ], [ IntrNoMem ]>; + def int_x86_avx512fp16_mask_vcvttph2qq_512 + : GCCBuiltin<"__builtin_ia32_vcvttph2qq512_mask">, + Intrinsic<[ llvm_v8i64_ty ], + [ llvm_v8f16_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg<ArgIndex<3>> ]>; + def int_x86_avx512fp16_mask_vcvttph2uqq_128 + : GCCBuiltin<"__builtin_ia32_vcvttph2uqq128_mask">, + Intrinsic<[ llvm_v2i64_ty ], + [ llvm_v8f16_ty, llvm_v2i64_ty, llvm_i8_ty ], [ IntrNoMem ]>; + def int_x86_avx512fp16_mask_vcvttph2uqq_256 + : GCCBuiltin<"__builtin_ia32_vcvttph2uqq256_mask">, + Intrinsic<[ llvm_v4i64_ty ], + [ llvm_v8f16_ty, llvm_v4i64_ty, llvm_i8_ty ], [ IntrNoMem ]>; + def int_x86_avx512fp16_mask_vcvttph2uqq_512 + : GCCBuiltin<"__builtin_ia32_vcvttph2uqq512_mask">, + Intrinsic<[ llvm_v8i64_ty ], + [ llvm_v8f16_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg<ArgIndex<3>> ]>; + + def int_x86_avx512fp16_vcvtsh2si32 + : GCCBuiltin<"__builtin_ia32_vcvtsh2si32">, + Intrinsic<[ llvm_i32_ty ], [ llvm_v8f16_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg<ArgIndex<1>> ]>; + def int_x86_avx512fp16_vcvtsh2usi32 + : GCCBuiltin<"__builtin_ia32_vcvtsh2usi32">, + Intrinsic<[ llvm_i32_ty ], [ llvm_v8f16_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg<ArgIndex<1>> ]>; + def int_x86_avx512fp16_vcvtsh2si64 + : GCCBuiltin<"__builtin_ia32_vcvtsh2si64">, + Intrinsic<[ 
llvm_i64_ty ], [ llvm_v8f16_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg<ArgIndex<1>> ]>; + def int_x86_avx512fp16_vcvtsh2usi64 + : GCCBuiltin<"__builtin_ia32_vcvtsh2usi64">, + Intrinsic<[ llvm_i64_ty ], [ llvm_v8f16_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg<ArgIndex<1>> ]>; + def int_x86_avx512fp16_vcvtusi2sh + : GCCBuiltin<"__builtin_ia32_vcvtusi2sh">, + Intrinsic<[ llvm_v8f16_ty ], + [ llvm_v8f16_ty, llvm_i32_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg<ArgIndex<2>> ]>; + def int_x86_avx512fp16_vcvtusi642sh + : GCCBuiltin<"__builtin_ia32_vcvtusi642sh">, + Intrinsic<[ llvm_v8f16_ty ], + [ llvm_v8f16_ty, llvm_i64_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg<ArgIndex<2>> ]>; + def int_x86_avx512fp16_vcvtsi2sh + : GCCBuiltin<"__builtin_ia32_vcvtsi2sh">, + Intrinsic<[ llvm_v8f16_ty ], + [ llvm_v8f16_ty, llvm_i32_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg<ArgIndex<2>> ]>; + def int_x86_avx512fp16_vcvtsi642sh + : GCCBuiltin<"__builtin_ia32_vcvtsi642sh">, + Intrinsic<[ llvm_v8f16_ty ], + [ llvm_v8f16_ty, llvm_i64_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg<ArgIndex<2>> ]>; + def int_x86_avx512fp16_vcvttsh2si32 + : GCCBuiltin<"__builtin_ia32_vcvttsh2si32">, + Intrinsic<[ llvm_i32_ty ], [ llvm_v8f16_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg<ArgIndex<1>> ]>; + def int_x86_avx512fp16_vcvttsh2si64 + : GCCBuiltin<"__builtin_ia32_vcvttsh2si64">, + Intrinsic<[ llvm_i64_ty ], [ llvm_v8f16_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg<ArgIndex<1>> ]>; + def int_x86_avx512fp16_vcvttsh2usi32 + : GCCBuiltin<"__builtin_ia32_vcvttsh2usi32">, + Intrinsic<[ llvm_i32_ty ], [ llvm_v8f16_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg<ArgIndex<1>> ]>; + def int_x86_avx512fp16_vcvttsh2usi64 + : GCCBuiltin<"__builtin_ia32_vcvttsh2usi64">, + Intrinsic<[ llvm_i64_ty ], [ llvm_v8f16_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg<ArgIndex<1>> ]>; + + def int_x86_avx512fp16_sqrt_ph_512 + : Intrinsic<[ llvm_v32f16_ty ], [ llvm_v32f16_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg<ArgIndex<1>> ]>; + def int_x86_avx512fp16_mask_sqrt_sh + : 
Intrinsic<[ llvm_v8f16_ty ], + [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty, + llvm_i32_ty ], + [ IntrNoMem, ImmArg<ArgIndex<4>> ]>; + def int_x86_avx512fp16_mask_rsqrt_ph_128 + : GCCBuiltin<"__builtin_ia32_rsqrtph128_mask">, + Intrinsic<[ llvm_v8f16_ty ], + [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>; + def int_x86_avx512fp16_mask_rsqrt_ph_256 + : GCCBuiltin<"__builtin_ia32_rsqrtph256_mask">, + Intrinsic<[ llvm_v16f16_ty ], + [ llvm_v16f16_ty, llvm_v16f16_ty, llvm_i16_ty ], + [ IntrNoMem ]>; + def int_x86_avx512fp16_mask_rsqrt_ph_512 + : GCCBuiltin<"__builtin_ia32_rsqrtph512_mask">, + Intrinsic<[ llvm_v32f16_ty ], + [ llvm_v32f16_ty, llvm_v32f16_ty, llvm_i32_ty ], + [ IntrNoMem ]>; + def int_x86_avx512fp16_mask_rsqrt_sh + : GCCBuiltin<"__builtin_ia32_rsqrtsh_mask">, + Intrinsic<[ llvm_v8f16_ty ], + [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty ], + [ IntrNoMem ]>; + def int_x86_avx512fp16_mask_rcp_ph_128 + : GCCBuiltin<"__builtin_ia32_rcpph128_mask">, + Intrinsic<[ llvm_v8f16_ty ], + [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>; + def int_x86_avx512fp16_mask_rcp_ph_256 + : GCCBuiltin<"__builtin_ia32_rcpph256_mask">, + Intrinsic<[ llvm_v16f16_ty ], + [ llvm_v16f16_ty, llvm_v16f16_ty, llvm_i16_ty ], + [ IntrNoMem ]>; + def int_x86_avx512fp16_mask_rcp_ph_512 + : GCCBuiltin<"__builtin_ia32_rcpph512_mask">, + Intrinsic<[ llvm_v32f16_ty ], + [ llvm_v32f16_ty, llvm_v32f16_ty, llvm_i32_ty ], + [ IntrNoMem ]>; + def int_x86_avx512fp16_mask_rcp_sh + : GCCBuiltin<"__builtin_ia32_rcpsh_mask">, + Intrinsic<[ llvm_v8f16_ty ], + [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty ], + [ IntrNoMem ]>; + def int_x86_avx512fp16_mask_reduce_ph_128 + : GCCBuiltin<"__builtin_ia32_reduceph128_mask">, + Intrinsic<[ llvm_v8f16_ty ], + [ llvm_v8f16_ty, llvm_i32_ty, llvm_v8f16_ty, llvm_i8_ty ], + [ IntrNoMem, ImmArg<ArgIndex<1>> ]>; + def int_x86_avx512fp16_mask_reduce_ph_256 + : GCCBuiltin<"__builtin_ia32_reduceph256_mask">, 
+ Intrinsic<[ llvm_v16f16_ty ], + [ llvm_v16f16_ty, llvm_i32_ty, llvm_v16f16_ty, llvm_i16_ty ], + [ IntrNoMem, ImmArg<ArgIndex<1>> ]>; + def int_x86_avx512fp16_mask_reduce_ph_512 + : GCCBuiltin<"__builtin_ia32_reduceph512_mask">, + Intrinsic<[ llvm_v32f16_ty ], + [ llvm_v32f16_ty, llvm_i32_ty, llvm_v32f16_ty, llvm_i32_ty, + llvm_i32_ty ], + [ IntrNoMem, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<4>> ]>; + def int_x86_avx512fp16_mask_reduce_sh + : GCCBuiltin<"__builtin_ia32_reducesh_mask">, + Intrinsic<[ llvm_v8f16_ty ], + [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty, + llvm_i32_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>> ]>; + def int_x86_avx512fp16_fpclass_ph_128 + : Intrinsic<[ llvm_v8i1_ty ], [ llvm_v8f16_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg<ArgIndex<1>> ]>; + def int_x86_avx512fp16_fpclass_ph_256 + : Intrinsic<[ llvm_v16i1_ty ], [ llvm_v16f16_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg<ArgIndex<1>> ]>; + def int_x86_avx512fp16_fpclass_ph_512 + : Intrinsic<[ llvm_v32i1_ty ], [ llvm_v32f16_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg<ArgIndex<1>> ]>; + def int_x86_avx512fp16_mask_fpclass_sh + : GCCBuiltin<"__builtin_ia32_fpclasssh_mask">, + Intrinsic<[ llvm_i8_ty ], [ llvm_v8f16_ty, llvm_i32_ty, llvm_i8_ty ], + [ IntrNoMem, ImmArg<ArgIndex<1>> ]>; + def int_x86_avx512fp16_mask_getexp_ph_128 + : GCCBuiltin<"__builtin_ia32_getexpph128_mask">, + Intrinsic<[ llvm_v8f16_ty ], + [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>; + def int_x86_avx512fp16_mask_getexp_ph_256 + : GCCBuiltin<"__builtin_ia32_getexpph256_mask">, + Intrinsic<[ llvm_v16f16_ty ], + [ llvm_v16f16_ty, llvm_v16f16_ty, llvm_i16_ty ], + [ IntrNoMem ]>; + def int_x86_avx512fp16_mask_getexp_ph_512 + : GCCBuiltin<"__builtin_ia32_getexpph512_mask">, + Intrinsic<[ llvm_v32f16_ty ], + [ llvm_v32f16_ty, llvm_v32f16_ty, llvm_i32_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg<ArgIndex<3>> ]>; + def int_x86_avx512fp16_mask_getexp_sh + : 
GCCBuiltin<"__builtin_ia32_getexpsh128_round_mask">, + Intrinsic<[ llvm_v8f16_ty ], + [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty, + llvm_i32_ty ], + [ IntrNoMem, ImmArg<ArgIndex<4>> ]>; + def int_x86_avx512fp16_mask_getmant_ph_128 + : GCCBuiltin<"__builtin_ia32_getmantph128_mask">, + Intrinsic<[ llvm_v8f16_ty ], + [ llvm_v8f16_ty, llvm_i32_ty, llvm_v8f16_ty, llvm_i8_ty ], + [ IntrNoMem, ImmArg<ArgIndex<1>> ]>; + def int_x86_avx512fp16_mask_getmant_ph_256 + : GCCBuiltin<"__builtin_ia32_getmantph256_mask">, + Intrinsic<[ llvm_v16f16_ty ], + [ llvm_v16f16_ty, llvm_i32_ty, llvm_v16f16_ty, llvm_i16_ty ], + [ IntrNoMem, ImmArg<ArgIndex<1>> ]>; + def int_x86_avx512fp16_mask_getmant_ph_512 + : GCCBuiltin<"__builtin_ia32_getmantph512_mask">, + Intrinsic<[ llvm_v32f16_ty ], + [ llvm_v32f16_ty, llvm_i32_ty, llvm_v32f16_ty, llvm_i32_ty, + llvm_i32_ty ], + [ IntrNoMem, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<4>> ]>; + def int_x86_avx512fp16_mask_getmant_sh + : GCCBuiltin<"__builtin_ia32_getmantsh_round_mask">, + Intrinsic<[ llvm_v8f16_ty ], + [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_i32_ty, llvm_v8f16_ty, + llvm_i8_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<5>> ]>; + def int_x86_avx512fp16_mask_rndscale_ph_128 + : GCCBuiltin<"__builtin_ia32_rndscaleph_128_mask">, + Intrinsic<[ llvm_v8f16_ty ], + [ llvm_v8f16_ty, llvm_i32_ty, llvm_v8f16_ty, llvm_i8_ty ], + [ IntrNoMem, ImmArg<ArgIndex<1>> ]>; + def int_x86_avx512fp16_mask_rndscale_ph_256 + : GCCBuiltin<"__builtin_ia32_rndscaleph_256_mask">, + Intrinsic<[ llvm_v16f16_ty ], + [ llvm_v16f16_ty, llvm_i32_ty, llvm_v16f16_ty, llvm_i16_ty ], + [ IntrNoMem, ImmArg<ArgIndex<1>> ]>; + def int_x86_avx512fp16_mask_rndscale_ph_512 + : GCCBuiltin<"__builtin_ia32_rndscaleph_mask">, + Intrinsic<[ llvm_v32f16_ty ], + [ llvm_v32f16_ty, llvm_i32_ty, llvm_v32f16_ty, llvm_i32_ty, + llvm_i32_ty ], + [ IntrNoMem, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<4>> ]>; + def int_x86_avx512fp16_mask_rndscale_sh + : 
GCCBuiltin<"__builtin_ia32_rndscalesh_round_mask">, + Intrinsic<[ llvm_v8f16_ty ], + [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty, + llvm_i32_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>> ]>; + def int_x86_avx512fp16_mask_scalef_ph_128 + : GCCBuiltin<"__builtin_ia32_scalefph128_mask">, + Intrinsic<[ llvm_v8f16_ty ], + [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty ], + [ IntrNoMem ]>; + def int_x86_avx512fp16_mask_scalef_ph_256 + : GCCBuiltin<"__builtin_ia32_scalefph256_mask">, + Intrinsic<[ llvm_v16f16_ty ], + [ llvm_v16f16_ty, llvm_v16f16_ty, llvm_v16f16_ty, llvm_i16_ty ], + [ IntrNoMem ]>; + def int_x86_avx512fp16_mask_scalef_ph_512 + : GCCBuiltin<"__builtin_ia32_scalefph512_mask">, + Intrinsic<[ llvm_v32f16_ty ], + [ llvm_v32f16_ty, llvm_v32f16_ty, llvm_v32f16_ty, llvm_i32_ty, + llvm_i32_ty ], + [ IntrNoMem, ImmArg<ArgIndex<4>> ]>; + def int_x86_avx512fp16_mask_scalef_sh + : GCCBuiltin<"__builtin_ia32_scalefsh_round_mask">, + Intrinsic<[ llvm_v8f16_ty ], + [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty, + llvm_i32_ty ], + [ IntrNoMem, ImmArg<ArgIndex<4>> ]>; + + def int_x86_avx512fp16_vfmadd_ph_512 + : Intrinsic<[ llvm_v32f16_ty ], + [ llvm_v32f16_ty, llvm_v32f16_ty, llvm_v32f16_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg<ArgIndex<3>> ]>; + def int_x86_avx512fp16_vfmaddsub_ph_128 + : GCCBuiltin<"__builtin_ia32_vfmaddsubph">, + Intrinsic<[ llvm_v8f16_ty ], + [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty ], + [ IntrNoMem ]>; + def int_x86_avx512fp16_vfmaddsub_ph_256 + : GCCBuiltin<"__builtin_ia32_vfmaddsubph256">, + Intrinsic<[ llvm_v16f16_ty ], + [ llvm_v16f16_ty, llvm_v16f16_ty, llvm_v16f16_ty ], + [ IntrNoMem ]>; + def int_x86_avx512fp16_vfmaddsub_ph_512 + : Intrinsic<[ llvm_v32f16_ty ], + [ llvm_v32f16_ty, llvm_v32f16_ty, llvm_v32f16_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg<ArgIndex<3>> ]>; + def int_x86_avx512fp16_vfmadd_f16 + : Intrinsic<[ llvm_half_ty ], + [ llvm_half_ty, llvm_half_ty, 
llvm_half_ty, llvm_i32_ty ], + [ IntrNoMem, ImmArg<ArgIndex<3>> ]>; + + def int_x86_avx512fp16_mask_vfcmadd_cph_128 + : GCCBuiltin<"__builtin_ia32_vfcmaddcph128_mask">, + Intrinsic<[ llvm_v4f32_ty ], + [ llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty ], + [ IntrNoMem ]>; + def int_x86_avx512fp16_maskz_vfcmadd_cph_128 + : GCCBuiltin<"__builtin_ia32_vfcmaddcph128_maskz">, + Intrinsic<[ llvm_v4f32_ty ], + [ llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty ], + [ IntrNoMem ]>; + def int_x86_avx512fp16_mask_vfcmadd_cph_256 + : GCCBuiltin<"__builtin_ia32_vfcmaddcph256_mask">, + Intrinsic<[ llvm_v8f32_ty ], + [ llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty ], + [ IntrNoMem ]>; + def int_x86_avx512fp16_maskz_vfcmadd_cph_256 + : GCCBuiltin<"__builtin_ia32_vfcmaddcph256_maskz">, + Intrinsic<[ llvm_v8f32_ty ], + [ llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty ], + [ IntrNoMem ]>; + def int_x86_avx512fp16_mask_vfcmadd_cph_512 + : GCCBuiltin<"__builtin_ia32_vfcmaddcph512_mask3">, + Intrinsic<[ llvm_v16f32_ty ], + [ llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty, + llvm_i32_ty ], + [ IntrNoMem, ImmArg<ArgIndex<4>> ]>; + def int_x86_avx512fp16_maskz_vfcmadd_cph_512 + : GCCBuiltin<"__builtin_ia32_vfcmaddcph512_maskz">, + Intrinsic<[ llvm_v16f32_ty ], + [ llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty, + llvm_i32_ty ], + [ IntrNoMem, ImmArg<ArgIndex<4>> ]>; + def int_x86_avx512fp16_mask_vfmadd_cph_128 + : GCCBuiltin<"__builtin_ia32_vfmaddcph128_mask">, + Intrinsic<[ llvm_v4f32_ty ], + [ llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty ], + [ IntrNoMem ]>; + def int_x86_avx512fp16_maskz_vfmadd_cph_128 + : GCCBuiltin<"__builtin_ia32_vfmaddcph128_maskz">, + Intrinsic<[ llvm_v4f32_ty ], + [ llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty ], + [ IntrNoMem ]>; + def int_x86_avx512fp16_mask_vfmadd_cph_256 + : GCCBuiltin<"__builtin_ia32_vfmaddcph256_mask">, + Intrinsic<[ llvm_v8f32_ty ], + [ llvm_v8f32_ty, 
llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty ], + [ IntrNoMem ]>; + def int_x86_avx512fp16_maskz_vfmadd_cph_256 + : GCCBuiltin<"__builtin_ia32_vfmaddcph256_maskz">, + Intrinsic<[ llvm_v8f32_ty ], + [ llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty ], + [ IntrNoMem ]>; + def int_x86_avx512fp16_mask_vfmadd_cph_512 + : GCCBuiltin<"__builtin_ia32_vfmaddcph512_mask3">, + Intrinsic<[ llvm_v16f32_ty ], + [ llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty, + llvm_i32_ty ], + [ IntrNoMem, ImmArg<ArgIndex<4>> ]>; + def int_x86_avx512fp16_maskz_vfmadd_cph_512 + : GCCBuiltin<"__builtin_ia32_vfmaddcph512_maskz">, + Intrinsic<[ llvm_v16f32_ty ], + [ llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty, + llvm_i32_ty ], + [ IntrNoMem, ImmArg<ArgIndex<4>> ]>; + def int_x86_avx512fp16_mask_vfmadd_csh + : GCCBuiltin<"__builtin_ia32_vfmaddcsh_mask">, + Intrinsic<[ llvm_v4f32_ty ], + [ llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty, + llvm_i32_ty ], + [ IntrNoMem, ImmArg<ArgIndex<4>> ]>; + def int_x86_avx512fp16_maskz_vfmadd_csh + : GCCBuiltin<"__builtin_ia32_vfmaddcsh_maskz">, + Intrinsic<[ llvm_v4f32_ty ], + [ llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty, + llvm_i32_ty ], + [ IntrNoMem, ImmArg<ArgIndex<4>> ]>; + def int_x86_avx512fp16_mask_vfcmadd_csh + : GCCBuiltin<"__builtin_ia32_vfcmaddcsh_mask">, + Intrinsic<[ llvm_v4f32_ty ], + [ llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty, + llvm_i32_ty ], + [ IntrNoMem, ImmArg<ArgIndex<4>> ]>; + def int_x86_avx512fp16_maskz_vfcmadd_csh + : GCCBuiltin<"__builtin_ia32_vfcmaddcsh_maskz">, + Intrinsic<[ llvm_v4f32_ty ], + [ llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty, + llvm_i32_ty ], + [ IntrNoMem, ImmArg<ArgIndex<4>> ]>; + def int_x86_avx512fp16_mask_vfmul_cph_128 + : GCCBuiltin<"__builtin_ia32_vfmulcph128_mask">, + Intrinsic<[ llvm_v4f32_ty ], + [ llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty ], + [ IntrNoMem ]>; + def int_x86_avx512fp16_mask_vfcmul_cph_128 + : 
GCCBuiltin<"__builtin_ia32_vfcmulcph128_mask">, + Intrinsic<[ llvm_v4f32_ty ], + [ llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty ], + [ IntrNoMem ]>; + def int_x86_avx512fp16_mask_vfmul_cph_256 + : GCCBuiltin<"__builtin_ia32_vfmulcph256_mask">, + Intrinsic<[ llvm_v8f32_ty ], + [ llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty ], + [ IntrNoMem ]>; + def int_x86_avx512fp16_mask_vfcmul_cph_256 + : GCCBuiltin<"__builtin_ia32_vfcmulcph256_mask">, + Intrinsic<[ llvm_v8f32_ty ], + [ llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty ], + [ IntrNoMem ]>; + def int_x86_avx512fp16_mask_vfmul_cph_512 + : GCCBuiltin<"__builtin_ia32_vfmulcph512_mask">, + Intrinsic<[ llvm_v16f32_ty ], + [ llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty, + llvm_i32_ty ], + [ IntrNoMem, ImmArg<ArgIndex<4>> ]>; + def int_x86_avx512fp16_mask_vfcmul_cph_512 + : GCCBuiltin<"__builtin_ia32_vfcmulcph512_mask">, + Intrinsic<[ llvm_v16f32_ty ], + [ llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty, + llvm_i32_ty ], + [ IntrNoMem, ImmArg<ArgIndex<4>> ]>; + def int_x86_avx512fp16_mask_vfmul_csh + : GCCBuiltin<"__builtin_ia32_vfmulcsh_mask">, + Intrinsic<[ llvm_v4f32_ty ], + [ llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty, + llvm_i32_ty ], + [ IntrNoMem, ImmArg<ArgIndex<4>> ]>; + def int_x86_avx512fp16_mask_vfcmul_csh + : GCCBuiltin<"__builtin_ia32_vfcmulcsh_mask">, + Intrinsic<[ llvm_v4f32_ty ], + [ llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty, + llvm_i32_ty ], + [ IntrNoMem, ImmArg<ArgIndex<4>> ]>; +} diff --git a/llvm/include/llvm/IR/LLVMContext.h b/llvm/include/llvm/IR/LLVMContext.h index bc605f108340..1c902ebce5ad 100644 --- a/llvm/include/llvm/IR/LLVMContext.h +++ b/llvm/include/llvm/IR/LLVMContext.h @@ -305,6 +305,10 @@ public: /// LLVMContext is used by compilation. void setOptPassGate(OptPassGate&); + /// Enable opaque pointers. Can only be called before creating the first + /// pointer type. 
+ void enableOpaquePointers() const; + /// Whether typed pointers are supported. If false, all pointers are opaque. bool supportsTypedPointers() const; diff --git a/llvm/include/llvm/IR/MatrixBuilder.h b/llvm/include/llvm/IR/MatrixBuilder.h index b14127df2182..6cc5797269e2 100644 --- a/llvm/include/llvm/IR/MatrixBuilder.h +++ b/llvm/include/llvm/IR/MatrixBuilder.h @@ -74,7 +74,7 @@ public: Value *Ops[] = {DataPtr, Stride, B.getInt1(IsVolatile), B.getInt32(Rows), B.getInt32(Columns)}; - Type *OverloadedTypes[] = {RetType}; + Type *OverloadedTypes[] = {RetType, Stride->getType()}; Function *TheFn = Intrinsic::getDeclaration( getModule(), Intrinsic::matrix_column_major_load, OverloadedTypes); @@ -82,7 +82,7 @@ public: CallInst *Call = B.CreateCall(TheFn->getFunctionType(), TheFn, Ops, Name); Attribute AlignAttr = Attribute::getWithAlignment(Call->getContext(), Alignment); - Call->addAttribute(1, AlignAttr); + Call->addParamAttr(0, AlignAttr); return Call; } @@ -97,7 +97,7 @@ public: Value *Ops[] = {Matrix, Ptr, Stride, B.getInt1(IsVolatile), B.getInt32(Rows), B.getInt32(Columns)}; - Type *OverloadedTypes[] = {Matrix->getType()}; + Type *OverloadedTypes[] = {Matrix->getType(), Stride->getType()}; Function *TheFn = Intrinsic::getDeclaration( getModule(), Intrinsic::matrix_column_major_store, OverloadedTypes); @@ -105,7 +105,7 @@ public: CallInst *Call = B.CreateCall(TheFn->getFunctionType(), TheFn, Ops, Name); Attribute AlignAttr = Attribute::getWithAlignment(Call->getContext(), Alignment); - Call->addAttribute(2, AlignAttr); + Call->addParamAttr(1, AlignAttr); return Call; } @@ -231,9 +231,23 @@ public: : (IsUnsigned ? B.CreateUDiv(LHS, RHS) : B.CreateSDiv(LHS, RHS)); } - /// Extracts the element at (\p RowIdx, \p ColumnIdx) from \p Matrix. - Value *CreateExtractElement(Value *Matrix, Value *RowIdx, Value *ColumnIdx, - unsigned NumRows, Twine const &Name = "") { + /// Create an assumption that \p Idx is less than \p NumElements. 
+ void CreateIndexAssumption(Value *Idx, unsigned NumElements, + Twine const &Name = "") { + + Value *NumElts = + B.getIntN(Idx->getType()->getScalarSizeInBits(), NumElements); + auto *Cmp = B.CreateICmpULT(Idx, NumElts); + if (auto *ConstCond = dyn_cast<ConstantInt>(Cmp)) + assert(ConstCond->isOne() && "Index must be valid!"); + else + B.CreateAssumption(Cmp); + } + + /// Compute the index to access the element at (\p RowIdx, \p ColumnIdx) from + /// a matrix with \p NumRows embedded in a vector. + Value *CreateIndex(Value *RowIdx, Value *ColumnIdx, unsigned NumRows, + Twine const &Name = "") { unsigned MaxWidth = std::max(RowIdx->getType()->getScalarSizeInBits(), ColumnIdx->getType()->getScalarSizeInBits()); @@ -241,9 +255,7 @@ public: RowIdx = B.CreateZExt(RowIdx, IntTy); ColumnIdx = B.CreateZExt(ColumnIdx, IntTy); Value *NumRowsV = B.getIntN(MaxWidth, NumRows); - return B.CreateExtractElement( - Matrix, B.CreateAdd(B.CreateMul(ColumnIdx, NumRowsV), RowIdx), - "matext"); + return B.CreateAdd(B.CreateMul(ColumnIdx, NumRowsV), RowIdx); } }; diff --git a/llvm/include/llvm/IR/Metadata.h b/llvm/include/llvm/IR/Metadata.h index c5840564454e..26d70b4db2d5 100644 --- a/llvm/include/llvm/IR/Metadata.h +++ b/llvm/include/llvm/IR/Metadata.h @@ -707,6 +707,15 @@ struct AAMDNodes { Result.NoAlias = NoAlias; return Result; } + + /// Given two sets of AAMDNodes applying to potentially different locations, + /// determine the best AAMDNodes that apply to both. + AAMDNodes merge(const AAMDNodes &Other) const; + + /// Determine the best AAMDNodes after concatenating two different locations + /// together. Different from `merge`, where different locations should + /// overlap each other, `concat` puts non-overlapping locations together. + AAMDNodes concat(const AAMDNodes &Other) const; }; // Specialize DenseMapInfo for AAMDNodes. 
@@ -897,6 +906,7 @@ struct TempMDNodeDeleter { class MDNode : public Metadata { friend class ReplaceableMetadataImpl; friend class LLVMContextImpl; + friend class DIArgList; unsigned NumOperands; unsigned NumUnresolved; @@ -1028,6 +1038,31 @@ public: return cast<T>(N.release()->replaceWithDistinctImpl()); } + /// Print in tree shape. + /// + /// Prints definition of \c this in tree shape. + /// + /// If \c M is provided, metadata nodes will be numbered canonically; + /// otherwise, pointer addresses are substituted. + /// @{ + void printTree(raw_ostream &OS, const Module *M = nullptr) const; + void printTree(raw_ostream &OS, ModuleSlotTracker &MST, + const Module *M = nullptr) const; + /// @} + + /// User-friendly dump in tree shape. + /// + /// If \c M is provided, metadata nodes will be numbered canonically; + /// otherwise, pointer addresses are substituted. + /// + /// Note: this uses an explicit overload instead of default arguments so that + /// the nullptr version is easy to call from a debugger. + /// + /// @{ + void dumpTree() const; + void dumpTree(const Module *M) const; + /// @} + private: MDNode *replaceWithPermanentImpl(); MDNode *replaceWithUniquedImpl(); diff --git a/llvm/include/llvm/IR/Module.h b/llvm/include/llvm/IR/Module.h index 81e29d9b86e8..bd3a196c7181 100644 --- a/llvm/include/llvm/IR/Module.h +++ b/llvm/include/llvm/IR/Module.h @@ -64,9 +64,9 @@ class VersionTuple; /// constant references to global variables in the module. When a global /// variable is destroyed, it should have no entries in the GlobalValueRefMap. /// The main container class for the LLVM Intermediate Representation. -class Module { -/// @name Types And Enumerations -/// @{ +class LLVM_EXTERNAL_VISIBILITY Module { + /// @name Types And Enumerations + /// @{ public: /// The type for the list of global variables. using GlobalListType = SymbolTableList<GlobalVariable>; @@ -324,6 +324,9 @@ public: /// name is not found. 
GlobalValue *getNamedValue(StringRef Name) const; + /// Return the number of global values in the module. + unsigned getNumNamedValues() const; + /// Return a unique non-zero ID for the specified metadata kind. This ID is /// uniqued across modules in the current LLVMContext. unsigned getMDKindID(StringRef Name) const; diff --git a/llvm/include/llvm/IR/ModuleSummaryIndex.h b/llvm/include/llvm/IR/ModuleSummaryIndex.h index 4b84f6b0408d..e00b78d45c63 100644 --- a/llvm/include/llvm/IR/ModuleSummaryIndex.h +++ b/llvm/include/llvm/IR/ModuleSummaryIndex.h @@ -572,6 +572,50 @@ public: unsigned NoInline : 1; // Indicate if function should be always inlined. unsigned AlwaysInline : 1; + // Indicate if function never raises an exception. Can be modified during + // thinlink function attribute propagation + unsigned NoUnwind : 1; + // Indicate if function contains instructions that mayThrow + unsigned MayThrow : 1; + + // If there are calls to unknown targets (e.g. indirect) + unsigned HasUnknownCall : 1; + + FFlags &operator&=(const FFlags &RHS) { + this->ReadNone &= RHS.ReadNone; + this->ReadOnly &= RHS.ReadOnly; + this->NoRecurse &= RHS.NoRecurse; + this->ReturnDoesNotAlias &= RHS.ReturnDoesNotAlias; + this->NoInline &= RHS.NoInline; + this->AlwaysInline &= RHS.AlwaysInline; + this->NoUnwind &= RHS.NoUnwind; + this->MayThrow &= RHS.MayThrow; + this->HasUnknownCall &= RHS.HasUnknownCall; + return *this; + } + + bool anyFlagSet() { + return this->ReadNone | this->ReadOnly | this->NoRecurse | + this->ReturnDoesNotAlias | this->NoInline | this->AlwaysInline | + this->NoUnwind | this->MayThrow | this->HasUnknownCall; + } + + operator std::string() { + std::string Output; + raw_string_ostream OS(Output); + OS << "funcFlags: ("; + OS << "readNone: " << this->ReadNone; + OS << ", readOnly: " << this->ReadOnly; + OS << ", noRecurse: " << this->NoRecurse; + OS << ", returnDoesNotAlias: " << this->ReturnDoesNotAlias; + OS << ", noInline: " << this->NoInline; + OS << ", alwaysInline: 
" << this->AlwaysInline; + OS << ", noUnwind: " << this->NoUnwind; + OS << ", mayThrow: " << this->MayThrow; + OS << ", hasUnknownCall: " << this->HasUnknownCall; + OS << ")"; + return OS.str(); + } }; /// Describes the uses of a parameter by the function. @@ -688,6 +732,10 @@ public: /// Get function summary flags. FFlags fflags() const { return FunFlags; } + void setNoRecurse() { FunFlags.NoRecurse = true; } + + void setNoUnwind() { FunFlags.NoUnwind = true; } + /// Get the instruction count recorded for this function. unsigned instCount() const { return InstCount; } @@ -700,6 +748,8 @@ public: /// Return the list of <CalleeValueInfo, CalleeInfo> pairs. ArrayRef<EdgeTy> calls() const { return CallGraphEdgeList; } + std::vector<EdgeTy> &mutableCalls() { return CallGraphEdgeList; } + void addCall(EdgeTy E) { CallGraphEdgeList.push_back(E); } /// Returns the list of type identifiers used by this function in diff --git a/llvm/include/llvm/IR/Operator.h b/llvm/include/llvm/IR/Operator.h index d0bce742cc96..b83d83f0d0ab 100644 --- a/llvm/include/llvm/IR/Operator.h +++ b/llvm/include/llvm/IR/Operator.h @@ -59,6 +59,10 @@ public: static bool classof(const Value *V) { return isa<Instruction>(V) || isa<ConstantExpr>(V); } + + /// Return true if this operator has flags which may cause this operator + /// to evaluate to poison despite having non-poison inputs. 
+ bool hasPoisonGeneratingFlags() const; }; /// Utility class for integer operators which may exhibit overflow - Add, Sub, @@ -243,6 +247,9 @@ public: void operator|=(const FastMathFlags &OtherFlags) { Flags |= OtherFlags.Flags; } + bool operator!=(const FastMathFlags &OtherFlags) const { + return Flags != OtherFlags.Flags; + } }; /// Utility class for floating point operations which can have diff --git a/llvm/include/llvm/IR/OptBisect.h b/llvm/include/llvm/IR/OptBisect.h index 6c2a1b01d897..63fd98073b51 100644 --- a/llvm/include/llvm/IR/OptBisect.h +++ b/llvm/include/llvm/IR/OptBisect.h @@ -16,6 +16,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/Support/ManagedStatic.h" +#include <limits> namespace llvm { @@ -43,14 +44,12 @@ public: /// optimization-related problems. class OptBisect : public OptPassGate { public: - /// Default constructor, initializes the OptBisect state based on the - /// -opt-bisect-limit command line argument. - /// - /// By default, bisection is disabled. - /// + /// Default constructor. Initializes the state to "disabled". The bisection + /// will be enabled by the cl::opt call-back when the command line option + /// is processed. /// Clients should not instantiate this class directly. All access should go /// through LLVMContext. - OptBisect(); + OptBisect() = default; virtual ~OptBisect() = default; @@ -60,7 +59,14 @@ public: bool shouldRunPass(const Pass *P, StringRef IRDescription) override; /// isEnabled() should return true before calling shouldRunPass(). - bool isEnabled() const override { return BisectEnabled; } + bool isEnabled() const override { return BisectLimit != Disabled; } + + /// Set the new optimization limit and reset the counter. Passing + /// OptBisect::Disabled disables the limiting. + void setLimit(int Limit) { + BisectLimit = Limit; + LastBisectNum = 0; + } /// Checks the bisect limit to determine if the specified pass should run. 
/// @@ -75,9 +81,11 @@ public: /// instance, function passes should call FunctionPass::skipFunction(). bool checkPass(const StringRef PassName, const StringRef TargetDesc); + static const int Disabled = std::numeric_limits<int>::max(); + private: - bool BisectEnabled = false; - unsigned LastBisectNum = 0; + int BisectLimit = Disabled; + int LastBisectNum = 0; }; /// Singleton instance of the OptBisect class, so multiple pass managers don't diff --git a/llvm/include/llvm/IR/PassManager.h b/llvm/include/llvm/IR/PassManager.h index 8e592bfb0c78..e88d2233daba 100644 --- a/llvm/include/llvm/IR/PassManager.h +++ b/llvm/include/llvm/IR/PassManager.h @@ -377,10 +377,16 @@ template <typename DerivedT> struct PassInfoMixin { static_assert(std::is_base_of<PassInfoMixin, DerivedT>::value, "Must pass the derived type as the template argument!"); StringRef Name = getTypeName<DerivedT>(); - if (Name.startswith("llvm::")) - Name = Name.drop_front(strlen("llvm::")); + Name.consume_front("llvm::"); return Name; } + + void printPipeline(raw_ostream &OS, + function_ref<StringRef(StringRef)> MapClassName2PassName) { + StringRef ClassName = DerivedT::name(); + auto PassName = MapClassName2PassName(ClassName); + OS << PassName; + } }; /// A CRTP mix-in that provides informational APIs needed for analysis passes. @@ -480,6 +486,16 @@ public: return *this; } + void printPipeline(raw_ostream &OS, + function_ref<StringRef(StringRef)> MapClassName2PassName) { + for (unsigned Idx = 0, Size = Passes.size(); Idx != Size; ++Idx) { + auto *P = Passes[Idx].get(); + P->printPipeline(OS, MapClassName2PassName); + if (Idx + 1 < Size) + OS << ","; + } + } + /// Run all of the passes in this manager over the given unit of IR. /// ExtraArgs are passed to each pass. PreservedAnalyses run(IRUnitT &IR, AnalysisManagerT &AM, @@ -520,12 +536,6 @@ public: // Finally, intersect the preserved analyses to compute the aggregate // preserved set for this pass manager. 
PA.intersect(std::move(PassPA)); - - // FIXME: Historically, the pass managers all called the LLVM context's - // yield function here. We don't have a generic way to acquire the - // context and it isn't yet clear what the right pattern is for yielding - // in the new pass manager so it is currently omitted. - //IR.getContext().yield(); } // Invalidation was handled after each pass in the above loop for the @@ -538,13 +548,16 @@ public: } template <typename PassT> - std::enable_if_t<!std::is_same<PassT, PassManager>::value> - addPass(PassT &&Pass) { + LLVM_ATTRIBUTE_MINSIZE + std::enable_if_t<!std::is_same<PassT, PassManager>::value> + addPass(PassT &&Pass) { using PassModelT = detail::PassModel<IRUnitT, PassT, PreservedAnalyses, AnalysisManagerT, ExtraArgTs...>; - - Passes.emplace_back(new PassModelT(std::forward<PassT>(Pass))); + // Do not use make_unique or emplace_back, they cause too many template + // instantiations, causing terrible compile times. + Passes.push_back(std::unique_ptr<PassConceptT>( + new PassModelT(std::forward<PassT>(Pass)))); } /// When adding a pass manager pass that has the same type as this pass @@ -553,10 +566,11 @@ public: /// implementation complexity and avoid potential invalidation issues that may /// happen with nested pass managers of the same type. template <typename PassT> - std::enable_if_t<std::is_same<PassT, PassManager>::value> - addPass(PassT &&Pass) { + LLVM_ATTRIBUTE_MINSIZE + std::enable_if_t<std::is_same<PassT, PassManager>::value> + addPass(PassT &&Pass) { for (auto &P : Pass.Passes) - Passes.emplace_back(std::move(P)); + Passes.push_back(std::move(P)); } /// Returns if the pass manager contains any passes. 
@@ -1190,29 +1204,37 @@ class ModuleToFunctionPassAdaptor public: using PassConceptT = detail::PassConcept<Function, FunctionAnalysisManager>; - explicit ModuleToFunctionPassAdaptor(std::unique_ptr<PassConceptT> Pass) - : Pass(std::move(Pass)) {} + explicit ModuleToFunctionPassAdaptor(std::unique_ptr<PassConceptT> Pass, + bool EagerlyInvalidate) + : Pass(std::move(Pass)), EagerlyInvalidate(EagerlyInvalidate) {} /// Runs the function pass across every function in the module. PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); + void printPipeline(raw_ostream &OS, + function_ref<StringRef(StringRef)> MapClassName2PassName); static bool isRequired() { return true; } private: std::unique_ptr<PassConceptT> Pass; + bool EagerlyInvalidate; }; /// A function to deduce a function pass type and wrap it in the /// templated adaptor. template <typename FunctionPassT> ModuleToFunctionPassAdaptor -createModuleToFunctionPassAdaptor(FunctionPassT &&Pass) { +createModuleToFunctionPassAdaptor(FunctionPassT &&Pass, + bool EagerlyInvalidate = false) { using PassModelT = detail::PassModel<Function, FunctionPassT, PreservedAnalyses, FunctionAnalysisManager>; - + // Do not use make_unique, it causes too many template instantiations, + // causing terrible compile times. return ModuleToFunctionPassAdaptor( - std::make_unique<PassModelT>(std::forward<FunctionPassT>(Pass))); + std::unique_ptr<ModuleToFunctionPassAdaptor::PassConceptT>( + new PassModelT(std::forward<FunctionPassT>(Pass))), + EagerlyInvalidate); } /// A utility pass template to force an analysis result to be available. 
@@ -1243,6 +1265,12 @@ struct RequireAnalysisPass return PreservedAnalyses::all(); } + void printPipeline(raw_ostream &OS, + function_ref<StringRef(StringRef)> MapClassName2PassName) { + auto ClassName = AnalysisT::name(); + auto PassName = MapClassName2PassName(ClassName); + OS << "require<" << PassName << ">"; + } static bool isRequired() { return true; } }; @@ -1263,6 +1291,12 @@ struct InvalidateAnalysisPass PA.abandon<AnalysisT>(); return PA; } + void printPipeline(raw_ostream &OS, + function_ref<StringRef(StringRef)> MapClassName2PassName) { + auto ClassName = AnalysisT::name(); + auto PassName = MapClassName2PassName(ClassName); + OS << "invalidate<" << PassName << ">"; + } }; /// A utility pass that does nothing, but preserves no analyses. @@ -1312,6 +1346,13 @@ public: return PA; } + void printPipeline(raw_ostream &OS, + function_ref<StringRef(StringRef)> MapClassName2PassName) { + OS << "repeat<" << Count << ">("; + P.printPipeline(OS, MapClassName2PassName); + OS << ")"; + } + private: int Count; PassT P; diff --git a/llvm/include/llvm/IR/PassManagerInternal.h b/llvm/include/llvm/IR/PassManagerInternal.h index 8f42e69f3063..29b55a8172e6 100644 --- a/llvm/include/llvm/IR/PassManagerInternal.h +++ b/llvm/include/llvm/IR/PassManagerInternal.h @@ -46,6 +46,9 @@ struct PassConcept { virtual PreservedAnalyses run(IRUnitT &IR, AnalysisManagerT &AM, ExtraArgTs... ExtraArgs) = 0; + virtual void + printPipeline(raw_ostream &OS, + function_ref<StringRef(StringRef)> MapClassName2PassName) = 0; /// Polymorphic method to access the name of a pass. 
virtual StringRef name() const = 0; @@ -85,6 +88,12 @@ struct PassModel : PassConcept<IRUnitT, AnalysisManagerT, ExtraArgTs...> { return Pass.run(IR, AM, ExtraArgs...); } + void printPipeline( + raw_ostream &OS, + function_ref<StringRef(StringRef)> MapClassName2PassName) override { + Pass.printPipeline(OS, MapClassName2PassName); + } + StringRef name() const override { return PassT::name(); } template <typename T> diff --git a/llvm/include/llvm/IR/PatternMatch.h b/llvm/include/llvm/IR/PatternMatch.h index cbd429f84ee4..b858733530e3 100644 --- a/llvm/include/llvm/IR/PatternMatch.h +++ b/llvm/include/llvm/IR/PatternMatch.h @@ -438,7 +438,7 @@ inline cst_pred_ty<is_any_apint> m_AnyIntegralConstant() { } struct is_all_ones { - bool isValue(const APInt &C) { return C.isAllOnesValue(); } + bool isValue(const APInt &C) { return C.isAllOnes(); } }; /// Match an integer or vector with all bits set. /// For vectors, this includes constants with undefined elements. @@ -506,7 +506,7 @@ inline cst_pred_ty<is_nonpositive> m_NonPositive() { inline api_pred_ty<is_nonpositive> m_NonPositive(const APInt *&V) { return V; } struct is_one { - bool isValue(const APInt &C) { return C.isOneValue(); } + bool isValue(const APInt &C) { return C.isOne(); } }; /// Match an integer 1 or a vector with all elements equal to 1. /// For vectors, this includes constants with undefined elements. @@ -515,7 +515,7 @@ inline cst_pred_ty<is_one> m_One() { } struct is_zero_int { - bool isValue(const APInt &C) { return C.isNullValue(); } + bool isValue(const APInt &C) { return C.isZero(); } }; /// Match an integer 0 or a vector with all elements equal to 0. /// For vectors, this includes constants with undefined elements. @@ -549,7 +549,7 @@ inline api_pred_ty<is_power2> m_Power2(const APInt *&V) { } struct is_negated_power2 { - bool isValue(const APInt &C) { return (-C).isPowerOf2(); } + bool isValue(const APInt &C) { return C.isNegatedPowerOf2(); } }; /// Match a integer or vector negated power-of-2. 
/// For vectors, this includes constants with undefined elements. @@ -593,32 +593,7 @@ inline cst_pred_ty<is_lowbit_mask> m_LowBitMask() { struct icmp_pred_with_threshold { ICmpInst::Predicate Pred; const APInt *Thr; - bool isValue(const APInt &C) { - switch (Pred) { - case ICmpInst::Predicate::ICMP_EQ: - return C.eq(*Thr); - case ICmpInst::Predicate::ICMP_NE: - return C.ne(*Thr); - case ICmpInst::Predicate::ICMP_UGT: - return C.ugt(*Thr); - case ICmpInst::Predicate::ICMP_UGE: - return C.uge(*Thr); - case ICmpInst::Predicate::ICMP_ULT: - return C.ult(*Thr); - case ICmpInst::Predicate::ICMP_ULE: - return C.ule(*Thr); - case ICmpInst::Predicate::ICMP_SGT: - return C.sgt(*Thr); - case ICmpInst::Predicate::ICMP_SGE: - return C.sge(*Thr); - case ICmpInst::Predicate::ICMP_SLT: - return C.slt(*Thr); - case ICmpInst::Predicate::ICMP_SLE: - return C.sle(*Thr); - default: - llvm_unreachable("Unhandled ICmp predicate"); - } - } + bool isValue(const APInt &C) { return ICmpInst::compare(C, *Thr, Pred); } }; /// Match an integer or vector with every element comparing 'pred' (eg/ne/...) /// to Threshold. For vectors, this includes constants with undefined elements. @@ -988,20 +963,22 @@ struct BinaryOp_match { // The LHS is always matched first. 
BinaryOp_match(const LHS_t &LHS, const RHS_t &RHS) : L(LHS), R(RHS) {} - template <typename OpTy> bool match(OpTy *V) { - if (V->getValueID() == Value::InstructionVal + Opcode) { + template <typename OpTy> inline bool match(unsigned Opc, OpTy *V) { + if (V->getValueID() == Value::InstructionVal + Opc) { auto *I = cast<BinaryOperator>(V); return (L.match(I->getOperand(0)) && R.match(I->getOperand(1))) || (Commutable && L.match(I->getOperand(1)) && R.match(I->getOperand(0))); } if (auto *CE = dyn_cast<ConstantExpr>(V)) - return CE->getOpcode() == Opcode && + return CE->getOpcode() == Opc && ((L.match(CE->getOperand(0)) && R.match(CE->getOperand(1))) || (Commutable && L.match(CE->getOperand(1)) && R.match(CE->getOperand(0)))); return false; } + + template <typename OpTy> bool match(OpTy *V) { return match(Opcode, V); } }; template <typename LHS, typename RHS> @@ -1246,6 +1223,26 @@ m_NUWShl(const LHS &L, const RHS &R) { L, R); } +template <typename LHS_t, typename RHS_t, bool Commutable = false> +struct SpecificBinaryOp_match + : public BinaryOp_match<LHS_t, RHS_t, 0, Commutable> { + unsigned Opcode; + + SpecificBinaryOp_match(unsigned Opcode, const LHS_t &LHS, const RHS_t &RHS) + : BinaryOp_match<LHS_t, RHS_t, 0, Commutable>(LHS, RHS), Opcode(Opcode) {} + + template <typename OpTy> bool match(OpTy *V) { + return BinaryOp_match<LHS_t, RHS_t, 0, Commutable>::match(Opcode, V); + } +}; + +/// Matches a specific opcode. +template <typename LHS, typename RHS> +inline SpecificBinaryOp_match<LHS, RHS> m_BinOp(unsigned Opcode, const LHS &L, + const RHS &R) { + return SpecificBinaryOp_match<LHS, RHS>(Opcode, L, R); +} + //===----------------------------------------------------------------------===// // Class that matches a group of binary opcodes. // @@ -2223,6 +2220,13 @@ m_c_ICmp(ICmpInst::Predicate &Pred, const LHS &L, const RHS &R) { R); } +/// Matches a specific opcode with LHS and RHS in either order. 
+template <typename LHS, typename RHS> +inline SpecificBinaryOp_match<LHS, RHS, true> +m_c_BinOp(unsigned Opcode, const LHS &L, const RHS &R) { + return SpecificBinaryOp_match<LHS, RHS, true>(Opcode, L, R); +} + /// Matches a Add with LHS and RHS in either order. template <typename LHS, typename RHS> inline BinaryOp_match<LHS, RHS, Instruction::Add, true> m_c_Add(const LHS &L, @@ -2456,7 +2460,7 @@ inline VScaleVal_match m_VScale(const DataLayout &DL) { return VScaleVal_match(DL); } -template <typename LHS, typename RHS, unsigned Opcode> +template <typename LHS, typename RHS, unsigned Opcode, bool Commutable = false> struct LogicalOp_match { LHS L; RHS R; @@ -2464,27 +2468,32 @@ struct LogicalOp_match { LogicalOp_match(const LHS &L, const RHS &R) : L(L), R(R) {} template <typename T> bool match(T *V) { - if (auto *I = dyn_cast<Instruction>(V)) { - if (!I->getType()->isIntOrIntVectorTy(1)) - return false; + auto *I = dyn_cast<Instruction>(V); + if (!I || !I->getType()->isIntOrIntVectorTy(1)) + return false; - if (I->getOpcode() == Opcode && L.match(I->getOperand(0)) && - R.match(I->getOperand(1))) - return true; + if (I->getOpcode() == Opcode) { + auto *Op0 = I->getOperand(0); + auto *Op1 = I->getOperand(1); + return (L.match(Op0) && R.match(Op1)) || + (Commutable && L.match(Op1) && R.match(Op0)); + } - if (auto *SI = dyn_cast<SelectInst>(I)) { - if (Opcode == Instruction::And) { - if (const auto *C = dyn_cast<Constant>(SI->getFalseValue())) - if (C->isNullValue() && L.match(SI->getCondition()) && - R.match(SI->getTrueValue())) - return true; - } else { - assert(Opcode == Instruction::Or); - if (const auto *C = dyn_cast<Constant>(SI->getTrueValue())) - if (C->isOneValue() && L.match(SI->getCondition()) && - R.match(SI->getFalseValue())) - return true; - } + if (auto *Select = dyn_cast<SelectInst>(I)) { + auto *Cond = Select->getCondition(); + auto *TVal = Select->getTrueValue(); + auto *FVal = Select->getFalseValue(); + if (Opcode == Instruction::And) { + auto *C = 
dyn_cast<Constant>(FVal); + if (C && C->isNullValue()) + return (L.match(Cond) && R.match(TVal)) || + (Commutable && L.match(TVal) && R.match(Cond)); + } else { + assert(Opcode == Instruction::Or); + auto *C = dyn_cast<Constant>(TVal); + if (C && C->isOneValue()) + return (L.match(Cond) && R.match(FVal)) || + (Commutable && L.match(FVal) && R.match(Cond)); } } @@ -2503,6 +2512,13 @@ m_LogicalAnd(const LHS &L, const RHS &R) { /// Matches L && R where L and R are arbitrary values. inline auto m_LogicalAnd() { return m_LogicalAnd(m_Value(), m_Value()); } +/// Matches L && R with LHS and RHS in either order. +template <typename LHS, typename RHS> +inline LogicalOp_match<LHS, RHS, Instruction::And, true> +m_c_LogicalAnd(const LHS &L, const RHS &R) { + return LogicalOp_match<LHS, RHS, Instruction::And, true>(L, R); +} + /// Matches L || R either in the form of L | R or L ? true : R. /// Note that the latter form is poison-blocking. template <typename LHS, typename RHS> @@ -2512,8 +2528,13 @@ m_LogicalOr(const LHS &L, const RHS &R) { } /// Matches L || R where L and R are arbitrary values. -inline auto m_LogicalOr() { - return m_LogicalOr(m_Value(), m_Value()); +inline auto m_LogicalOr() { return m_LogicalOr(m_Value(), m_Value()); } + +/// Matches L || R with LHS and RHS in either order. +template <typename LHS, typename RHS> +inline LogicalOp_match<LHS, RHS, Instruction::Or, true> +m_c_LogicalOr(const LHS &L, const RHS &R) { + return LogicalOp_match<LHS, RHS, Instruction::Or, true>(L, R); } } // end namespace PatternMatch diff --git a/llvm/include/llvm/IR/ProfileSummary.h b/llvm/include/llvm/IR/ProfileSummary.h index 889568e7946b..4bb6bb8d4a40 100644 --- a/llvm/include/llvm/IR/ProfileSummary.h +++ b/llvm/include/llvm/IR/ProfileSummary.h @@ -31,9 +31,9 @@ class raw_ostream; // number of counts needed to reach this target and the minimum among these // counts. struct ProfileSummaryEntry { - uint32_t Cutoff; ///< The required percentile of counts. 
- uint64_t MinCount; ///< The minimum count for this percentile. - uint64_t NumCounts; ///< Number of counts >= the minimum count. + const uint32_t Cutoff; ///< The required percentile of counts. + const uint64_t MinCount; ///< The minimum count for this percentile. + const uint64_t NumCounts; ///< Number of counts >= the minimum count. ProfileSummaryEntry(uint32_t TheCutoff, uint64_t TheMinCount, uint64_t TheNumCounts) @@ -48,9 +48,9 @@ public: private: const Kind PSK; - SummaryEntryVector DetailedSummary; - uint64_t TotalCount, MaxCount, MaxInternalCount, MaxFunctionCount; - uint32_t NumCounts, NumFunctions; + const SummaryEntryVector DetailedSummary; + const uint64_t TotalCount, MaxCount, MaxInternalCount, MaxFunctionCount; + const uint32_t NumCounts, NumFunctions; /// If 'Partial' is false, it means the profile being used to optimize /// a target is collected from the same target. /// If 'Partial' is true, it means the profile is for common/shared @@ -61,14 +61,14 @@ private: /// of the program being built to the number of profile counters in the /// partial sample profile. When 'Partial' is false, it is undefined. This is /// currently only available under thin LTO mode. - double PartialProfileRatio = 0; + double PartialProfileRatio = 0.0; /// Return detailed summary as metadata. Metadata *getDetailedSummaryMD(LLVMContext &Context); public: static const int Scale = 1000000; - ProfileSummary(Kind K, SummaryEntryVector DetailedSummary, + ProfileSummary(Kind K, const SummaryEntryVector &DetailedSummary, uint64_t TotalCount, uint64_t MaxCount, uint64_t MaxInternalCount, uint64_t MaxFunctionCount, uint32_t NumCounts, uint32_t NumFunctions, @@ -85,22 +85,22 @@ public: bool AddPartialProfileRatioField = true); /// Construct profile summary from metdata. 
static ProfileSummary *getFromMD(Metadata *MD); - SummaryEntryVector &getDetailedSummary() { return DetailedSummary; } - uint32_t getNumFunctions() { return NumFunctions; } - uint64_t getMaxFunctionCount() { return MaxFunctionCount; } - uint32_t getNumCounts() { return NumCounts; } - uint64_t getTotalCount() { return TotalCount; } - uint64_t getMaxCount() { return MaxCount; } - uint64_t getMaxInternalCount() { return MaxInternalCount; } + const SummaryEntryVector &getDetailedSummary() { return DetailedSummary; } + uint32_t getNumFunctions() const { return NumFunctions; } + uint64_t getMaxFunctionCount() const { return MaxFunctionCount; } + uint32_t getNumCounts() const { return NumCounts; } + uint64_t getTotalCount() const { return TotalCount; } + uint64_t getMaxCount() const { return MaxCount; } + uint64_t getMaxInternalCount() const { return MaxInternalCount; } void setPartialProfile(bool PP) { Partial = PP; } - bool isPartialProfile() { return Partial; } - double getPartialProfileRatio() { return PartialProfileRatio; } + bool isPartialProfile() const { return Partial; } + double getPartialProfileRatio() const { return PartialProfileRatio; } void setPartialProfileRatio(double R) { assert(isPartialProfile() && "Unexpected when not partial profile"); PartialProfileRatio = R; } - void printSummary(raw_ostream &OS); - void printDetailedSummary(raw_ostream &OS); + void printSummary(raw_ostream &OS) const; + void printDetailedSummary(raw_ostream &OS) const; }; } // end namespace llvm diff --git a/llvm/include/llvm/IR/PseudoProbe.h b/llvm/include/llvm/IR/PseudoProbe.h index 53100f049910..51ba7e675efe 100644 --- a/llvm/include/llvm/IR/PseudoProbe.h +++ b/llvm/include/llvm/IR/PseudoProbe.h @@ -27,10 +27,6 @@ constexpr const char *PseudoProbeDescMetadataName = "llvm.pseudo_probe_desc"; enum class PseudoProbeType { Block = 0, IndirectCall, DirectCall }; -enum class PseudoProbeAttributes { - Reserved = 0x1, // Reserved for future use. 
-}; - // The saturated distrution factor representing 100% for block probes. constexpr static uint64_t PseudoProbeFullDistributionFactor = std::numeric_limits<uint64_t>::max(); diff --git a/llvm/include/llvm/IR/ReplaceConstant.h b/llvm/include/llvm/IR/ReplaceConstant.h index 4d95143a4bd2..5ad1d0a6f920 100644 --- a/llvm/include/llvm/IR/ReplaceConstant.h +++ b/llvm/include/llvm/IR/ReplaceConstant.h @@ -21,10 +21,6 @@ namespace llvm { -/// Create a replacement instruction for constant expression \p CE and insert -/// it before \p Instr. -Instruction *createReplacementInstr(ConstantExpr *CE, Instruction *Instr); - /// The given instruction \p I contains given constant expression \p CE as one /// of its operands, possibly nested within constant expression trees. Convert /// all reachable paths from contant expression operands of \p I to \p CE into diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.def b/llvm/include/llvm/IR/RuntimeLibcalls.def index c73172612b1e..62d67308114f 100644 --- a/llvm/include/llvm/IR/RuntimeLibcalls.def +++ b/llvm/include/llvm/IR/RuntimeLibcalls.def @@ -287,6 +287,7 @@ HANDLE_LIBCALL(FPEXT_F80_F128, "__extendxftf2") HANDLE_LIBCALL(FPEXT_F64_F128, "__extenddftf2") HANDLE_LIBCALL(FPEXT_F32_F128, "__extendsftf2") HANDLE_LIBCALL(FPEXT_F16_F128, "__extendhftf2") +HANDLE_LIBCALL(FPEXT_F16_F80, "__extendhfxf2") HANDLE_LIBCALL(FPEXT_F32_F64, "__extendsfdf2") HANDLE_LIBCALL(FPEXT_F16_F64, "__extendhfdf2") HANDLE_LIBCALL(FPEXT_F16_F32, "__gnu_h2f_ieee") @@ -375,6 +376,8 @@ HANDLE_LIBCALL(UINTTOFP_I128_F64, "__floatuntidf") HANDLE_LIBCALL(UINTTOFP_I128_F80, "__floatuntixf") HANDLE_LIBCALL(UINTTOFP_I128_F128, "__floatuntitf") HANDLE_LIBCALL(UINTTOFP_I128_PPCF128, "__floatuntitf") +HANDLE_LIBCALL(CONVERT_F128_PPCF128, "__extendkftf2") +HANDLE_LIBCALL(CONVERT_PPCF128_F128, "__trunctfkf2") // Comparison HANDLE_LIBCALL(OEQ_F32, "__eqsf2") @@ -431,6 +434,7 @@ HANDLE_LIBCALL(MEMSET_ELEMENT_UNORDERED_ATOMIC_16, "__llvm_memset_element_unorde // Exception handling 
HANDLE_LIBCALL(UNWIND_RESUME, "_Unwind_Resume") +HANDLE_LIBCALL(CXA_END_CLEANUP, "__cxa_end_cleanup") // Note: there are two sets of atomics libcalls; see // <https://llvm.org/docs/Atomics.html> for more info on the diff --git a/llvm/include/llvm/IR/Type.h b/llvm/include/llvm/IR/Type.h index 430bc34a47e7..47431adc6fac 100644 --- a/llvm/include/llvm/IR/Type.h +++ b/llvm/include/llvm/IR/Type.h @@ -14,7 +14,6 @@ #ifndef LLVM_IR_TYPE_H #define LLVM_IR_TYPE_H -#include "llvm/ADT/APFloat.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/Support/CBindingWrapping.h" @@ -29,6 +28,7 @@ namespace llvm { class IntegerType; +struct fltSemantics; class LLVMContext; class PointerType; class raw_ostream; @@ -166,18 +166,7 @@ public: getTypeID() == PPC_FP128TyID; } - const fltSemantics &getFltSemantics() const { - switch (getTypeID()) { - case HalfTyID: return APFloat::IEEEhalf(); - case BFloatTyID: return APFloat::BFloat(); - case FloatTyID: return APFloat::IEEEsingle(); - case DoubleTyID: return APFloat::IEEEdouble(); - case X86_FP80TyID: return APFloat::x87DoubleExtended(); - case FP128TyID: return APFloat::IEEEquad(); - case PPC_FP128TyID: return APFloat::PPCDoubleDouble(); - default: llvm_unreachable("Invalid floating type"); - } - } + const fltSemantics &getFltSemantics() const; /// Return true if this is X86 MMX. bool isX86_MMXTy() const { return getTypeID() == X86_MMXTyID; } @@ -312,7 +301,7 @@ public: /// Return whether the type is IEEE compatible, as defined by the eponymous /// method in APFloat. - bool isIEEE() const { return APFloat::getZero(getFltSemantics()).isIEEE(); } + bool isIEEE() const; /// If this is a vector type, return the element type, otherwise return /// 'this'. 
@@ -443,26 +432,7 @@ public: } llvm_unreachable("Unsupported type in Type::getScalarTy"); } - static Type *getFloatingPointTy(LLVMContext &C, const fltSemantics &S) { - Type *Ty; - if (&S == &APFloat::IEEEhalf()) - Ty = Type::getHalfTy(C); - else if (&S == &APFloat::BFloat()) - Ty = Type::getBFloatTy(C); - else if (&S == &APFloat::IEEEsingle()) - Ty = Type::getFloatTy(C); - else if (&S == &APFloat::IEEEdouble()) - Ty = Type::getDoubleTy(C); - else if (&S == &APFloat::x87DoubleExtended()) - Ty = Type::getX86_FP80Ty(C); - else if (&S == &APFloat::IEEEquad()) - Ty = Type::getFP128Ty(C); - else { - assert(&S == &APFloat::PPCDoubleDouble() && "Unknown FP format"); - Ty = Type::getPPC_FP128Ty(C); - } - return Ty; - } + static Type *getFloatingPointTy(LLVMContext &C, const fltSemantics &S); //===--------------------------------------------------------------------===// // Convenience methods for getting pointer types with one of the above builtin diff --git a/llvm/include/llvm/IR/VPIntrinsics.def b/llvm/include/llvm/IR/VPIntrinsics.def index 92e2cd3a2783..361d6357b303 100644 --- a/llvm/include/llvm/IR/VPIntrinsics.def +++ b/llvm/include/llvm/IR/VPIntrinsics.def @@ -111,6 +111,21 @@ END_REGISTER_VP_SDNODE(SDOPC) #define HANDLE_VP_IS_MEMOP(VPID, POINTERPOS, DATAPOS) #endif +// Map this VP reduction intrinsic to its reduction operand positions. +#ifndef HANDLE_VP_REDUCTION +#define HANDLE_VP_REDUCTION(ID, STARTPOS, VECTORPOS) +#endif + +// A property to infer VP binary-op SDNode opcodes automatically. +#ifndef PROPERTY_VP_BINARYOP_SDNODE +#define PROPERTY_VP_BINARYOP_SDNODE(ID) +#endif + +// A property to infer VP reduction SDNode opcodes automatically. 
+#ifndef PROPERTY_VP_REDUCTION_SDNODE +#define PROPERTY_VP_REDUCTION_SDNODE(ID) +#endif + /// } Property Macros ///// Integer Arithmetic { @@ -122,6 +137,7 @@ END_REGISTER_VP_SDNODE(SDOPC) #define HELPER_REGISTER_BINARY_INT_VP(INTRIN, SDOPC, OPC) \ BEGIN_REGISTER_VP(INTRIN, 2, 3, SDOPC, -1) \ HANDLE_VP_TO_OPC(OPC) \ +PROPERTY_VP_BINARYOP_SDNODE(SDOPC) \ END_REGISTER_VP(INTRIN, SDOPC) @@ -181,6 +197,7 @@ HELPER_REGISTER_BINARY_INT_VP(vp_xor, VP_XOR, Xor) BEGIN_REGISTER_VP(vp_##OPSUFFIX, 2, 3, SDOPC, -1) \ HANDLE_VP_TO_OPC(OPC) \ HANDLE_VP_TO_CONSTRAINEDFP(1, 1, experimental_constrained_##OPSUFFIX) \ + PROPERTY_VP_BINARYOP_SDNODE(SDOPC) \ END_REGISTER_VP(vp_##OPSUFFIX, SDOPC) // llvm.vp.fadd(x,y,mask,vlen) @@ -204,33 +221,146 @@ HELPER_REGISTER_BINARY_FP_VP(frem, VP_FREM, FRem) ///// Memory Operations { // llvm.vp.store(ptr,val,mask,vlen) -BEGIN_REGISTER_VP(vp_store, 2, 3, VP_STORE, 0) +BEGIN_REGISTER_VP_INTRINSIC(vp_store, 2, 3) +// chain = VP_STORE chain,val,base,offset,mask,evl +BEGIN_REGISTER_VP_SDNODE(VP_STORE, 0, vp_store, 4, 5) HANDLE_VP_TO_OPC(Store) HANDLE_VP_TO_INTRIN(masked_store) HANDLE_VP_IS_MEMOP(vp_store, 1, 0) END_REGISTER_VP(vp_store, VP_STORE) // llvm.vp.scatter(ptr,val,mask,vlen) -BEGIN_REGISTER_VP(vp_scatter, 2, 3, VP_SCATTER, 0) +BEGIN_REGISTER_VP_INTRINSIC(vp_scatter, 2, 3) +// chain = VP_SCATTER chain,val,base,indices,scale,mask,evl +BEGIN_REGISTER_VP_SDNODE(VP_SCATTER, -1, vp_scatter, 5, 6) HANDLE_VP_TO_INTRIN(masked_scatter) HANDLE_VP_IS_MEMOP(vp_scatter, 1, 0) END_REGISTER_VP(vp_scatter, VP_SCATTER) // llvm.vp.load(ptr,mask,vlen) -BEGIN_REGISTER_VP(vp_load, 1, 2, VP_LOAD, -1) +BEGIN_REGISTER_VP_INTRINSIC(vp_load, 1, 2) +// val,chain = VP_LOAD chain,base,offset,mask,evl +BEGIN_REGISTER_VP_SDNODE(VP_LOAD, -1, vp_load, 3, 4) HANDLE_VP_TO_OPC(Load) HANDLE_VP_TO_INTRIN(masked_load) HANDLE_VP_IS_MEMOP(vp_load, 0, None) END_REGISTER_VP(vp_load, VP_LOAD) // llvm.vp.gather(ptr,mask,vlen) -BEGIN_REGISTER_VP(vp_gather, 1, 2, VP_GATHER, -1) 
+BEGIN_REGISTER_VP_INTRINSIC(vp_gather, 1, 2) +// val,chain = VP_GATHER chain,base,indices,scale,mask,evl +BEGIN_REGISTER_VP_SDNODE(VP_GATHER, -1, vp_gather, 4, 5) HANDLE_VP_TO_INTRIN(masked_gather) HANDLE_VP_IS_MEMOP(vp_gather, 0, None) END_REGISTER_VP(vp_gather, VP_GATHER) ///// } Memory Operations +///// Reductions { + +// Specialized helper macro for VP reductions (%start, %x, %mask, %evl). +#ifdef HELPER_REGISTER_REDUCTION_VP +#error "The internal helper macro HELPER_REGISTER_REDUCTION_VP is already defined!" +#endif +#define HELPER_REGISTER_REDUCTION_VP(VPINTRIN, SDOPC, INTRIN) \ +BEGIN_REGISTER_VP(VPINTRIN, 2, 3, SDOPC, -1) \ +HANDLE_VP_TO_INTRIN(INTRIN) \ +HANDLE_VP_REDUCTION(VPINTRIN, 0, 1) \ +PROPERTY_VP_REDUCTION_SDNODE(SDOPC) \ +END_REGISTER_VP(VPINTRIN, SDOPC) + +// llvm.vp.reduce.add(start,x,mask,vlen) +HELPER_REGISTER_REDUCTION_VP(vp_reduce_add, VP_REDUCE_ADD, + experimental_vector_reduce_add) + +// llvm.vp.reduce.mul(start,x,mask,vlen) +HELPER_REGISTER_REDUCTION_VP(vp_reduce_mul, VP_REDUCE_MUL, + experimental_vector_reduce_mul) + +// llvm.vp.reduce.and(start,x,mask,vlen) +HELPER_REGISTER_REDUCTION_VP(vp_reduce_and, VP_REDUCE_AND, + experimental_vector_reduce_and) + +// llvm.vp.reduce.or(start,x,mask,vlen) +HELPER_REGISTER_REDUCTION_VP(vp_reduce_or, VP_REDUCE_OR, + experimental_vector_reduce_or) + +// llvm.vp.reduce.xor(start,x,mask,vlen) +HELPER_REGISTER_REDUCTION_VP(vp_reduce_xor, VP_REDUCE_XOR, + experimental_vector_reduce_xor) + +// llvm.vp.reduce.smax(start,x,mask,vlen) +HELPER_REGISTER_REDUCTION_VP(vp_reduce_smax, VP_REDUCE_SMAX, + experimental_vector_reduce_smax) + +// llvm.vp.reduce.smin(start,x,mask,vlen) +HELPER_REGISTER_REDUCTION_VP(vp_reduce_smin, VP_REDUCE_SMIN, + experimental_vector_reduce_smin) + +// llvm.vp.reduce.umax(start,x,mask,vlen) +HELPER_REGISTER_REDUCTION_VP(vp_reduce_umax, VP_REDUCE_UMAX, + experimental_vector_reduce_umax) + +// llvm.vp.reduce.umin(start,x,mask,vlen) +HELPER_REGISTER_REDUCTION_VP(vp_reduce_umin, 
VP_REDUCE_UMIN, + experimental_vector_reduce_umin) + +// llvm.vp.reduce.fmax(start,x,mask,vlen) +HELPER_REGISTER_REDUCTION_VP(vp_reduce_fmax, VP_REDUCE_FMAX, + experimental_vector_reduce_fmax) + +// llvm.vp.reduce.fmin(start,x,mask,vlen) +HELPER_REGISTER_REDUCTION_VP(vp_reduce_fmin, VP_REDUCE_FMIN, + experimental_vector_reduce_fmin) + +#undef HELPER_REGISTER_REDUCTION_VP + +// Specialized helper macro for VP reductions as above but with two forms: +// sequential and reassociative. These manifest as the presence of 'reassoc' +// fast-math flags in the IR and as two distinct ISD opcodes in the +// SelectionDAG. +#ifdef HELPER_REGISTER_REDUCTION_SEQ_VP +#error "The internal helper macro HELPER_REGISTER_REDUCTION_SEQ_VP is already defined!" +#endif +#define HELPER_REGISTER_REDUCTION_SEQ_VP(VPINTRIN, SDOPC, SEQ_SDOPC, INTRIN) \ +BEGIN_REGISTER_VP_INTRINSIC(VPINTRIN, 2, 3) \ +BEGIN_REGISTER_VP_SDNODE(SDOPC, -1, VPINTRIN, 2, 3) \ +END_REGISTER_VP_SDNODE(SDOPC) \ +BEGIN_REGISTER_VP_SDNODE(SEQ_SDOPC, -1, VPINTRIN, 2, 3) \ +END_REGISTER_VP_SDNODE(SEQ_SDOPC) \ +HANDLE_VP_TO_INTRIN(INTRIN) \ +HANDLE_VP_REDUCTION(VPINTRIN, 0, 1) \ +PROPERTY_VP_REDUCTION_SDNODE(SDOPC) \ +PROPERTY_VP_REDUCTION_SDNODE(SEQ_SDOPC) \ +END_REGISTER_VP_INTRINSIC(VPINTRIN) + +// llvm.vp.reduce.fadd(start,x,mask,vlen) +HELPER_REGISTER_REDUCTION_SEQ_VP(vp_reduce_fadd, VP_REDUCE_FADD, + VP_REDUCE_SEQ_FADD, + experimental_vector_reduce_fadd) + +// llvm.vp.reduce.fmul(start,x,mask,vlen) +HELPER_REGISTER_REDUCTION_SEQ_VP(vp_reduce_fmul, VP_REDUCE_FMUL, + VP_REDUCE_SEQ_FMUL, + experimental_vector_reduce_fmul) + +#undef HELPER_REGISTER_REDUCTION_SEQ_VP + +///// } Reduction + +///// Shuffles { + +// llvm.vp.select(mask,on_true,on_false,vlen) +BEGIN_REGISTER_VP_INTRINSIC(vp_select, 0, 3) +// BEGIN_REGISTER_VP_SDNODE(VP_SELECT, -1, vp_select, 0, 4) +// END_REGISTER_CASES(vp_select, VP_SELECT) +END_REGISTER_VP_INTRINSIC(vp_select) + +BEGIN_REGISTER_VP(experimental_vp_splice, 3, 5, + EXPERIMENTAL_VP_SPLICE, -1) 
+END_REGISTER_VP(experimental_vp_splice, EXPERIMENTAL_VP_SPLICE) + +///// } Shuffles #undef BEGIN_REGISTER_VP #undef BEGIN_REGISTER_VP_INTRINSIC @@ -242,3 +372,6 @@ END_REGISTER_VP(vp_gather, VP_GATHER) #undef HANDLE_VP_TO_CONSTRAINEDFP #undef HANDLE_VP_TO_INTRIN #undef HANDLE_VP_IS_MEMOP +#undef HANDLE_VP_REDUCTION +#undef PROPERTY_VP_BINARYOP_SDNODE +#undef PROPERTY_VP_REDUCTION_SDNODE diff --git a/llvm/include/llvm/IR/Value.h b/llvm/include/llvm/IR/Value.h index 2ad1c9e8c300..fc2ed00d770f 100644 --- a/llvm/include/llvm/IR/Value.h +++ b/llvm/include/llvm/IR/Value.h @@ -37,7 +37,6 @@ class DataLayout; class Function; class GlobalAlias; class GlobalIFunc; -class GlobalIndirectSymbol; class GlobalObject; class GlobalValue; class GlobalVariable; @@ -454,14 +453,18 @@ public: /// Return true if there is exactly one use of this value that cannot be /// dropped. - /// - /// This is specialized because it is a common request and does not require - /// traversing the whole use list. Use *getSingleUndroppableUse(); const Use *getSingleUndroppableUse() const { return const_cast<Value *>(this)->getSingleUndroppableUse(); } + /// Return true if there is exactly one unique user of this value that cannot be + /// dropped (that user can have multiple uses of this value). + User *getUniqueUndroppableUser(); + const User *getUniqueUndroppableUser() const { + return const_cast<Value *>(this)->getUniqueUndroppableUser(); + } + /// Return true if there this value. /// /// This is specialized because it is a common request and does not require @@ -690,6 +693,9 @@ public: /// If \p AllowNonInbounds is true, offsets in GEPs are stripped and /// accumulated even if the GEP is not "inbounds". /// + /// If \p AllowInvariantGroup is true then this method also looks through + /// strip.invariant.group and launder.invariant.group intrinsics. + /// /// If \p ExternalAnalysis is provided it will be used to calculate a offset /// when a operand of GEP is not constant. 
/// For example, for a value \p ExternalAnalysis might try to calculate a @@ -705,13 +711,15 @@ public: /// is unchanged. const Value *stripAndAccumulateConstantOffsets( const DataLayout &DL, APInt &Offset, bool AllowNonInbounds, + bool AllowInvariantGroup = false, function_ref<bool(Value &Value, APInt &Offset)> ExternalAnalysis = nullptr) const; Value *stripAndAccumulateConstantOffsets(const DataLayout &DL, APInt &Offset, - bool AllowNonInbounds) { + bool AllowNonInbounds, + bool AllowInvariantGroup = false) { return const_cast<Value *>( static_cast<const Value *>(this)->stripAndAccumulateConstantOffsets( - DL, Offset, AllowNonInbounds)); + DL, Offset, AllowNonInbounds, AllowInvariantGroup)); } /// This is a wrapper around stripAndAccumulateConstantOffsets with the @@ -781,8 +789,8 @@ public: /// /// This is the greatest alignment value supported by load, store, and alloca /// instructions, and global values. - static const unsigned MaxAlignmentExponent = 29; - static const unsigned MaximumAlignment = 1u << MaxAlignmentExponent; + static constexpr unsigned MaxAlignmentExponent = 32; + static constexpr uint64_t MaximumAlignment = 1ULL << MaxAlignmentExponent; /// Mutate the type of this Value to be of the specified type. 
/// @@ -1012,21 +1020,16 @@ template <> struct isa_impl<GlobalIFunc, Value> { } }; -template <> struct isa_impl<GlobalIndirectSymbol, Value> { - static inline bool doit(const Value &Val) { - return isa<GlobalAlias>(Val) || isa<GlobalIFunc>(Val); - } -}; - template <> struct isa_impl<GlobalValue, Value> { static inline bool doit(const Value &Val) { - return isa<GlobalObject>(Val) || isa<GlobalIndirectSymbol>(Val); + return isa<GlobalObject>(Val) || isa<GlobalAlias>(Val); } }; template <> struct isa_impl<GlobalObject, Value> { static inline bool doit(const Value &Val) { - return isa<GlobalVariable>(Val) || isa<Function>(Val); + return isa<GlobalVariable>(Val) || isa<Function>(Val) || + isa<GlobalIFunc>(Val); } }; diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index 365240de321a..845d7dcdebd2 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -64,6 +64,7 @@ void initializeAAEvalLegacyPassPass(PassRegistry&); void initializeAAResultsWrapperPassPass(PassRegistry&); void initializeADCELegacyPassPass(PassRegistry&); void initializeAddDiscriminatorsLegacyPassPass(PassRegistry&); +void initializeAddFSDiscriminatorsPass(PassRegistry &); void initializeModuleAddressSanitizerLegacyPassPass(PassRegistry &); void initializeASanGlobalsMetadataWrapperPassPass(PassRegistry &); void initializeAddressSanitizerLegacyPassPass(PassRegistry &); @@ -183,6 +184,7 @@ void initializeGlobalSplitPass(PassRegistry&); void initializeGlobalsAAWrapperPassPass(PassRegistry&); void initializeGuardWideningLegacyPassPass(PassRegistry&); void initializeHardwareLoopsPass(PassRegistry&); +void initializeMIRProfileLoaderPassPass(PassRegistry &); void initializeMemProfilerLegacyPassPass(PassRegistry &); void initializeHotColdSplittingLegacyPassPass(PassRegistry&); void initializeHWAddressSanitizerLegacyPassPass(PassRegistry &); @@ -234,7 +236,8 @@ void initializeLiveIntervalsPass(PassRegistry&); void 
initializeLiveRangeShrinkPass(PassRegistry&); void initializeLiveRegMatrixPass(PassRegistry&); void initializeLiveStacksPass(PassRegistry&); -void initializeLiveVariablesPass(PassRegistry&); +void initializeLiveVariablesPass(PassRegistry &); +void initializeLoadStoreOptPass(PassRegistry &); void initializeLoadStoreVectorizerLegacyPassPass(PassRegistry&); void initializeLoaderPassPass(PassRegistry&); void initializeLocalStackSlotPassPass(PassRegistry&); diff --git a/llvm/include/llvm/InterfaceStub/IFSHandler.h b/llvm/include/llvm/InterfaceStub/IFSHandler.h index de627492366f..6ae6a421318e 100644 --- a/llvm/include/llvm/InterfaceStub/IFSHandler.h +++ b/llvm/include/llvm/InterfaceStub/IFSHandler.h @@ -51,6 +51,9 @@ Error validateIFSTarget(IFSStub &Stub, bool ParseTriple); void stripIFSTarget(IFSStub &Stub, bool StripTriple, bool StripArch, bool StripEndianness, bool StripBitWidth); +/// Strips symbols from IFS symbol table that are undefined. +void stripIFSUndefinedSymbols(IFSStub &Stub); + /// Parse llvm triple string into a IFSTarget struct. IFSTarget parseTriple(StringRef TripleStr); diff --git a/llvm/include/llvm/LTO/Caching.h b/llvm/include/llvm/LTO/Caching.h deleted file mode 100644 index 43b978328b74..000000000000 --- a/llvm/include/llvm/LTO/Caching.h +++ /dev/null @@ -1,38 +0,0 @@ -//===- Caching.h - LLVM Link Time Optimizer Configuration -----------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file defines the localCache function, which allows clients to add a -// filesystem cache to ThinLTO. 
-// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LTO_CACHING_H -#define LLVM_LTO_CACHING_H - -#include "llvm/LTO/LTO.h" - -namespace llvm { -namespace lto { - -/// This type defines the callback to add a pre-existing native object file -/// (e.g. in a cache). -/// -/// Buffer callbacks must be thread safe. -using AddBufferFn = - std::function<void(unsigned Task, std::unique_ptr<MemoryBuffer> MB)>; - -/// Create a local file system cache which uses the given cache directory and -/// file callback. This function also creates the cache directory if it does not -/// already exist. -Expected<NativeObjectCache> localCache(StringRef CacheDirectoryPath, - AddBufferFn AddBuffer); - -} // namespace lto -} // namespace llvm - -#endif diff --git a/llvm/include/llvm/LTO/Config.h b/llvm/include/llvm/LTO/Config.h index 5fd3c9f408f3..eb793d62907e 100644 --- a/llvm/include/llvm/LTO/Config.h +++ b/llvm/include/llvm/LTO/Config.h @@ -70,6 +70,9 @@ struct Config { /// Run PGO context sensitive IR instrumentation. bool RunCSIRInstr = false; + /// Turn on/off the warning about a hash mismatch in the PGO profile data. + bool PGOWarnMismatch = true; + /// Asserts whether we can assume whole program visibility during the LTO /// link. bool HasWholeProgramVisibility = false; diff --git a/llvm/include/llvm/LTO/LTO.h b/llvm/include/llvm/LTO/LTO.h index ea1dea2d6f42..d2b0fef1ca47 100644 --- a/llvm/include/llvm/LTO/LTO.h +++ b/llvm/include/llvm/LTO/LTO.h @@ -21,8 +21,10 @@ #include "llvm/IR/ModuleSummaryIndex.h" #include "llvm/LTO/Config.h" #include "llvm/Object/IRSymtab.h" +#include "llvm/Support/Caching.h" #include "llvm/Support/Error.h" #include "llvm/Support/thread.h" +#include "llvm/Transforms/IPO/FunctionAttrs.h" #include "llvm/Transforms/IPO/FunctionImport.h" namespace llvm { @@ -38,7 +40,7 @@ class ToolOutputFile; /// Resolve linkage for prevailing symbols in the \p Index. 
Linkage changes /// recorded in the index and the ThinLTO backends must apply the changes to -/// the module via thinLTOResolvePrevailingInModule. +/// the module via thinLTOFinalizeInModule. /// /// This is done for correctness (if value exported, ensure we always /// emit a copy), and compile-time optimization (allow drop of duplicates). @@ -186,47 +188,13 @@ private: } }; -/// This class wraps an output stream for a native object. Most clients should -/// just be able to return an instance of this base class from the stream -/// callback, but if a client needs to perform some action after the stream is -/// written to, that can be done by deriving from this class and overriding the -/// destructor. -class NativeObjectStream { -public: - NativeObjectStream(std::unique_ptr<raw_pwrite_stream> OS) : OS(std::move(OS)) {} - std::unique_ptr<raw_pwrite_stream> OS; - virtual ~NativeObjectStream() = default; -}; - -/// This type defines the callback to add a native object that is generated on -/// the fly. -/// -/// Stream callbacks must be thread safe. -using AddStreamFn = - std::function<std::unique_ptr<NativeObjectStream>(unsigned Task)>; - -/// This is the type of a native object cache. To request an item from the -/// cache, pass a unique string as the Key. For hits, the cached file will be -/// added to the link and this function will return AddStreamFn(). For misses, -/// the cache will return a stream callback which must be called at most once to -/// produce content for the stream. The native object stream produced by the -/// stream callback will add the file to the link after the stream is written -/// to. -/// -/// Clients generally look like this: -/// -/// if (AddStreamFn AddStream = Cache(Task, Key)) -/// ProduceContent(AddStream); -using NativeObjectCache = - std::function<AddStreamFn(unsigned Task, StringRef Key)>; - /// A ThinBackend defines what happens after the thin-link phase during ThinLTO. 
/// The details of this type definition aren't important; clients can only /// create a ThinBackend using one of the create*ThinBackend() functions below. using ThinBackend = std::function<std::unique_ptr<ThinBackendProc>( const Config &C, ModuleSummaryIndex &CombinedIndex, StringMap<GVSummaryMapTy> &ModuleToDefinedGVSummaries, - AddStreamFn AddStream, NativeObjectCache Cache)>; + AddStreamFn AddStream, FileCache Cache)>; /// This ThinBackend runs the individual backend jobs in-process. /// The default value means to use one job per hardware core (not hyper-thread). @@ -299,7 +267,7 @@ public: /// /// The client will receive at most one callback (via either AddStream or /// Cache) for each task identifier. - Error run(AddStreamFn AddStream, NativeObjectCache Cache = nullptr); + Error run(AddStreamFn AddStream, FileCache Cache = nullptr); /// Static method that returns a list of libcall symbols that can be generated /// by LTO but might not be visible from bitcode symbol table. @@ -431,7 +399,7 @@ private: const SymbolResolution *&ResI, const SymbolResolution *ResE); Error runRegularLTO(AddStreamFn AddStream); - Error runThinLTO(AddStreamFn AddStream, NativeObjectCache Cache, + Error runThinLTO(AddStreamFn AddStream, FileCache Cache, const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols); Error checkPartiallySplit(); @@ -444,6 +412,9 @@ private: // Identify symbols exported dynamically, and that therefore could be // referenced by a shared library not visible to the linker. DenseSet<GlobalValue::GUID> DynamicExportSymbols; + + // Diagnostic optimization remarks file + std::unique_ptr<ToolOutputFile> DiagnosticOutputFile; }; /// The resolution for a symbol. 
The linker must provide a SymbolResolution for diff --git a/llvm/include/llvm/LTO/SummaryBasedOptimizations.h b/llvm/include/llvm/LTO/SummaryBasedOptimizations.h index 6697c821a5ea..508ab2587ac5 100644 --- a/llvm/include/llvm/LTO/SummaryBasedOptimizations.h +++ b/llvm/include/llvm/LTO/SummaryBasedOptimizations.h @@ -10,6 +10,8 @@ #define LLVM_LTO_SUMMARYBASEDOPTIMIZATIONS_H namespace llvm { class ModuleSummaryIndex; + +/// Compute synthetic function entry counts. void computeSyntheticCounts(ModuleSummaryIndex &Index); } // namespace llvm diff --git a/llvm/include/llvm/LTO/legacy/LTOCodeGenerator.h b/llvm/include/llvm/LTO/legacy/LTOCodeGenerator.h index 31688e43e174..333f483f29c5 100644 --- a/llvm/include/llvm/LTO/legacy/LTOCodeGenerator.h +++ b/llvm/include/llvm/LTO/legacy/LTOCodeGenerator.h @@ -176,7 +176,7 @@ struct LTOCodeGenerator { /// created using the \p AddStream callback. Returns true on success. /// /// Calls \a verifyMergedModuleOnce(). - bool compileOptimized(lto::AddStreamFn AddStream, unsigned ParallelismLevel); + bool compileOptimized(AddStreamFn AddStream, unsigned ParallelismLevel); /// Enable the Freestanding mode: indicate that the optimizer should not /// assume builtins are present on the target. diff --git a/llvm/include/llvm/LTO/legacy/LTOModule.h b/llvm/include/llvm/LTO/legacy/LTOModule.h index 2a25dab58ada..01e63db4bab3 100644 --- a/llvm/include/llvm/LTO/legacy/LTOModule.h +++ b/llvm/include/llvm/LTO/legacy/LTOModule.h @@ -167,6 +167,10 @@ public: Expected<uint32_t> getMachOCPUSubType() const; + /// Returns true if the module has either the @llvm.global_ctors or the + /// @llvm.global_dtors symbol. Otherwise returns false. 
+ bool hasCtorDtor() const; + private: /// Parse metadata from the module // FIXME: it only parses "llvm.linker.options" metadata at the moment diff --git a/llvm/include/llvm/LinkAllIR.h b/llvm/include/llvm/LinkAllIR.h index 4b0aabeee701..ceed784d557d 100644 --- a/llvm/include/llvm/LinkAllIR.h +++ b/llvm/include/llvm/LinkAllIR.h @@ -38,6 +38,9 @@ namespace { // delete it all as dead code, even with whole program optimization, // yet is effectively a NO-OP. As the compiler isn't smart enough // to know that getenv() never returns -1, this will do the job. + // This is so that globals in the translation units where these functions + // are defined are forced to be initialized, populating various + // registries. if (std::getenv("bar") != (char*) -1) return; llvm::LLVMContext Context; diff --git a/llvm/include/llvm/LinkAllPasses.h b/llvm/include/llvm/LinkAllPasses.h index 45978828a8ce..c8b9aaeed76a 100644 --- a/llvm/include/llvm/LinkAllPasses.h +++ b/llvm/include/llvm/LinkAllPasses.h @@ -64,6 +64,9 @@ namespace { // delete it all as dead code, even with whole program optimization, // yet is effectively a NO-OP. As the compiler isn't smart enough // to know that getenv() never returns -1, this will do the job. + // This is so that globals in the translation units where these functions + // are defined are forced to be initialized, populating various + // registries. if (std::getenv("bar") != (char*) -1) return; diff --git a/llvm/include/llvm/MC/MCAsmBackend.h b/llvm/include/llvm/MC/MCAsmBackend.h index 08739d51f751..bb57c3453d10 100644 --- a/llvm/include/llvm/MC/MCAsmBackend.h +++ b/llvm/include/llvm/MC/MCAsmBackend.h @@ -55,7 +55,8 @@ public: /// Give the target a chance to manipulate state related to instruction /// alignment (e.g. padding for optimization), instruction relaxablility, etc. /// before and after actually emitting the instruction. 
- virtual void emitInstructionBegin(MCObjectStreamer &OS, const MCInst &Inst) {} + virtual void emitInstructionBegin(MCObjectStreamer &OS, const MCInst &Inst, + const MCSubtargetInfo &STI) {} virtual void emitInstructionEnd(MCObjectStreamer &OS, const MCInst &Inst) {} /// lifetime management @@ -185,13 +186,16 @@ public: /// Returns the maximum size of a nop in bytes on this target. /// - virtual unsigned getMaximumNopSize() const { return 0; } + virtual unsigned getMaximumNopSize(const MCSubtargetInfo &STI) const { + return 0; + } /// Write an (optimal) nop sequence of Count bytes to the given output. If the /// target cannot generate such a sequence, it should return an error. /// /// \return - True on success. - virtual bool writeNopData(raw_ostream &OS, uint64_t Count) const = 0; + virtual bool writeNopData(raw_ostream &OS, uint64_t Count, + const MCSubtargetInfo *STI) const = 0; /// Give backend an opportunity to finish layout after relaxation virtual void finishLayout(MCAssembler const &Asm, diff --git a/llvm/include/llvm/MC/MCAsmInfoGOFF.h b/llvm/include/llvm/MC/MCAsmInfoGOFF.h new file mode 100644 index 000000000000..1f3b26311b37 --- /dev/null +++ b/llvm/include/llvm/MC/MCAsmInfoGOFF.h @@ -0,0 +1,29 @@ +//===- MCAsmInfoGOFF.h - GOFF Asm Info Fields -------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines certain target specific asm properties for GOFF (z/OS) +/// based targets. 
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_MC_MCASMINFOGOFF_H +#define LLVM_MC_MCASMINFOGOFF_H + +#include "llvm/MC/MCAsmInfo.h" + +namespace llvm { +class MCAsmInfoGOFF : public MCAsmInfo { + virtual void anchor(); + +protected: + MCAsmInfoGOFF(); +}; +} // end namespace llvm + +#endif // LLVM_MC_MCASMINFOGOFF_H diff --git a/llvm/include/llvm/MC/MCContext.h b/llvm/include/llvm/MC/MCContext.h index 877b2dc4ac92..bde750759a0b 100644 --- a/llvm/include/llvm/MC/MCContext.h +++ b/llvm/include/llvm/MC/MCContext.h @@ -817,7 +817,7 @@ namespace llvm { // Unrecoverable error has occurred. Display the best diagnostic we can // and bail via exit(1). For now, most MC backend errors are unrecoverable. // FIXME: We should really do something about that. - LLVM_ATTRIBUTE_NORETURN void reportFatalError(SMLoc L, const Twine &Msg); + [[noreturn]] void reportFatalError(SMLoc L, const Twine &Msg); const MCAsmMacro *lookupMacro(StringRef Name) { StringMap<MCAsmMacro>::iterator I = MacroMap.find(Name); diff --git a/llvm/include/llvm/MC/MCDwarf.h b/llvm/include/llvm/MC/MCDwarf.h index 23efdc70609b..7e72d56f3097 100644 --- a/llvm/include/llvm/MC/MCDwarf.h +++ b/llvm/include/llvm/MC/MCDwarf.h @@ -20,6 +20,7 @@ #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/MC/MCSection.h" +#include "llvm/MC/StringTableBuilder.h" #include "llvm/Support/Error.h" #include "llvm/Support/MD5.h" #include <cassert> @@ -34,7 +35,6 @@ namespace llvm { template <typename T> class ArrayRef; class MCAsmBackend; class MCContext; -class MCDwarfLineStr; class MCObjectStreamer; class MCStreamer; class MCSymbol; @@ -47,6 +47,24 @@ namespace mcdwarf { MCSymbol *emitListsTableHeaderStart(MCStreamer &S); } // namespace mcdwarf +/// Manage the .debug_line_str section contents, if we use it. 
+class MCDwarfLineStr { + MCSymbol *LineStrLabel = nullptr; + StringTableBuilder LineStrings{StringTableBuilder::DWARF}; + bool UseRelocs = false; + +public: + /// Construct an instance that can emit .debug_line_str (for use in a normal + /// v5 line table). + explicit MCDwarfLineStr(MCContext &Ctx); + + /// Emit a reference to the string. + void emitRef(MCStreamer *MCOS, StringRef Path); + + /// Emit the .debug_line_str section if appropriate. + void emitSection(MCStreamer *MCOS); +}; + /// Instances of this class represent the name of the dwarf .file directive and /// its associated dwarf file number in the MC file. MCDwarfFile's are created /// and uniqued by the MCContext class. In Dwarf 4 file numbers start from 1; @@ -170,6 +188,15 @@ public: MCSymbol *getLabel() const { return Label; } + // This indicates the line entry is synthesized for an end entry. + bool IsEndEntry = false; + + // Override the label with the given EndLabel. + void setEndLabel(MCSymbol *EndLabel) { + Label = EndLabel; + IsEndEntry = true; + } + // This is called when an instruction is assembled into the specified // section and if there is information from the last .loc directive that // has yet to have a line entry made for it is made. @@ -187,6 +214,10 @@ public: MCLineDivisions[Sec].push_back(LineEntry); } + // Add an end entry by cloning the last entry, if exists, for the section + // the given EndLabel belongs to. The label is replaced by the given EndLabel. + void addEndEntry(MCSymbol *EndLabel); + using MCDwarfLineEntryCollection = std::vector<MCDwarfLineEntry>; using iterator = MCDwarfLineEntryCollection::iterator; using const_iterator = MCDwarfLineEntryCollection::const_iterator; @@ -317,6 +348,11 @@ public: void emitCU(MCStreamer *MCOS, MCDwarfLineTableParams Params, Optional<MCDwarfLineStr> &LineStr) const; + // This emits a single line table associated with a given Section. 
+ static void + emitOne(MCStreamer *MCOS, MCSection *Section, + const MCLineSection::MCDwarfLineEntryCollection &LineEntries); + Expected<unsigned> tryGetFile(StringRef &Directory, StringRef &FileName, Optional<MD5::MD5Result> Checksum, Optional<StringRef> Source, diff --git a/llvm/include/llvm/MC/MCELFObjectWriter.h b/llvm/include/llvm/MC/MCELFObjectWriter.h index 9f4b8de7947b..fa17759bc21a 100644 --- a/llvm/include/llvm/MC/MCELFObjectWriter.h +++ b/llvm/include/llvm/MC/MCELFObjectWriter.h @@ -78,6 +78,8 @@ public: case Triple::PS4: case Triple::FreeBSD: return ELF::ELFOSABI_FREEBSD; + case Triple::Solaris: + return ELF::ELFOSABI_SOLARIS; default: return ELF::ELFOSABI_NONE; } diff --git a/llvm/include/llvm/MC/MCELFStreamer.h b/llvm/include/llvm/MC/MCELFStreamer.h index 8c1e22a14702..8f2b176862c8 100644 --- a/llvm/include/llvm/MC/MCELFStreamer.h +++ b/llvm/include/llvm/MC/MCELFStreamer.h @@ -39,7 +39,7 @@ public: /// \name MCStreamer Interface /// @{ - void InitSections(bool NoExecStack) override; + void initSections(bool NoExecStack, const MCSubtargetInfo &STI) override; void changeSection(MCSection *Section, const MCExpr *Subsection) override; void emitLabel(MCSymbol *Symbol, SMLoc Loc = SMLoc()) override; void emitLabelAtPos(MCSymbol *Symbol, SMLoc Loc, MCFragment *F, diff --git a/llvm/include/llvm/MC/MCExpr.h b/llvm/include/llvm/MC/MCExpr.h index 38cca2413e1e..bf1f32bb91ba 100644 --- a/llvm/include/llvm/MC/MCExpr.h +++ b/llvm/include/llvm/MC/MCExpr.h @@ -200,6 +200,7 @@ public: VK_GOTREL, VK_PCREL, VK_GOTPCREL, + VK_GOTPCREL_NORELAX, VK_GOTTPOFF, VK_INDNTPOFF, VK_NTPOFF, @@ -328,6 +329,7 @@ public: VK_WASM_TLSREL, // Memory address relative to __tls_base VK_WASM_MBREL, // Memory address relative to __memory_base VK_WASM_TBREL, // Table index relative to __table_base + VK_WASM_GOT_TLS, // Wasm global index of TLS symbol. 
VK_AMDGPU_GOTPCREL32_LO, // symbol@gotpcrel32@lo VK_AMDGPU_GOTPCREL32_HI, // symbol@gotpcrel32@hi diff --git a/llvm/include/llvm/MC/MCFragment.h b/llvm/include/llvm/MC/MCFragment.h index f3a785fb09b7..736fdd992063 100644 --- a/llvm/include/llvm/MC/MCFragment.h +++ b/llvm/include/llvm/MC/MCFragment.h @@ -311,6 +311,9 @@ class MCAlignFragment : public MCFragment { /// cannot be satisfied in this width then this fragment is ignored. unsigned MaxBytesToEmit; + /// When emitting Nops some subtargets have specific nop encodings. + const MCSubtargetInfo *STI; + public: MCAlignFragment(unsigned Alignment, int64_t Value, unsigned ValueSize, unsigned MaxBytesToEmit, MCSection *Sec = nullptr) @@ -326,7 +329,12 @@ public: unsigned getMaxBytesToEmit() const { return MaxBytesToEmit; } bool hasEmitNops() const { return EmitNops; } - void setEmitNops(bool Value) { EmitNops = Value; } + void setEmitNops(bool Value, const MCSubtargetInfo *STI) { + EmitNops = Value; + this->STI = STI; + } + + const MCSubtargetInfo *getSubtargetInfo() const { return STI; } static bool classof(const MCFragment *F) { return F->getKind() == MCFragment::FT_Align; @@ -369,17 +377,22 @@ class MCNopsFragment : public MCFragment { /// Source location of the directive that this fragment was created for. SMLoc Loc; + /// When emitting Nops some subtargets have specific nop encodings. 
+ const MCSubtargetInfo &STI; + public: MCNopsFragment(int64_t NumBytes, int64_t ControlledNopLength, SMLoc L, - MCSection *Sec = nullptr) + const MCSubtargetInfo &STI, MCSection *Sec = nullptr) : MCFragment(FT_Nops, false, Sec), Size(NumBytes), - ControlledNopLength(ControlledNopLength), Loc(L) {} + ControlledNopLength(ControlledNopLength), Loc(L), STI(STI) {} int64_t getNumBytes() const { return Size; } int64_t getControlledNopLength() const { return ControlledNopLength; } SMLoc getLoc() const { return Loc; } + const MCSubtargetInfo *getSubtargetInfo() const { return &STI; } + static bool classof(const MCFragment *F) { return F->getKind() == MCFragment::FT_Nops; } @@ -572,10 +585,14 @@ class MCBoundaryAlignFragment : public MCFragment { /// is not meaningful before that. uint64_t Size = 0; + /// When emitting Nops some subtargets have specific nop encodings. + const MCSubtargetInfo &STI; + public: - MCBoundaryAlignFragment(Align AlignBoundary, MCSection *Sec = nullptr) - : MCFragment(FT_BoundaryAlign, false, Sec), AlignBoundary(AlignBoundary) { - } + MCBoundaryAlignFragment(Align AlignBoundary, const MCSubtargetInfo &STI, + MCSection *Sec = nullptr) + : MCFragment(FT_BoundaryAlign, false, Sec), AlignBoundary(AlignBoundary), + STI(STI) {} uint64_t getSize() const { return Size; } void setSize(uint64_t Value) { Size = Value; } @@ -589,6 +606,8 @@ public: LastFragment = F; } + const MCSubtargetInfo *getSubtargetInfo() const { return &STI; } + static bool classof(const MCFragment *F) { return F->getKind() == MCFragment::FT_BoundaryAlign; } diff --git a/llvm/include/llvm/MC/MCInstrAnalysis.h b/llvm/include/llvm/MC/MCInstrAnalysis.h index 898ca47b13b8..632a7d8f820e 100644 --- a/llvm/include/llvm/MC/MCInstrAnalysis.h +++ b/llvm/include/llvm/MC/MCInstrAnalysis.h @@ -154,9 +154,14 @@ public: /// Given an instruction tries to get the address of a memory operand. Returns /// the address on success. 
- virtual Optional<uint64_t> evaluateMemoryOperandAddress(const MCInst &Inst, - uint64_t Addr, - uint64_t Size) const; + virtual Optional<uint64_t> + evaluateMemoryOperandAddress(const MCInst &Inst, const MCSubtargetInfo *STI, + uint64_t Addr, uint64_t Size) const; + + /// Given an instruction with a memory operand that could require relocation, + /// returns the offset within the instruction of that relocation. + virtual Optional<uint64_t> + getMemoryOperandRelocationOffset(const MCInst &Inst, uint64_t Size) const; /// Returns (PLT virtual address, GOT virtual address) pairs for PLT entries. virtual std::vector<std::pair<uint64_t, uint64_t>> diff --git a/llvm/include/llvm/MC/MCInstrDesc.h b/llvm/include/llvm/MC/MCInstrDesc.h index 0e6b677098e8..e8ffd29170e6 100644 --- a/llvm/include/llvm/MC/MCInstrDesc.h +++ b/llvm/include/llvm/MC/MCInstrDesc.h @@ -76,7 +76,7 @@ enum OperandType { OPERAND_FIRST_TARGET = 13, }; -} +} // namespace MCOI /// This holds information about one operand of a machine instruction, /// indicating the register class for register operands, etc. @@ -185,7 +185,7 @@ enum Flag { VariadicOpsAreDefs, Authenticated, }; -} +} // namespace MCID /// Describe properties that are true of each instruction in the target /// description file. 
This captures information about side effects, register diff --git a/llvm/include/llvm/MC/MCObjectFileInfo.h b/llvm/include/llvm/MC/MCObjectFileInfo.h index 8ae86ef2a574..ba7450ac64f1 100644 --- a/llvm/include/llvm/MC/MCObjectFileInfo.h +++ b/llvm/include/llvm/MC/MCObjectFileInfo.h @@ -225,10 +225,13 @@ protected: // XCOFF specific sections MCSection *TOCBaseSection = nullptr; + MCSection *ReadOnly8Section = nullptr; + MCSection *ReadOnly16Section = nullptr; public: void initMCObjectFileInfo(MCContext &MCCtx, bool PIC, bool LargeCodeModel = false); + virtual ~MCObjectFileInfo(); MCContext &getContext() const { return *Ctx; } bool getSupportsWeakOmittedEHFrame() const { @@ -251,6 +254,7 @@ public: return CompactUnwindDwarfEHFrameOnly; } + virtual unsigned getTextSectionAlignment() const { return 4; } MCSection *getTextSection() const { return TextSection; } MCSection *getDataSection() const { return DataSection; } MCSection *getBSSSection() const { return BSSSection; } diff --git a/llvm/include/llvm/MC/MCObjectStreamer.h b/llvm/include/llvm/MC/MCObjectStreamer.h index dcdee2b5774b..9d6416e4a18d 100644 --- a/llvm/include/llvm/MC/MCObjectStreamer.h +++ b/llvm/include/llvm/MC/MCObjectStreamer.h @@ -137,7 +137,7 @@ public: void emitValueToAlignment(unsigned ByteAlignment, int64_t Value = 0, unsigned ValueSize = 1, unsigned MaxBytesToEmit = 0) override; - void emitCodeAlignment(unsigned ByteAlignment, + void emitCodeAlignment(unsigned ByteAlignment, const MCSubtargetInfo *STI, unsigned MaxBytesToEmit = 0) override; void emitValueToOffset(const MCExpr *Offset, unsigned char Value, SMLoc Loc) override; @@ -181,8 +181,8 @@ public: SMLoc Loc = SMLoc()) override; void emitFill(const MCExpr &NumValues, int64_t Size, int64_t Expr, SMLoc Loc = SMLoc()) override; - void emitNops(int64_t NumBytes, int64_t ControlledNopLength, - SMLoc Loc) override; + void emitNops(int64_t NumBytes, int64_t ControlledNopLength, SMLoc Loc, + const MCSubtargetInfo &STI) override; void 
emitFileDirective(StringRef Filename) override; void emitFileDirective(StringRef Filename, StringRef CompilerVerion, StringRef TimeStamp, StringRef Description) override; diff --git a/llvm/include/llvm/MC/MCPseudoProbe.h b/llvm/include/llvm/MC/MCPseudoProbe.h index 1c6926b9a9e6..abc9705f0851 100644 --- a/llvm/include/llvm/MC/MCPseudoProbe.h +++ b/llvm/include/llvm/MC/MCPseudoProbe.h @@ -44,17 +44,26 @@ #ifndef LLVM_MC_MCPSEUDOPROBE_H #define LLVM_MC_MCPSEUDOPROBE_H -#include "llvm/ADT/MapVector.h" -#include "llvm/MC/MCSection.h" -#include <functional> +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/IR/PseudoProbe.h" +#include "llvm/Support/ErrorOr.h" +#include <list> #include <map> +#include <memory> +#include <string> +#include <tuple> +#include <type_traits> +#include <unordered_map> #include <vector> namespace llvm { +class MCSection; class MCStreamer; class MCSymbol; class MCObjectStreamer; +class raw_ostream; enum class MCPseudoProbeFlag { // If set, indicates that the probe is encoded as an address delta @@ -62,69 +71,211 @@ enum class MCPseudoProbeFlag { AddressDelta = 0x1, }; +// Function descriptor decoded from .pseudo_probe_desc section +struct MCPseudoProbeFuncDesc { + uint64_t FuncGUID = 0; + uint64_t FuncHash = 0; + std::string FuncName; + + MCPseudoProbeFuncDesc(uint64_t GUID, uint64_t Hash, StringRef Name) + : FuncGUID(GUID), FuncHash(Hash), FuncName(Name){}; + + void print(raw_ostream &OS); +}; + +class MCPseudoProbe; +class MCDecodedPseudoProbe; + +// An inline frame has the form <Guid, ProbeID> +using InlineSite = std::tuple<uint64_t, uint32_t>; +using MCPseudoProbeInlineStack = SmallVector<InlineSite, 8>; +// GUID to PseudoProbeFuncDesc map +using GUIDProbeFunctionMap = + std::unordered_map<uint64_t, MCPseudoProbeFuncDesc>; +// Address to pseudo probes map. 
+using AddressProbesMap = + std::unordered_map<uint64_t, std::list<MCDecodedPseudoProbe>>; + +class MCPseudoProbeInlineTree; +class MCDecodedPseudoProbeInlineTree; + +class MCPseudoProbeBase { +protected: + uint64_t Guid; + uint64_t Index; + uint8_t Attributes; + uint8_t Type; + // The value should be equal to PseudoProbeReservedId::Last + 1 which is + // defined in SampleProfileProbe.h. The header file is not included here to + // reduce the dependency from MC to IPO. + const static uint32_t PseudoProbeFirstId = 1; + +public: + MCPseudoProbeBase(uint64_t G, uint64_t I, uint64_t At, uint8_t T) + : Guid(G), Index(I), Attributes(At), Type(T) {} + + bool isEntry() const { return Index == PseudoProbeFirstId; } + + uint64_t getGuid() const { return Guid; } + + uint64_t getIndex() const { return Index; } + + uint8_t getAttributes() const { return Attributes; } + + uint8_t getType() const { return Type; } + + bool isBlock() const { + return Type == static_cast<uint8_t>(PseudoProbeType::Block); + } + + bool isIndirectCall() const { + return Type == static_cast<uint8_t>(PseudoProbeType::IndirectCall); + } + + bool isDirectCall() const { + return Type == static_cast<uint8_t>(PseudoProbeType::DirectCall); + } + + bool isCall() const { return isIndirectCall() || isDirectCall(); } + + void setAttributes(uint8_t Attr) { Attributes = Attr; } +}; + /// Instances of this class represent a pseudo probe instance for a pseudo probe /// table entry, which is created during a machine instruction is assembled and /// uses an address from a temporary label created at the current address in the /// current section. 
-class MCPseudoProbe { +class MCPseudoProbe : public MCPseudoProbeBase { MCSymbol *Label; - uint64_t Guid; - uint64_t Index; - uint8_t Type; - uint8_t Attributes; public: MCPseudoProbe(MCSymbol *Label, uint64_t Guid, uint64_t Index, uint64_t Type, uint64_t Attributes) - : Label(Label), Guid(Guid), Index(Index), Type(Type), - Attributes(Attributes) { + : MCPseudoProbeBase(Guid, Index, Attributes, Type), Label(Label) { assert(Type <= 0xFF && "Probe type too big to encode, exceeding 2^8"); assert(Attributes <= 0xFF && "Probe attributes too big to encode, exceeding 2^16"); } MCSymbol *getLabel() const { return Label; } + void emit(MCObjectStreamer *MCOS, const MCPseudoProbe *LastProbe) const; +}; - uint64_t getGuid() const { return Guid; } +// Represents a callsite with caller function name and probe id +using MCPseduoProbeFrameLocation = std::pair<StringRef, uint32_t>; - uint64_t getIndex() const { return Index; } +class MCDecodedPseudoProbe : public MCPseudoProbeBase { + uint64_t Address; + MCDecodedPseudoProbeInlineTree *InlineTree; - uint8_t getType() const { return Type; } +public: + MCDecodedPseudoProbe(uint64_t Ad, uint64_t G, uint32_t I, PseudoProbeType K, + uint8_t At, MCDecodedPseudoProbeInlineTree *Tree) + : MCPseudoProbeBase(G, I, At, static_cast<uint8_t>(K)), Address(Ad), + InlineTree(Tree){}; - uint8_t getAttributes() const { return Attributes; } + uint64_t getAddress() const { return Address; } - void emit(MCObjectStreamer *MCOS, const MCPseudoProbe *LastProbe) const; + void setAddress(uint64_t Addr) { Address = Addr; } + + MCDecodedPseudoProbeInlineTree *getInlineTreeNode() const { + return InlineTree; + } + + // Get the inlined context by traversing current inline tree backwards, + // each tree node has its InlineSite which is taken as the context. 
+ // \p ContextStack is populated in root to leaf order + void + getInlineContext(SmallVectorImpl<MCPseduoProbeFrameLocation> &ContextStack, + const GUIDProbeFunctionMap &GUID2FuncMAP) const; + + // Helper function to get the string from context stack + std::string + getInlineContextStr(const GUIDProbeFunctionMap &GUID2FuncMAP) const; + + // Print pseudo probe while disassembling + void print(raw_ostream &OS, const GUIDProbeFunctionMap &GUID2FuncMAP, + bool ShowName) const; }; -// An inline frame has the form <Guid, ProbeID> -using InlineSite = std::tuple<uint64_t, uint32_t>; -using MCPseudoProbeInlineStack = SmallVector<InlineSite, 8>; +template <typename ProbeType, typename DerivedProbeInlineTreeType> +class MCPseudoProbeInlineTreeBase { + struct InlineSiteHash { + uint64_t operator()(const InlineSite &Site) const { + return std::get<0>(Site) ^ std::get<1>(Site); + } + }; -// A Tri-tree based data structure to group probes by inline stack. -// A tree is allocated for a standalone .text section. A fake -// instance is created as the root of a tree. -// A real instance of this class is created for each function, either an -// unlined function that has code in .text section or an inlined function. -class MCPseudoProbeInlineTree { - uint64_t Guid; +protected: + // Track children (e.g. inlinees) of current context + using InlinedProbeTreeMap = std::unordered_map< + InlineSite, std::unique_ptr<DerivedProbeInlineTreeType>, InlineSiteHash>; + InlinedProbeTreeMap Children; // Set of probes that come with the function. - std::vector<MCPseudoProbe> Probes; - // Use std::map for a deterministic output. - std::map<InlineSite, MCPseudoProbeInlineTree *> Inlinees; + std::vector<ProbeType> Probes; + MCPseudoProbeInlineTreeBase() { + static_assert(std::is_base_of<MCPseudoProbeInlineTreeBase, + DerivedProbeInlineTreeType>::value, + "DerivedProbeInlineTreeType must be subclass of " + "MCPseudoProbeInlineTreeBase"); + } + +public: + uint64_t Guid = 0; // Root node has a GUID 0. 
- bool isRoot() { return Guid == 0; } - MCPseudoProbeInlineTree *getOrAddNode(InlineSite Site); + bool isRoot() const { return Guid == 0; } + InlinedProbeTreeMap &getChildren() { return Children; } + const InlinedProbeTreeMap &getChildren() const { return Children; } + std::vector<ProbeType> &getProbes() { return Probes; } + void addProbes(ProbeType Probe) { Probes.push_back(Probe); } + // Caller node of the inline site + MCPseudoProbeInlineTreeBase<ProbeType, DerivedProbeInlineTreeType> *Parent; + DerivedProbeInlineTreeType *getOrAddNode(const InlineSite &Site) { + auto Ret = Children.emplace( + Site, std::make_unique<DerivedProbeInlineTreeType>(Site)); + Ret.first->second->Parent = this; + return Ret.first->second.get(); + }; +}; +// A Tri-tree based data structure to group probes by inline stack. +// A tree is allocated for a standalone .text section. A fake +// instance is created as the root of a tree. +// A real instance of this class is created for each function, either a +// not inlined function that has code in .text section or an inlined function. 
+class MCPseudoProbeInlineTree + : public MCPseudoProbeInlineTreeBase<MCPseudoProbe, + MCPseudoProbeInlineTree> { public: MCPseudoProbeInlineTree() = default; - MCPseudoProbeInlineTree(uint64_t Guid) : Guid(Guid) {} - ~MCPseudoProbeInlineTree(); + MCPseudoProbeInlineTree(uint64_t Guid) { this->Guid = Guid; } + MCPseudoProbeInlineTree(const InlineSite &Site) { + this->Guid = std::get<0>(Site); + } + + // MCPseudoProbeInlineTree method based on Inlinees void addPseudoProbe(const MCPseudoProbe &Probe, const MCPseudoProbeInlineStack &InlineStack); void emit(MCObjectStreamer *MCOS, const MCPseudoProbe *&LastProbe); }; +// inline tree node for the decoded pseudo probe +class MCDecodedPseudoProbeInlineTree + : public MCPseudoProbeInlineTreeBase<MCDecodedPseudoProbe *, + MCDecodedPseudoProbeInlineTree> { +public: + InlineSite ISite; + // Used for decoding + uint32_t ChildrenToProcess = 0; + + MCDecodedPseudoProbeInlineTree(){}; + MCDecodedPseudoProbeInlineTree(const InlineSite &Site) : ISite(Site){}; + + // Return false if it's a dummy inline site + bool hasInlineSite() const { return std::get<0>(ISite) != 0; } +}; + /// Instances of this class represent the pseudo probes inserted into a compile /// unit. class MCPseudoProbeSection { @@ -172,6 +323,83 @@ public: static int DdgPrintIndent; #endif }; + +class MCPseudoProbeDecoder { + // GUID to PseudoProbeFuncDesc map. + GUIDProbeFunctionMap GUID2FuncDescMap; + + // Address to probes map. + AddressProbesMap Address2ProbesMap; + + // The dummy root of the inline trie, all the outlined function will directly + // be the children of the dummy root, all the inlined function will be the + // children of its inlineer. So the relation would be like: + // DummyRoot --> OutlinedFunc --> InlinedFunc1 --> InlinedFunc2 + MCDecodedPseudoProbeInlineTree DummyInlineRoot; + + /// Points to the current location in the buffer. + const uint8_t *Data = nullptr; + + /// Points to the end of the buffer. 
+ const uint8_t *End = nullptr; + + // Decoding helper function + template <typename T> ErrorOr<T> readUnencodedNumber(); + template <typename T> ErrorOr<T> readUnsignedNumber(); + template <typename T> ErrorOr<T> readSignedNumber(); + ErrorOr<StringRef> readString(uint32_t Size); + +public: + // Decode pseudo_probe_desc section to build GUID to PseudoProbeFuncDesc map. + bool buildGUID2FuncDescMap(const uint8_t *Start, std::size_t Size); + + // Decode pseudo_probe section to build address to probes map. + bool buildAddress2ProbeMap(const uint8_t *Start, std::size_t Size); + + // Print pseudo_probe_desc section info + void printGUID2FuncDescMap(raw_ostream &OS); + + // Print pseudo_probe section info, used along with show-disassembly + void printProbeForAddress(raw_ostream &OS, uint64_t Address); + + // do printProbeForAddress for all addresses + void printProbesForAllAddresses(raw_ostream &OS); + + // Look up the probe of a call for the input address + const MCDecodedPseudoProbe *getCallProbeForAddr(uint64_t Address) const; + + const MCPseudoProbeFuncDesc *getFuncDescForGUID(uint64_t GUID) const; + + // Helper function to populate one probe's inline stack into + // \p InlineContextStack. 
+ // Current leaf location info will be added if IncludeLeaf is true + // Example: + // Current probe(bar:3) inlined at foo:2 then inlined at main:1 + // IncludeLeaf = true, Output: [main:1, foo:2, bar:3] + // IncludeLeaf = false, Output: [main:1, foo:2] + void getInlineContextForProbe( + const MCDecodedPseudoProbe *Probe, + SmallVectorImpl<MCPseduoProbeFrameLocation> &InlineContextStack, + bool IncludeLeaf) const; + + const AddressProbesMap &getAddress2ProbesMap() const { + return Address2ProbesMap; + } + + AddressProbesMap &getAddress2ProbesMap() { return Address2ProbesMap; } + + const GUIDProbeFunctionMap &getGUID2FuncDescMap() const { + return GUID2FuncDescMap; + } + + const MCPseudoProbeFuncDesc * + getInlinerDescForProbe(const MCDecodedPseudoProbe *Probe) const; + + const MCDecodedPseudoProbeInlineTree &getDummyInlineRoot() const { + return DummyInlineRoot; + } +}; + } // end namespace llvm #endif // LLVM_MC_MCPSEUDOPROBE_H diff --git a/llvm/include/llvm/MC/MCRegister.h b/llvm/include/llvm/MC/MCRegister.h index 72507b7d8ee4..1e8c747785eb 100644 --- a/llvm/include/llvm/MC/MCRegister.h +++ b/llvm/include/llvm/MC/MCRegister.h @@ -10,6 +10,7 @@ #define LLVM_MC_MCREGISTER_H #include "llvm/ADT/DenseMapInfo.h" +#include "llvm/ADT/Hashing.h" #include <cassert> #include <limits> diff --git a/llvm/include/llvm/MC/MCSchedule.h b/llvm/include/llvm/MC/MCSchedule.h index acfbfd387ff3..6dffc158af50 100644 --- a/llvm/include/llvm/MC/MCSchedule.h +++ b/llvm/include/llvm/MC/MCSchedule.h @@ -14,7 +14,6 @@ #ifndef LLVM_MC_MCSCHEDULE_H #define LLVM_MC_MCSCHEDULE_H -#include "llvm/ADT/Optional.h" #include "llvm/Config/llvm-config.h" #include "llvm/Support/DataTypes.h" #include <cassert> diff --git a/llvm/include/llvm/MC/MCStreamer.h b/llvm/include/llvm/MC/MCStreamer.h index fd326ff18712..e00f50f617fa 100644 --- a/llvm/include/llvm/MC/MCStreamer.h +++ b/llvm/include/llvm/MC/MCStreamer.h @@ -123,6 +123,8 @@ public: /// This is used to emit bytes in \p Data as sequence of .byte 
directives. virtual void emitRawBytes(StringRef Data); + virtual void emitConstantPools(); + virtual void finish(); }; @@ -165,7 +167,7 @@ public: virtual void emitThumbSet(MCSymbol *Symbol, const MCExpr *Value); - void finish() override; + void emitConstantPools() override; /// Reset any state between object emissions, i.e. the equivalent of /// MCStreamer's reset method. @@ -445,7 +447,7 @@ public: } /// Create the default sections and set the initial one. - virtual void InitSections(bool NoExecStack); + virtual void initSections(bool NoExecStack, const MCSubtargetInfo &STI); MCSymbol *endSection(MCSection *Section); @@ -797,7 +799,7 @@ public: SMLoc Loc = SMLoc()); virtual void emitNops(int64_t NumBytes, int64_t ControlledNopLength, - SMLoc Loc); + SMLoc Loc, const MCSubtargetInfo& STI); /// Emit NumBytes worth of zeros. /// This function properly handles data in virtual sections. @@ -831,10 +833,12 @@ public: /// /// \param ByteAlignment - The alignment to reach. This must be a power of /// two on some targets. + /// \param STI - The MCSubtargetInfo in operation when padding is emitted. /// \param MaxBytesToEmit - The maximum numbers of bytes to emit, or 0. If /// the alignment cannot be reached in this many bytes, no bytes are /// emitted. virtual void emitCodeAlignment(unsigned ByteAlignment, + const MCSubtargetInfo *STI, unsigned MaxBytesToEmit = 0); /// Emit some number of copies of \p Value until the byte offset \p diff --git a/llvm/include/llvm/MC/MCSymbolWasm.h b/llvm/include/llvm/MC/MCSymbolWasm.h index 852ab678e616..5a4852e0e895 100644 --- a/llvm/include/llvm/MC/MCSymbolWasm.h +++ b/llvm/include/llvm/MC/MCSymbolWasm.h @@ -27,7 +27,6 @@ class MCSymbolWasm : public MCSymbol { wasm::WasmSignature *Signature = nullptr; Optional<wasm::WasmGlobalType> GlobalType; Optional<wasm::WasmTableType> TableType; - Optional<wasm::WasmTagType> TagType; /// An expression describing how to calculate the size of a symbol. 
If a /// symbol has no size this field will be NULL. @@ -67,6 +66,11 @@ public: modifyFlags(wasm::WASM_SYMBOL_NO_STRIP, wasm::WASM_SYMBOL_NO_STRIP); } + bool isTLS() const { return getFlags() & wasm::WASM_SYMBOL_TLS; } + void setTLS() const { + modifyFlags(wasm::WASM_SYMBOL_TLS, wasm::WASM_SYMBOL_TLS); + } + bool isWeak() const { return IsWeak; } void setWeak(bool isWeak) { IsWeak = isWeak; } @@ -142,12 +146,6 @@ public: wasm::WasmLimits Limits = {wasm::WASM_LIMITS_FLAG_NONE, 0, 0}; setTableType({uint8_t(VT), Limits}); } - - const wasm::WasmTagType &getTagType() const { - assert(TagType.hasValue()); - return TagType.getValue(); - } - void setTagType(wasm::WasmTagType ET) { TagType = ET; } }; } // end namespace llvm diff --git a/llvm/include/llvm/MC/MCWasmStreamer.h b/llvm/include/llvm/MC/MCWasmStreamer.h index 6651f071f799..818f59e5ab3e 100644 --- a/llvm/include/llvm/MC/MCWasmStreamer.h +++ b/llvm/include/llvm/MC/MCWasmStreamer.h @@ -41,6 +41,9 @@ public: /// @{ void changeSection(MCSection *Section, const MCExpr *Subsection) override; + void emitLabel(MCSymbol *Symbol, SMLoc Loc = SMLoc()) override; + void emitLabelAtPos(MCSymbol *Symbol, SMLoc Loc, MCFragment *F, + uint64_t Offset) override; void emitAssemblerFlag(MCAssemblerFlag Flag) override; void emitThumbFunc(MCSymbol *Func) override; void emitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol) override; @@ -68,6 +71,8 @@ private: void emitInstToFragment(const MCInst &Inst, const MCSubtargetInfo &) override; void emitInstToData(const MCInst &Inst, const MCSubtargetInfo &) override; + void fixSymbolsInTLSFixups(const MCExpr *expr); + /// Merge the content of the fragment \p EF into the fragment \p DF. 
void mergeFragment(MCDataFragment *, MCDataFragment *); diff --git a/llvm/include/llvm/MC/MCWinCOFFStreamer.h b/llvm/include/llvm/MC/MCWinCOFFStreamer.h index 53b2ef0bd96e..af1ed6faf753 100644 --- a/llvm/include/llvm/MC/MCWinCOFFStreamer.h +++ b/llvm/include/llvm/MC/MCWinCOFFStreamer.h @@ -39,7 +39,7 @@ public: /// \name MCStreamer interface /// \{ - void InitSections(bool NoExecStack) override; + void initSections(bool NoExecStack, const MCSubtargetInfo &STI) override; void emitLabel(MCSymbol *Symbol, SMLoc Loc = SMLoc()) override; void emitAssemblerFlag(MCAssemblerFlag Flag) override; void emitThumbFunc(MCSymbol *Func) override; diff --git a/llvm/include/llvm/Support/TargetRegistry.h b/llvm/include/llvm/MC/TargetRegistry.h index e661ae26cb4e..da9a9269edbf 100644 --- a/llvm/include/llvm/Support/TargetRegistry.h +++ b/llvm/include/llvm/MC/TargetRegistry.h @@ -1,4 +1,4 @@ -//===- Support/TargetRegistry.h - Target Registration -----------*- C++ -*-===// +//===- MC/TargetRegistry.h - Target Registration ----------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -15,8 +15,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_SUPPORT_TARGETREGISTRY_H -#define LLVM_SUPPORT_TARGETREGISTRY_H +#ifndef LLVM_MC_TARGETREGISTRY_H +#define LLVM_MC_TARGETREGISTRY_H #include "llvm-c/DisassemblerTypes.h" #include "llvm/ADT/Optional.h" @@ -59,6 +59,11 @@ class raw_ostream; class raw_pwrite_stream; class TargetMachine; class TargetOptions; +namespace mca { +class CustomBehaviour; +class InstrPostProcess; +class SourceMgr; +} // namespace mca MCStreamer *createNullStreamer(MCContext &Ctx); // Takes ownership of \p TAB and \p CE. 
@@ -114,6 +119,13 @@ MCSymbolizer *createMCSymbolizer(const Triple &TT, LLVMOpInfoCallback GetOpInfo, void *DisInfo, MCContext *Ctx, std::unique_ptr<MCRelocationInfo> &&RelInfo); +mca::CustomBehaviour *createCustomBehaviour(const MCSubtargetInfo &STI, + const mca::SourceMgr &SrcMgr, + const MCInstrInfo &MCII); + +mca::InstrPostProcess *createInstrPostProcess(const MCSubtargetInfo &STI, + const MCInstrInfo &MCII); + /// Target - Wrapper for Target specific information. /// /// For registration purposes, this is a POD type so that targets can be @@ -206,6 +218,15 @@ public: LLVMSymbolLookupCallback SymbolLookUp, void *DisInfo, MCContext *Ctx, std::unique_ptr<MCRelocationInfo> &&RelInfo); + using CustomBehaviourCtorTy = + mca::CustomBehaviour *(*)(const MCSubtargetInfo &STI, + const mca::SourceMgr &SrcMgr, + const MCInstrInfo &MCII); + + using InstrPostProcessCtorTy = + mca::InstrPostProcess *(*)(const MCSubtargetInfo &STI, + const MCInstrInfo &MCII); + private: /// Next - The next registered target in the linked list, maintained by the /// TargetRegistry. @@ -305,6 +326,14 @@ private: /// MCSymbolizer, if registered (default = llvm::createMCSymbolizer) MCSymbolizerCtorTy MCSymbolizerCtorFn = nullptr; + /// CustomBehaviourCtorFn - Construction function for this target's + /// CustomBehaviour, if registered (default = nullptr). + CustomBehaviourCtorTy CustomBehaviourCtorFn = nullptr; + + /// InstrPostProcessCtorFn - Construction function for this target's + /// InstrPostProcess, if registered (default = nullptr). + InstrPostProcessCtorTy InstrPostProcessCtorFn = nullptr; + public: Target() = default; @@ -623,6 +652,25 @@ public: std::move(RelInfo)); } + /// createCustomBehaviour - Create a target specific CustomBehaviour. + /// This class is used by llvm-mca and requires backend functionality. 
+ mca::CustomBehaviour *createCustomBehaviour(const MCSubtargetInfo &STI, + const mca::SourceMgr &SrcMgr, + const MCInstrInfo &MCII) const { + if (CustomBehaviourCtorFn) + return CustomBehaviourCtorFn(STI, SrcMgr, MCII); + return nullptr; + } + + /// createInstrPostProcess - Create a target specific InstrPostProcess. + /// This class is used by llvm-mca and requires backend functionality. + mca::InstrPostProcess *createInstrPostProcess(const MCSubtargetInfo &STI, + const MCInstrInfo &MCII) const { + if (InstrPostProcessCtorFn) + return InstrPostProcessCtorFn(STI, MCII); + return nullptr; + } + /// @} }; @@ -959,6 +1007,34 @@ struct TargetRegistry { T.MCSymbolizerCtorFn = Fn; } + /// RegisterCustomBehaviour - Register a CustomBehaviour + /// implementation for the given target. + /// + /// Clients are responsible for ensuring that registration doesn't occur + /// while another thread is attempting to access the registry. Typically + /// this is done by initializing all targets at program startup. + /// + /// @param T - The target being registered. + /// @param Fn - A function to construct a CustomBehaviour for the target. + static void RegisterCustomBehaviour(Target &T, + Target::CustomBehaviourCtorTy Fn) { + T.CustomBehaviourCtorFn = Fn; + } + + /// RegisterInstrPostProcess - Register an InstrPostProcess + /// implementation for the given target. + /// + /// Clients are responsible for ensuring that registration doesn't occur + /// while another thread is attempting to access the registry. Typically + /// this is done by initializing all targets at program startup. + /// + /// @param T - The target being registered. + /// @param Fn - A function to construct an InstrPostProcess for the target. 
+ static void RegisterInstrPostProcess(Target &T, + Target::InstrPostProcessCtorTy Fn) { + T.InstrPostProcessCtorFn = Fn; + } + /// @} }; @@ -1294,4 +1370,4 @@ private: } // end namespace llvm -#endif // LLVM_SUPPORT_TARGETREGISTRY_H +#endif // LLVM_MC_TARGETREGISTRY_H diff --git a/llvm/include/llvm/MCA/CustomBehaviour.h b/llvm/include/llvm/MCA/CustomBehaviour.h index 655a9c49c599..395b07cf722b 100644 --- a/llvm/include/llvm/MCA/CustomBehaviour.h +++ b/llvm/include/llvm/MCA/CustomBehaviour.h @@ -22,6 +22,7 @@ #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MCA/SourceMgr.h" +#include "llvm/MCA/View.h" namespace llvm { namespace mca { @@ -55,29 +56,53 @@ public: class CustomBehaviour { protected: const MCSubtargetInfo &STI; - const SourceMgr &SrcMgr; + const mca::SourceMgr &SrcMgr; const MCInstrInfo &MCII; public: - CustomBehaviour(const MCSubtargetInfo &STI, const SourceMgr &SrcMgr, + CustomBehaviour(const MCSubtargetInfo &STI, const mca::SourceMgr &SrcMgr, const MCInstrInfo &MCII) : STI(STI), SrcMgr(SrcMgr), MCII(MCII) {} virtual ~CustomBehaviour(); - // Before the llvm-mca pipeline dispatches an instruction, it first checks - // for any register or resource dependencies / hazards. If it doesn't find - // any, this method will be invoked to determine if there are any custom - // hazards that the instruction needs to wait for. - // The return value of this method is the number of cycles that the - // instruction needs to wait for. - // It's safe to underestimate the number of cycles to wait for since these - // checks will be invoked again before the intruction gets dispatched. - // However, it's not safe (accurate) to overestimate the number of cycles - // to wait for since the instruction will wait for AT LEAST that number of - // cycles before attempting to be dispatched again. + /// Before the llvm-mca pipeline dispatches an instruction, it first checks + /// for any register or resource dependencies / hazards. 
If it doesn't find + /// any, this method will be invoked to determine if there are any custom + /// hazards that the instruction needs to wait for. + /// The return value of this method is the number of cycles that the + /// instruction needs to wait for. + /// It's safe to underestimate the number of cycles to wait for since these + /// checks will be invoked again before the intruction gets dispatched. + /// However, it's not safe (accurate) to overestimate the number of cycles + /// to wait for since the instruction will wait for AT LEAST that number of + /// cycles before attempting to be dispatched again. virtual unsigned checkCustomHazard(ArrayRef<InstRef> IssuedInst, const InstRef &IR); + + // Functions that target CBs can override to return a list of + // target specific Views that need to live within /lib/Target/ so that + // they can benefit from the target CB or from backend functionality that is + // not already exposed through MC-layer classes. Keep in mind that how this + // function is used is that the function is called within llvm-mca.cpp and + // then each unique_ptr<View> is passed into the PipelinePrinter::addView() + // function. This function will then std::move the View into its own vector of + // Views. So any CB that overrides this function needs to make sure that they + // are not relying on the current address or reference of the View + // unique_ptrs. If you do need the CB and View to be able to communicate with + // each other, consider giving the View a reference or pointer to the CB when + // the View is constructed. Then the View can query the CB for information + // when it needs it. + /// Return a vector of Views that will be added before all other Views. + virtual std::vector<std::unique_ptr<View>> + getStartViews(llvm::MCInstPrinter &IP, llvm::ArrayRef<llvm::MCInst> Insts); + /// Return a vector of Views that will be added after the InstructionInfoView. 
+ virtual std::vector<std::unique_ptr<View>> + getPostInstrInfoViews(llvm::MCInstPrinter &IP, + llvm::ArrayRef<llvm::MCInst> Insts); + /// Return a vector of Views that will be added after all other Views. + virtual std::vector<std::unique_ptr<View>> + getEndViews(llvm::MCInstPrinter &IP, llvm::ArrayRef<llvm::MCInst> Insts); }; } // namespace mca diff --git a/llvm/include/llvm/MCA/Instruction.h b/llvm/include/llvm/MCA/Instruction.h index 988cddcbe013..3eb32186d551 100644 --- a/llvm/include/llvm/MCA/Instruction.h +++ b/llvm/include/llvm/MCA/Instruction.h @@ -46,7 +46,7 @@ class MCAOperand { kSFPImmediate, ///< Single-floating-point immediate operand. kDFPImmediate, ///< Double-Floating-point immediate operand. }; - MCAOperandType Kind = kInvalid; + MCAOperandType Kind; union { unsigned RegVal; @@ -62,7 +62,7 @@ class MCAOperand { unsigned Index; public: - MCAOperand() : FPImmVal(0) {} + MCAOperand() : Kind(kInvalid), FPImmVal(), Index() {} bool isValid() const { return Kind != kInvalid; } bool isReg() const { return Kind == kRegister; } diff --git a/llvm/include/llvm/MCA/Stages/InOrderIssueStage.h b/llvm/include/llvm/MCA/Stages/InOrderIssueStage.h index b7006e761647..42f386a13d85 100644 --- a/llvm/include/llvm/MCA/Stages/InOrderIssueStage.h +++ b/llvm/include/llvm/MCA/Stages/InOrderIssueStage.h @@ -21,6 +21,7 @@ namespace llvm { namespace mca { +class LSUnit; class RegisterFile; struct StallInfo { @@ -29,6 +30,7 @@ struct StallInfo { REGISTER_DEPS, DISPATCH, DELAY, + LOAD_STORE, CUSTOM_STALL }; @@ -54,6 +56,7 @@ class InOrderIssueStage final : public Stage { RegisterFile &PRF; ResourceManager RM; CustomBehaviour &CB; + LSUnit &LSU; /// Instructions that were issued, but not executed yet. 
SmallVector<InstRef, 4> IssuedInst; @@ -110,7 +113,7 @@ class InOrderIssueStage final : public Stage { public: InOrderIssueStage(const MCSubtargetInfo &STI, RegisterFile &PRF, - CustomBehaviour &CB); + CustomBehaviour &CB, LSUnit &LSU); unsigned getIssueWidth() const; bool isAvailable(const InstRef &) const override; diff --git a/llvm/include/llvm/MCA/View.h b/llvm/include/llvm/MCA/View.h new file mode 100644 index 000000000000..ff8fc1ceb3f1 --- /dev/null +++ b/llvm/include/llvm/MCA/View.h @@ -0,0 +1,41 @@ +//===----------------------- View.h -----------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file defines the main interface for Views. Each view contributes a +/// portion of the final report generated by the tool. 
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_MCA_VIEW_H +#define LLVM_MCA_VIEW_H + +#include "llvm/MC/MCInstPrinter.h" +#include "llvm/MCA/HWEventListener.h" +#include "llvm/Support/JSON.h" +#include "llvm/Support/raw_ostream.h" + +namespace llvm { +namespace mca { + +class View : public HWEventListener { +public: + virtual ~View() = default; + + virtual void printView(llvm::raw_ostream &OS) const = 0; + virtual StringRef getNameAsString() const = 0; + + virtual json::Value toJSON() const { return "not implemented"; } + virtual bool isSerializable() const { return true; } + + void anchor() override; +}; +} // namespace mca +} // namespace llvm + +#endif diff --git a/llvm/include/llvm/Object/ELF.h b/llvm/include/llvm/Object/ELF.h index c5f966891bd0..37f23c435ae1 100644 --- a/llvm/include/llvm/Object/ELF.h +++ b/llvm/include/llvm/Object/ELF.h @@ -81,10 +81,6 @@ getElfArchType(StringRef Object) { (uint8_t)Object[ELF::EI_DATA]); } -static inline Error createError(const Twine &Err) { - return make_error<StringError>(Err, object_error::parse_failed); -} - enum PPCInstrMasks : uint64_t { PADDI_R12_NO_DISP = 0x0610000039800000, ADDIS_R12_TO_R2_NO_DISP = 0x3D820000, @@ -392,8 +388,7 @@ public: Expected<ArrayRef<T>> getSectionContentsAsArray(const Elf_Shdr &Sec) const; Expected<ArrayRef<uint8_t>> getSectionContents(const Elf_Shdr &Sec) const; Expected<ArrayRef<uint8_t>> getSegmentContents(const Elf_Phdr &Phdr) const; - Expected<std::vector<Elf_BBAddrMap>> - decodeBBAddrMap(const Elf_Shdr &Sec) const; + Expected<std::vector<BBAddrMap>> decodeBBAddrMap(const Elf_Shdr &Sec) const; }; using ELF32LEFile = ELFFile<ELF32LE>; diff --git a/llvm/include/llvm/Object/ELFObjectFile.h b/llvm/include/llvm/Object/ELFObjectFile.h index c87a09f86fae..716b94d92d03 100644 --- a/llvm/include/llvm/Object/ELFObjectFile.h +++ b/llvm/include/llvm/Object/ELFObjectFile.h @@ -96,6 +96,10 @@ public: std::vector<std::pair<Optional<DataRefImpl>, 
uint64_t>> getPltAddresses() const; + + /// Returns a vector containing a symbol version for each dynamic symbol. + /// Returns an empty vector if version sections do not exist. + Expected<std::vector<VersionEntry>> readDynsymVersions() const; }; class ELFSectionRef : public SectionRef { @@ -407,7 +411,8 @@ public: const Elf_Shdr *getRelSection(DataRefImpl Rel) const { auto RelSecOrErr = EF.getSection(Rel.d.a); if (!RelSecOrErr) - report_fatal_error(errorToErrorCode(RelSecOrErr.takeError()).message()); + report_fatal_error( + Twine(errorToErrorCode(RelSecOrErr.takeError()).message())); return *RelSecOrErr; } @@ -728,7 +733,8 @@ Expected<uint32_t> ELFObjectFile<ELFT>::getSymbolFlags(DataRefImpl Sym) const { } else if (EF.getHeader().e_machine == ELF::EM_ARM) { if (Expected<StringRef> NameOrErr = getSymbolName(Sym)) { StringRef Name = *NameOrErr; - if (Name.startswith("$d") || Name.startswith("$t") || + // TODO Investigate why empty name symbols need to be marked. + if (Name.empty() || Name.startswith("$d") || Name.startswith("$t") || Name.startswith("$a")) Result |= SymbolRef::SF_FormatSpecific; } else { @@ -966,7 +972,8 @@ ELFObjectFile<ELFT>::section_rel_end(DataRefImpl Sec) const { // Error check sh_link here so that getRelocationSymbol can just use it. 
auto SymSecOrErr = EF.getSection(RelSec->sh_link); if (!SymSecOrErr) - report_fatal_error(errorToErrorCode(SymSecOrErr.takeError()).message()); + report_fatal_error( + Twine(errorToErrorCode(SymSecOrErr.takeError()).message())); RelData.d.b += S->sh_size / S->sh_entsize; return relocation_iterator(RelocationRef(RelData, this)); @@ -1055,7 +1062,7 @@ ELFObjectFile<ELFT>::getRel(DataRefImpl Rel) const { assert(getRelSection(Rel)->sh_type == ELF::SHT_REL); auto Ret = EF.template getEntry<Elf_Rel>(Rel.d.a, Rel.d.b); if (!Ret) - report_fatal_error(errorToErrorCode(Ret.takeError()).message()); + report_fatal_error(Twine(errorToErrorCode(Ret.takeError()).message())); return *Ret; } @@ -1065,7 +1072,7 @@ ELFObjectFile<ELFT>::getRela(DataRefImpl Rela) const { assert(getRelSection(Rela)->sh_type == ELF::SHT_RELA); auto Ret = EF.template getEntry<Elf_Rela>(Rela.d.a, Rela.d.b); if (!Ret) - report_fatal_error(errorToErrorCode(Ret.takeError()).message()); + report_fatal_error(Twine(errorToErrorCode(Ret.takeError()).message())); return *Ret; } diff --git a/llvm/include/llvm/Object/ELFTypes.h b/llvm/include/llvm/Object/ELFTypes.h index 54ebd751d8d2..e59a63d93989 100644 --- a/llvm/include/llvm/Object/ELFTypes.h +++ b/llvm/include/llvm/Object/ELFTypes.h @@ -44,7 +44,6 @@ template <class ELFT> struct Elf_Nhdr_Impl; template <class ELFT> class Elf_Note_Impl; template <class ELFT> class Elf_Note_Iterator_Impl; template <class ELFT> struct Elf_CGProfile_Impl; -template <class ELFT> struct Elf_BBAddrMap_Impl; template <endianness E, bool Is64> struct ELFType { private: @@ -76,7 +75,6 @@ public: using Note = Elf_Note_Impl<ELFType<E, Is64>>; using NoteIterator = Elf_Note_Iterator_Impl<ELFType<E, Is64>>; using CGProfile = Elf_CGProfile_Impl<ELFType<E, Is64>>; - using BBAddrMap = Elf_BBAddrMap_Impl<ELFType<E, Is64>>; using DynRange = ArrayRef<Dyn>; using ShdrRange = ArrayRef<Shdr>; using SymRange = ArrayRef<Sym>; @@ -131,7 +129,6 @@ using ELF64BE = ELFType<support::big, true>; using Elf_Note 
= typename ELFT::Note; \ using Elf_Note_Iterator = typename ELFT::NoteIterator; \ using Elf_CGProfile = typename ELFT::CGProfile; \ - using Elf_BBAddrMap = typename ELFT::BBAddrMap; \ using Elf_Dyn_Range = typename ELFT::DynRange; \ using Elf_Shdr_Range = typename ELFT::ShdrRange; \ using Elf_Sym_Range = typename ELFT::SymRange; \ @@ -797,9 +794,8 @@ template <class ELFT> struct Elf_Mips_ABIFlags { }; // Struct representing the BBAddrMap for one function. -template <class ELFT> struct Elf_BBAddrMap_Impl { - LLVM_ELF_IMPORT_TYPES_ELFT(ELFT) - uintX_t Addr; // Function address +struct BBAddrMap { + uint64_t Addr; // Function address // Struct representing the BBAddrMap information for one basic block. struct BBEntry { uint32_t Offset; // Offset of basic block relative to function start. diff --git a/llvm/include/llvm/Object/Error.h b/llvm/include/llvm/Object/Error.h index 07744188444a..1fc1f6603a36 100644 --- a/llvm/include/llvm/Object/Error.h +++ b/llvm/include/llvm/Object/Error.h @@ -82,6 +82,10 @@ private: /// error() function needs to called on the llvm::Error. Error isNotObjectErrorInvalidFileType(llvm::Error Err); +inline Error createError(const Twine &Err) { + return make_error<StringError>(Err, object_error::parse_failed); +} + } // end namespace object. } // end namespace llvm. diff --git a/llvm/include/llvm/Object/MachO.h b/llvm/include/llvm/Object/MachO.h index d2ad12e98deb..ca5d63e4074f 100644 --- a/llvm/include/llvm/Object/MachO.h +++ b/llvm/include/llvm/Object/MachO.h @@ -311,6 +311,9 @@ public: bool isSectionBitcode(DataRefImpl Sec) const override; bool isDebugSection(DataRefImpl Sec) const override; + /// Return the raw contents of an entire segment. + ArrayRef<uint8_t> getSegmentContents(StringRef SegmentName) const; + /// When dsymutil generates the companion file, it strips all unnecessary /// sections (e.g. everything in the _TEXT segment) by omitting their body /// and setting the offset in their corresponding load command to zero. 
diff --git a/llvm/include/llvm/Object/Wasm.h b/llvm/include/llvm/Object/Wasm.h index 2cea950fcf25..e4802c087b8b 100644 --- a/llvm/include/llvm/Object/Wasm.h +++ b/llvm/include/llvm/Object/Wasm.h @@ -9,7 +9,7 @@ // This file declares the WasmObjectFile class, which implements the ObjectFile // interface for Wasm files. // -// See: https://github.com/WebAssembly/design/blob/master/BinaryEncoding.md +// See: https://github.com/WebAssembly/design/blob/main/BinaryEncoding.md // //===----------------------------------------------------------------------===// @@ -37,15 +37,13 @@ public: WasmSymbol(const wasm::WasmSymbolInfo &Info, const wasm::WasmGlobalType *GlobalType, const wasm::WasmTableType *TableType, - const wasm::WasmTagType *TagType, const wasm::WasmSignature *Signature) : Info(Info), GlobalType(GlobalType), TableType(TableType), - TagType(TagType), Signature(Signature) {} + Signature(Signature) {} const wasm::WasmSymbolInfo &Info; const wasm::WasmGlobalType *GlobalType; const wasm::WasmTableType *TableType; - const wasm::WasmTagType *TagType; const wasm::WasmSignature *Signature; bool isTypeFunction() const { @@ -138,7 +136,6 @@ public: return TargetFeatures; } ArrayRef<wasm::WasmSignature> types() const { return Signatures; } - ArrayRef<uint32_t> functionTypes() const { return FunctionTypes; } ArrayRef<wasm::WasmImport> imports() const { return Imports; } ArrayRef<wasm::WasmTable> tables() const { return Tables; } ArrayRef<wasm::WasmLimits> memories() const { return Memories; } @@ -260,6 +257,7 @@ private: // Custom section types Error parseDylinkSection(ReadContext &Ctx); + Error parseDylink0Section(ReadContext &Ctx); Error parseNameSection(ReadContext &Ctx); Error parseLinkingSection(ReadContext &Ctx); Error parseLinkingSectionSymtab(ReadContext &Ctx); @@ -274,7 +272,6 @@ private: wasm::WasmProducerInfo ProducerInfo; std::vector<wasm::WasmFeatureEntry> TargetFeatures; std::vector<wasm::WasmSignature> Signatures; - std::vector<uint32_t> FunctionTypes; 
std::vector<wasm::WasmTable> Tables; std::vector<wasm::WasmLimits> Memories; std::vector<wasm::WasmGlobal> Globals; diff --git a/llvm/include/llvm/Object/XCOFFObjectFile.h b/llvm/include/llvm/Object/XCOFFObjectFile.h index 7d024fbc3eae..94136afc45ea 100644 --- a/llvm/include/llvm/Object/XCOFFObjectFile.h +++ b/llvm/include/llvm/Object/XCOFFObjectFile.h @@ -51,6 +51,101 @@ struct XCOFFFileHeader64 { support::ubig32_t NumberOfSymTableEntries; }; +template <typename T> struct XCOFFAuxiliaryHeader { + static constexpr uint8_t AuxiHeaderFlagMask = 0xF0; + static constexpr uint8_t AuxiHeaderTDataAlignmentMask = 0x0F; + +public: + uint8_t getFlag() const { + return static_cast<const T *>(this)->FlagAndTDataAlignment & + AuxiHeaderFlagMask; + } + uint8_t getTDataAlignment() const { + return static_cast<const T *>(this)->FlagAndTDataAlignment & + AuxiHeaderTDataAlignmentMask; + } +}; + +struct XCOFFAuxiliaryHeader32 : XCOFFAuxiliaryHeader<XCOFFAuxiliaryHeader32> { + support::ubig16_t + AuxMagic; ///< If the value of the o_vstamp field is greater than 1, the + ///< o_mflags field is reserved for future use and it should + ///< contain 0. Otherwise, this field is not used. + support::ubig16_t + Version; ///< The valid values are 1 and 2. When the o_vstamp field is 2 + ///< in an XCOFF32 file, the new interpretation of the n_type + ///< field in the symbol table entry is used. 
+ support::ubig32_t TextSize; + support::ubig32_t InitDataSize; + support::ubig32_t BssDataSize; + support::ubig32_t EntryPointAddr; + support::ubig32_t TextStartAddr; + support::ubig32_t DataStartAddr; + support::ubig32_t TOCAnchorAddr; + support::ubig16_t SecNumOfEntryPoint; + support::ubig16_t SecNumOfText; + support::ubig16_t SecNumOfData; + support::ubig16_t SecNumOfTOC; + support::ubig16_t SecNumOfLoader; + support::ubig16_t SecNumOfBSS; + support::ubig16_t MaxAlignOfText; + support::ubig16_t MaxAlignOfData; + support::ubig16_t ModuleType; + uint8_t CpuFlag; + uint8_t CpuType; + support::ubig32_t MaxStackSize; ///< If the value is 0, the system default + ///< maximum stack size is used. + support::ubig32_t MaxDataSize; ///< If the value is 0, the system default + ///< maximum data size is used. + support::ubig32_t + ReservedForDebugger; ///< This field should contain 0. When a loaded + ///< program is being debugged, the memory image of + ///< this field may be modified by a debugger to + ///< insert a trap instruction. + uint8_t TextPageSize; ///< Specifies the size of pages for the exec text. The + ///< default value is 0 (system-selected page size). + uint8_t DataPageSize; ///< Specifies the size of pages for the exec data. The + ///< default value is 0 (system-selected page size). + uint8_t StackPageSize; ///< Specifies the size of pages for the stack. The + ///< default value is 0 (system-selected page size). 
+ uint8_t FlagAndTDataAlignment; + support::ubig16_t SecNumOfTData; + support::ubig16_t SecNumOfTBSS; +}; + +struct XCOFFAuxiliaryHeader64 : XCOFFAuxiliaryHeader<XCOFFAuxiliaryHeader32> { + support::ubig16_t AuxMagic; + support::ubig16_t Version; + support::ubig32_t ReservedForDebugger; + support::ubig64_t TextStartAddr; + support::ubig64_t DataStartAddr; + support::ubig64_t TOCAnchorAddr; + support::ubig16_t SecNumOfEntryPoint; + support::ubig16_t SecNumOfText; + support::ubig16_t SecNumOfData; + support::ubig16_t SecNumOfTOC; + support::ubig16_t SecNumOfLoader; + support::ubig16_t SecNumOfBSS; + support::ubig16_t MaxAlignOfText; + support::ubig16_t MaxAlignOfData; + support::ubig16_t ModuleType; + uint8_t CpuFlag; + uint8_t CpuType; + uint8_t TextPageSize; + uint8_t DataPageSize; + uint8_t StackPageSize; + uint8_t FlagAndTDataAlignment; + support::ubig64_t TextSize; + support::ubig64_t InitDataSize; + support::ubig64_t BssDataSize; + support::ubig64_t EntryPointAddr; + support::ubig64_t MaxStackSize; + support::ubig64_t MaxDataSize; + support::ubig16_t SecNumOfTData; + support::ubig16_t SecNumOfTBSS; + support::ubig16_t XCOFF64Flag; +}; + template <typename T> struct XCOFFSectionHeader { // Least significant 3 bits are reserved. 
static constexpr unsigned SectionFlagsReservedMask = 0x7; @@ -97,6 +192,31 @@ struct XCOFFSectionHeader64 : XCOFFSectionHeader<XCOFFSectionHeader64> { char Padding[4]; }; +struct LoaderSectionHeader32 { + support::ubig32_t Version; + support::ubig32_t NumberOfSymTabEnt; + support::ubig32_t NumberOfRelTabEnt; + support::ubig32_t LengthOfImpidStrTbl; + support::ubig32_t NumberOfImpid; + support::big32_t OffsetToImpid; + support::ubig32_t LengthOfStrTbl; + support::big32_t OffsetToStrTbl; +}; + +struct LoaderSectionHeader64 { + support::ubig32_t Version; + support::ubig32_t NumberOfSymTabEnt; + support::ubig32_t NumberOfRelTabEnt; + support::ubig32_t LengthOfImpidStrTbl; + support::ubig32_t NumberOfImpid; + support::ubig32_t LengthOfStrTbl; + support::big64_t OffsetToImpid; + support::big64_t OffsetToStrTbl; + support::big64_t OffsetToSymTbl; + char Padding[16]; + support::big32_t OffsetToRelEnt; +}; + struct XCOFFStringTable { uint32_t Size; const char *Data; @@ -228,7 +348,7 @@ struct XCOFFSectAuxEntForStat { uint8_t Pad[10]; }; // 32-bit XCOFF file only. -struct XCOFFRelocation32 { +template <typename AddressType> struct XCOFFRelocation { // Masks for packing/unpacking the r_rsize field of relocations. // The msb is used to indicate if the bits being relocated are signed or @@ -244,7 +364,7 @@ struct XCOFFRelocation32 { static constexpr uint8_t XR_BIASED_LENGTH_MASK = 0x3f; public: - support::ubig32_t VirtualAddress; + AddressType VirtualAddress; support::ubig32_t SymbolIndex; // Packed field, see XR_* masks for details of packing. 
@@ -260,11 +380,18 @@ public: uint8_t getRelocatedLength() const; }; +extern template struct XCOFFRelocation<llvm::support::ubig32_t>; +extern template struct XCOFFRelocation<llvm::support::ubig64_t>; + +struct XCOFFRelocation32 : XCOFFRelocation<llvm::support::ubig32_t> {}; +struct XCOFFRelocation64 : XCOFFRelocation<llvm::support::ubig64_t> {}; + class XCOFFSymbolRef; class XCOFFObjectFile : public ObjectFile { private: const void *FileHeader = nullptr; + const void *AuxiliaryHeader = nullptr; const void *SectionHeaderTable = nullptr; const void *SymbolTblPtr = nullptr; @@ -275,6 +402,7 @@ private: const XCOFFSectionHeader32 *sectionHeaderTable32() const; const XCOFFSectionHeader64 *sectionHeaderTable64() const; + template <typename T> const T *sectionHeaderTable() const; size_t getFileHeaderSize() const; size_t getSectionHeaderSize() const; @@ -283,6 +411,7 @@ private: const XCOFFSectionHeader64 *toSection64(DataRefImpl Ref) const; uintptr_t getSectionHeaderTableAddress() const; uintptr_t getEndOfSymbolTableAddress() const; + Expected<uintptr_t> getLoaderSectionAddress() const; // This returns a pointer to the start of the storage for the name field of // the 32-bit or 64-bit SectionHeader struct. This string is *not* necessarily @@ -322,6 +451,7 @@ public: Expected<StringRef> getSymbolName(DataRefImpl Symb) const override; Expected<uint64_t> getSymbolAddress(DataRefImpl Symb) const override; uint64_t getSymbolValueImpl(DataRefImpl Symb) const override; + uint32_t getSymbolAlignment(DataRefImpl Symb) const override; uint64_t getCommonSymbolSizeImpl(DataRefImpl Symb) const override; Expected<SymbolRef::Type> getSymbolType(DataRefImpl Symb) const override; Expected<section_iterator> getSymbolSection(DataRefImpl Symb) const override; @@ -368,6 +498,9 @@ public: // Below here is the non-inherited interface. 
bool is64Bit() const; + const XCOFFAuxiliaryHeader32 *auxiliaryHeader32() const; + const XCOFFAuxiliaryHeader64 *auxiliaryHeader64() const; + const void *getPointerToSymbolTable() const { return SymbolTblPtr; } Expected<StringRef> getSymbolSectionName(XCOFFSymbolRef Ref) const; @@ -398,6 +531,11 @@ public: uint32_t getNumberOfSymbolTableEntries() const; uint32_t getSymbolIndex(uintptr_t SymEntPtr) const; + uint64_t getSymbolSize(DataRefImpl Symb) const; + uintptr_t getSymbolByIndex(uint32_t Idx) const { + return reinterpret_cast<uintptr_t>(SymbolTblPtr) + + XCOFF::SymbolTableEntrySize * Idx; + } uintptr_t getSymbolEntryAddressByIndex(uint32_t SymbolTableIndex) const; Expected<StringRef> getSymbolNameByIndex(uint32_t SymbolTableIndex) const; @@ -415,11 +553,15 @@ public: void checkSymbolEntryPointer(uintptr_t SymbolEntPtr) const; // Relocation-related interfaces. + template <typename T> Expected<uint32_t> - getLogicalNumberOfRelocationEntries(const XCOFFSectionHeader32 &Sec) const; + getNumberOfRelocationEntries(const XCOFFSectionHeader<T> &Sec) const; - Expected<ArrayRef<XCOFFRelocation32>> - relocations(const XCOFFSectionHeader32 &) const; + template <typename Shdr, typename Reloc> + Expected<ArrayRef<Reloc>> relocations(const Shdr &Sec) const; + + // Loader section related interfaces. + Expected<StringRef> getImportFileTable() const; // This function returns string table entry. Expected<StringRef> getStringTableEntry(uint32_t Offset) const; @@ -572,6 +714,7 @@ class XCOFFTracebackTable { Optional<uint8_t> ExtensionTable; XCOFFTracebackTable(const uint8_t *Ptr, uint64_t &Size, Error &Err); + public: /// Parse an XCOFF Traceback Table from \a Ptr with \a Size bytes. 
/// Returns an XCOFFTracebackTable upon successful parsing, otherwise an diff --git a/llvm/include/llvm/ObjectYAML/MachOYAML.h b/llvm/include/llvm/ObjectYAML/MachOYAML.h index 5d1d3ee23594..ee89f4eac61f 100644 --- a/llvm/include/llvm/ObjectYAML/MachOYAML.h +++ b/llvm/include/llvm/ObjectYAML/MachOYAML.h @@ -131,6 +131,7 @@ struct Object { std::vector<LoadCommand> LoadCommands; std::vector<Section> Sections; LinkEditData LinkEdit; + Optional<llvm::yaml::BinaryRef> RawLinkEditSegment; DWARFYAML::Data DWARF; }; diff --git a/llvm/include/llvm/ObjectYAML/WasmYAML.h b/llvm/include/llvm/ObjectYAML/WasmYAML.h index 661e06fba8bd..e3a1ba0d58a6 100644 --- a/llvm/include/llvm/ObjectYAML/WasmYAML.h +++ b/llvm/include/llvm/ObjectYAML/WasmYAML.h @@ -77,12 +77,6 @@ struct Global { wasm::WasmInitExpr InitExpr; }; -struct Tag { - uint32_t Index; - uint32_t Attribute; - uint32_t SigIndex; -}; - struct Import { StringRef Module; StringRef Field; @@ -92,7 +86,7 @@ struct Import { Global GlobalImport; Table TableImport; Limits Memory; - Tag TagImport; + uint32_t TagIndex; }; }; @@ -199,12 +193,23 @@ struct CustomSection : Section { yaml::BinaryRef Payload; }; +struct DylinkImportInfo { + StringRef Module; + StringRef Field; + SymbolFlags Flags; +}; + +struct DylinkExportInfo { + StringRef Name; + SymbolFlags Flags; +}; + struct DylinkSection : CustomSection { - DylinkSection() : CustomSection("dylink") {} + DylinkSection() : CustomSection("dylink.0") {} static bool classof(const Section *S) { auto C = dyn_cast<CustomSection>(S); - return C && C->Name == "dylink"; + return C && C->Name == "dylink.0"; } uint32_t MemorySize; @@ -212,6 +217,8 @@ struct DylinkSection : CustomSection { uint32_t TableSize; uint32_t TableAlignment; std::vector<StringRef> Needed; + std::vector<DylinkImportInfo> ImportInfo; + std::vector<DylinkExportInfo> ExportInfo; }; struct NameSection : CustomSection { @@ -323,7 +330,7 @@ struct TagSection : Section { return S->Type == wasm::WASM_SEC_TAG; } - std::vector<Tag> 
Tags; + std::vector<uint32_t> TagTypes; }; struct GlobalSection : Section { @@ -425,7 +432,8 @@ LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::WasmYAML::SymbolInfo) LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::WasmYAML::InitFunction) LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::WasmYAML::ComdatEntry) LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::WasmYAML::Comdat) -LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::WasmYAML::Tag) +LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::WasmYAML::DylinkImportInfo) +LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::WasmYAML::DylinkExportInfo) namespace llvm { namespace yaml { @@ -570,8 +578,12 @@ template <> struct ScalarEnumerationTraits<WasmYAML::RelocType> { static void enumeration(IO &IO, WasmYAML::RelocType &Kind); }; -template <> struct MappingTraits<WasmYAML::Tag> { - static void mapping(IO &IO, WasmYAML::Tag &Tag); +template <> struct MappingTraits<WasmYAML::DylinkImportInfo> { + static void mapping(IO &IO, WasmYAML::DylinkImportInfo &Info); +}; + +template <> struct MappingTraits<WasmYAML::DylinkExportInfo> { + static void mapping(IO &IO, WasmYAML::DylinkExportInfo &Info); }; } // end namespace yaml diff --git a/llvm/include/llvm/ObjectYAML/XCOFFYAML.h b/llvm/include/llvm/ObjectYAML/XCOFFYAML.h index 2630175642c4..aa1bc396f134 100644 --- a/llvm/include/llvm/ObjectYAML/XCOFFYAML.h +++ b/llvm/include/llvm/ObjectYAML/XCOFFYAML.h @@ -24,11 +24,43 @@ struct FileHeader { uint16_t NumberOfSections; int32_t TimeStamp; llvm::yaml::Hex64 SymbolTableOffset; - uint32_t NumberOfSymTableEntries; + int32_t NumberOfSymTableEntries; uint16_t AuxHeaderSize; llvm::yaml::Hex16 Flags; }; +struct AuxiliaryHeader { + Optional<llvm::yaml::Hex16> Magic; + Optional<llvm::yaml::Hex16> Version; + Optional<llvm::yaml::Hex64> TextStartAddr; + Optional<llvm::yaml::Hex64> DataStartAddr; + Optional<llvm::yaml::Hex64> TOCAnchorAddr; + Optional<uint16_t> SecNumOfEntryPoint; + Optional<uint16_t> SecNumOfText; + Optional<uint16_t> SecNumOfData; + Optional<uint16_t> SecNumOfTOC; + Optional<uint16_t> SecNumOfLoader; + Optional<uint16_t> 
SecNumOfBSS; + Optional<llvm::yaml::Hex16> MaxAlignOfText; + Optional<llvm::yaml::Hex16> MaxAlignOfData; + Optional<llvm::yaml::Hex16> ModuleType; + Optional<llvm::yaml::Hex8> CpuFlag; + Optional<llvm::yaml::Hex8> CpuType; + Optional<llvm::yaml::Hex8> TextPageSize; + Optional<llvm::yaml::Hex8> DataPageSize; + Optional<llvm::yaml::Hex8> StackPageSize; + Optional<llvm::yaml::Hex8> FlagAndTDataAlignment; + Optional<llvm::yaml::Hex64> TextSize; + Optional<llvm::yaml::Hex64> InitDataSize; + Optional<llvm::yaml::Hex64> BssDataSize; + Optional<llvm::yaml::Hex64> EntryPointAddr; + Optional<llvm::yaml::Hex64> MaxStackSize; + Optional<llvm::yaml::Hex64> MaxDataSize; + Optional<uint16_t> SecNumOfTData; + Optional<uint16_t> SecNumOfTBSS; + Optional<llvm::yaml::Hex16> Flag; +}; + struct Relocation { llvm::yaml::Hex64 VirtualAddress; llvm::yaml::Hex64 SymbolIndex; @@ -53,16 +85,27 @@ struct Section { struct Symbol { StringRef SymbolName; llvm::yaml::Hex64 Value; // Symbol value; storage class-dependent. - StringRef SectionName; + Optional<StringRef> SectionName; + Optional<uint16_t> SectionIndex; llvm::yaml::Hex16 Type; XCOFF::StorageClass StorageClass; uint8_t NumberOfAuxEntries; }; +struct StringTable { + Optional<uint32_t> ContentSize; // The total size of the string table. + Optional<uint32_t> Length; // The value of the length field for the first + // 4 bytes of the table. 
+ Optional<std::vector<StringRef>> Strings; + Optional<yaml::BinaryRef> RawContent; +}; + struct Object { FileHeader Header; + Optional<AuxiliaryHeader> AuxHeader; std::vector<Section> Sections; std::vector<Symbol> Symbols; + StringTable StrTbl; Object(); }; } // namespace XCOFFYAML @@ -87,6 +130,9 @@ template <> struct MappingTraits<XCOFFYAML::FileHeader> { static void mapping(IO &IO, XCOFFYAML::FileHeader &H); }; +template <> struct MappingTraits<XCOFFYAML::AuxiliaryHeader> { + static void mapping(IO &IO, XCOFFYAML::AuxiliaryHeader &AuxHdr); +}; template <> struct MappingTraits<XCOFFYAML::Symbol> { static void mapping(IO &IO, XCOFFYAML::Symbol &S); @@ -100,6 +146,10 @@ template <> struct MappingTraits<XCOFFYAML::Section> { static void mapping(IO &IO, XCOFFYAML::Section &Sec); }; +template <> struct MappingTraits<XCOFFYAML::StringTable> { + static void mapping(IO &IO, XCOFFYAML::StringTable &Str); +}; + template <> struct MappingTraits<XCOFFYAML::Object> { static void mapping(IO &IO, XCOFFYAML::Object &Obj); }; diff --git a/llvm/include/llvm/Option/Arg.h b/llvm/include/llvm/Option/Arg.h index 22e2bcf06a6e..4be254ccdab4 100644 --- a/llvm/include/llvm/Option/Arg.h +++ b/llvm/include/llvm/Option/Arg.h @@ -118,10 +118,7 @@ public: const SmallVectorImpl<const char *> &getValues() const { return Values; } bool containsValue(StringRef Value) const { - for (unsigned i = 0, e = getNumValues(); i != e; ++i) - if (Values[i] == Value) - return true; - return false; + return llvm::is_contained(Values, Value); } /// Append the argument onto the given array as strings. diff --git a/llvm/include/llvm/Option/OptParser.td b/llvm/include/llvm/Option/OptParser.td index 96014b505d0f..9c73f478db5e 100644 --- a/llvm/include/llvm/Option/OptParser.td +++ b/llvm/include/llvm/Option/OptParser.td @@ -214,7 +214,7 @@ class MarshallingInfoBitfieldFlag<KeyPathAndMacro kpm, code value> } // Implementation detail of BoolOption. 
-class MarshallingInfoBooleanFlag<KeyPathAndMacro kpm, code defaultvalue, code value, code name, +class MarshallingInfoBooleanFlag<KeyPathAndMacro kpm, code defaultvalue, code value, code other_value, code other_name> : MarshallingInfoFlag<kpm, defaultvalue> { code Normalizer = "makeBooleanOptionNormalizer("#value#", "#other_value#", OPT_"#other_name#")"; diff --git a/llvm/include/llvm/Option/OptTable.h b/llvm/include/llvm/Option/OptTable.h index ca2013ee6f04..07d9870f71b3 100644 --- a/llvm/include/llvm/Option/OptTable.h +++ b/llvm/include/llvm/Option/OptTable.h @@ -64,8 +64,8 @@ private: bool GroupedShortOptions = false; const char *EnvVar = nullptr; - unsigned TheInputOptionID = 0; - unsigned TheUnknownOptionID = 0; + unsigned InputOptionID = 0; + unsigned UnknownOptionID = 0; /// The index of the first option which can be parsed (i.e., is not a /// special option like 'input' or 'unknown', and is not an option group). @@ -83,7 +83,8 @@ private: return OptionInfos[id - 1]; } - Arg *parseOneArgGrouped(InputArgList &Args, unsigned &Index) const; + std::unique_ptr<Arg> parseOneArgGrouped(InputArgList &Args, + unsigned &Index) const; protected: OptTable(ArrayRef<Info> OptionInfos, bool IgnoreCase = false); @@ -199,9 +200,9 @@ public: /// \return The parsed argument, or 0 if the argument is missing values /// (in which case Index still points at the conceptual next argument string /// to parse). - Arg *ParseOneArg(const ArgList &Args, unsigned &Index, - unsigned FlagsToInclude = 0, - unsigned FlagsToExclude = 0) const; + std::unique_ptr<Arg> ParseOneArg(const ArgList &Args, unsigned &Index, + unsigned FlagsToInclude = 0, + unsigned FlagsToExclude = 0) const; /// Parse an list of arguments into an InputArgList. /// diff --git a/llvm/include/llvm/Option/Option.h b/llvm/include/llvm/Option/Option.h index 196cf656355d..106f6863fca1 100644 --- a/llvm/include/llvm/Option/Option.h +++ b/llvm/include/llvm/Option/Option.h @@ -205,9 +205,9 @@ public: /// always be false. 
bool matches(OptSpecifier ID) const; - /// accept - Potentially accept the current argument, returning a - /// new Arg instance, or 0 if the option does not accept this - /// argument (or the argument is missing values). + /// Potentially accept the current argument, returning a new Arg instance, + /// or 0 if the option does not accept this argument (or the argument is + /// missing values). /// /// If the option accepts the current argument, accept() sets /// Index to the position where argument parsing should resume @@ -217,12 +217,12 @@ public: /// underlying storage to represent a Joined argument. /// \p GroupedShortOption If true, we are handling the fallback case of /// parsing a prefix of the current argument as a short option. - Arg *accept(const ArgList &Args, StringRef CurArg, bool GroupedShortOption, - unsigned &Index) const; + std::unique_ptr<Arg> accept(const ArgList &Args, StringRef CurArg, + bool GroupedShortOption, unsigned &Index) const; private: - Arg *acceptInternal(const ArgList &Args, StringRef CurArg, - unsigned &Index) const; + std::unique_ptr<Arg> acceptInternal(const ArgList &Args, StringRef CurArg, + unsigned &Index) const; public: void print(raw_ostream &O) const; diff --git a/llvm/include/llvm/Passes/OptimizationLevel.h b/llvm/include/llvm/Passes/OptimizationLevel.h new file mode 100644 index 000000000000..d2c3fde4935f --- /dev/null +++ b/llvm/include/llvm/Passes/OptimizationLevel.h @@ -0,0 +1,127 @@ +//===-------- LLVM-provided High-Level Optimization levels -*- C++ -*------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This header enumerates the LLVM-provided high-level optimization levels. +/// Each level has a specific goal and rationale. 
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_PASSES_OPTIMIZATIONLEVEL_H +#define LLVM_PASSES_OPTIMIZATIONLEVEL_H + +#include <assert.h> + +namespace llvm { + +class OptimizationLevel final { + unsigned SpeedLevel = 2; + unsigned SizeLevel = 0; + OptimizationLevel(unsigned SpeedLevel, unsigned SizeLevel) + : SpeedLevel(SpeedLevel), SizeLevel(SizeLevel) { + // Check that only valid combinations are passed. + assert(SpeedLevel <= 3 && + "Optimization level for speed should be 0, 1, 2, or 3"); + assert(SizeLevel <= 2 && + "Optimization level for size should be 0, 1, or 2"); + assert((SizeLevel == 0 || SpeedLevel == 2) && + "Optimize for size should be encoded with speedup level == 2"); + } + +public: + OptimizationLevel() = default; + /// Disable as many optimizations as possible. This doesn't completely + /// disable the optimizer in all cases, for example always_inline functions + /// can be required to be inlined for correctness. + static const OptimizationLevel O0; + + /// Optimize quickly without destroying debuggability. + /// + /// This level is tuned to produce a result from the optimizer as quickly + /// as possible and to avoid destroying debuggability. This tends to result + /// in a very good development mode where the compiled code will be + /// immediately executed as part of testing. As a consequence, where + /// possible, we would like to produce efficient-to-execute code, but not + /// if it significantly slows down compilation or would prevent even basic + /// debugging of the resulting binary. + /// + /// As an example, complex loop transformations such as versioning, + /// vectorization, or fusion don't make sense here due to the degree to + /// which the executed code differs from the source code, and the compile + /// time cost. 
+ static const OptimizationLevel O1; + /// Optimize for fast execution as much as possible without triggering + /// significant incremental compile time or code size growth. + /// + /// The key idea is that optimizations at this level should "pay for + /// themselves". So if an optimization increases compile time by 5% or + /// increases code size by 5% for a particular benchmark, that benchmark + /// should also be one which sees a 5% runtime improvement. If the compile + /// time or code size penalties happen on average across a diverse range of + /// LLVM users' benchmarks, then the improvements should as well. + /// + /// And no matter what, the compile time needs to not grow superlinearly + /// with the size of input to LLVM so that users can control the runtime of + /// the optimizer in this mode. + /// + /// This is expected to be a good default optimization level for the vast + /// majority of users. + static const OptimizationLevel O2; + /// Optimize for fast execution as much as possible. + /// + /// This mode is significantly more aggressive in trading off compile time + /// and code size to get execution time improvements. The core idea is that + /// this mode should include any optimization that helps execution time on + /// balance across a diverse collection of benchmarks, even if it increases + /// code size or compile time for some benchmarks without corresponding + /// improvements to execution time. + /// + /// Despite being willing to trade more compile time off to get improved + /// execution time, this mode still tries to avoid superlinear growth in + /// order to make even significantly slower compile times at least scale + /// reasonably. This does not preclude very substantial constant factor + /// costs though. + static const OptimizationLevel O3; + /// Similar to \c O2 but tries to optimize for small code size instead of + /// fast execution without triggering significant incremental execution + /// time slowdowns. 
+ /// + /// The logic here is exactly the same as \c O2, but with code size and + /// execution time metrics swapped. + /// + /// A consequence of the different core goal is that this should in general + /// produce substantially smaller executables that still run in + /// a reasonable amount of time. + static const OptimizationLevel Os; + /// A very specialized mode that will optimize for code size at any and all + /// costs. + /// + /// This is useful primarily when there are absolute size limitations and + /// any effort taken to reduce the size is worth it regardless of the + /// execution time impact. You should expect this level to produce rather + /// slow, but very small, code. + static const OptimizationLevel Oz; + + bool isOptimizingForSpeed() const { return SizeLevel == 0 && SpeedLevel > 0; } + + bool isOptimizingForSize() const { return SizeLevel > 0; } + + bool operator==(const OptimizationLevel &Other) const { + return SizeLevel == Other.SizeLevel && SpeedLevel == Other.SpeedLevel; + } + bool operator!=(const OptimizationLevel &Other) const { + return SizeLevel != Other.SizeLevel || SpeedLevel != Other.SpeedLevel; + } + + unsigned getSpeedupLevel() const { return SpeedLevel; } + + unsigned getSizeLevel() const { return SizeLevel; } +}; +} // namespace llvm + +#endif diff --git a/llvm/include/llvm/Passes/PassBuilder.h b/llvm/include/llvm/Passes/PassBuilder.h index fae3e2cd2e0b..7c7883e98183 100644 --- a/llvm/include/llvm/Passes/PassBuilder.h +++ b/llvm/include/llvm/Passes/PassBuilder.h @@ -18,9 +18,12 @@ #include "llvm/ADT/Optional.h" #include "llvm/Analysis/CGSCCPassManager.h" #include "llvm/IR/PassManager.h" +#include "llvm/Passes/OptimizationLevel.h" #include "llvm/Support/Error.h" +#include "llvm/Support/PGOOptions.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/IPO/Inliner.h" +#include "llvm/Transforms/IPO/ModuleInliner.h" #include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/Scalar/LoopPassManager.h" #include 
<vector> @@ -31,57 +34,6 @@ class AAManager; class TargetMachine; class ModuleSummaryIndex; -/// A struct capturing PGO tunables. -struct PGOOptions { - enum PGOAction { NoAction, IRInstr, IRUse, SampleUse }; - enum CSPGOAction { NoCSAction, CSIRInstr, CSIRUse }; - PGOOptions(std::string ProfileFile = "", std::string CSProfileGenFile = "", - std::string ProfileRemappingFile = "", PGOAction Action = NoAction, - CSPGOAction CSAction = NoCSAction, - bool DebugInfoForProfiling = false, - bool PseudoProbeForProfiling = false) - : ProfileFile(ProfileFile), CSProfileGenFile(CSProfileGenFile), - ProfileRemappingFile(ProfileRemappingFile), Action(Action), - CSAction(CSAction), DebugInfoForProfiling(DebugInfoForProfiling || - (Action == SampleUse && - !PseudoProbeForProfiling)), - PseudoProbeForProfiling(PseudoProbeForProfiling) { - // Note, we do allow ProfileFile.empty() for Action=IRUse LTO can - // callback with IRUse action without ProfileFile. - - // If there is a CSAction, PGOAction cannot be IRInstr or SampleUse. - assert(this->CSAction == NoCSAction || - (this->Action != IRInstr && this->Action != SampleUse)); - - // For CSIRInstr, CSProfileGenFile also needs to be nonempty. - assert(this->CSAction != CSIRInstr || !this->CSProfileGenFile.empty()); - - // If CSAction is CSIRUse, PGOAction needs to be IRUse as they share - // a profile. - assert(this->CSAction != CSIRUse || this->Action == IRUse); - - // If neither Action nor CSAction, DebugInfoForProfiling or - // PseudoProbeForProfiling needs to be true. - assert(this->Action != NoAction || this->CSAction != NoCSAction || - this->DebugInfoForProfiling || this->PseudoProbeForProfiling); - - // Pseudo probe emission does not work with -fdebug-info-for-profiling since - // they both use the discriminator field of debug lines but for different - // purposes. 
- if (this->DebugInfoForProfiling && this->PseudoProbeForProfiling) { - report_fatal_error( - "Pseudo probes cannot be used with -debug-info-for-profiling", false); - } - } - std::string ProfileFile; - std::string CSProfileGenFile; - std::string ProfileRemappingFile; - PGOAction Action; - CSPGOAction CSAction; - bool DebugInfoForProfiling; - bool PseudoProbeForProfiling; -}; - /// Tunable parameters for passes in the default pipelines. class PipelineTuningOptions { public: @@ -122,6 +74,15 @@ public: /// Tuning option to enable/disable function merging. Its default value is /// false. bool MergeFunctions; + + // Experimental option to eagerly invalidate more analyses. This has the + // potential to decrease max memory usage in exchange for more compile time. + // This may affect codegen due to either passes using analyses only when + // cached, or invalidating and recalculating an analysis that was + // stale/imprecise but still valid. Currently this invalidates all function + // analyses after various module->function or cgscc->function adaptors in the + // default pipelines. + bool EagerlyInvalidateAnalyses; }; /// This class provides access to building LLVM's passes. @@ -150,116 +111,6 @@ public: std::vector<PipelineElement> InnerPipeline; }; - /// LLVM-provided high-level optimization levels. - /// - /// This enumerates the LLVM-provided high-level optimization levels. Each - /// level has a specific goal and rationale. - class OptimizationLevel final { - unsigned SpeedLevel = 2; - unsigned SizeLevel = 0; - OptimizationLevel(unsigned SpeedLevel, unsigned SizeLevel) - : SpeedLevel(SpeedLevel), SizeLevel(SizeLevel) { - // Check that only valid combinations are passed. 
- assert(SpeedLevel <= 3 && - "Optimization level for speed should be 0, 1, 2, or 3"); - assert(SizeLevel <= 2 && - "Optimization level for size should be 0, 1, or 2"); - assert((SizeLevel == 0 || SpeedLevel == 2) && - "Optimize for size should be encoded with speedup level == 2"); - } - - public: - OptimizationLevel() = default; - /// Disable as many optimizations as possible. This doesn't completely - /// disable the optimizer in all cases, for example always_inline functions - /// can be required to be inlined for correctness. - static const OptimizationLevel O0; - - /// Optimize quickly without destroying debuggability. - /// - /// This level is tuned to produce a result from the optimizer as quickly - /// as possible and to avoid destroying debuggability. This tends to result - /// in a very good development mode where the compiled code will be - /// immediately executed as part of testing. As a consequence, where - /// possible, we would like to produce efficient-to-execute code, but not - /// if it significantly slows down compilation or would prevent even basic - /// debugging of the resulting binary. - /// - /// As an example, complex loop transformations such as versioning, - /// vectorization, or fusion don't make sense here due to the degree to - /// which the executed code differs from the source code, and the compile - /// time cost. - static const OptimizationLevel O1; - /// Optimize for fast execution as much as possible without triggering - /// significant incremental compile time or code size growth. - /// - /// The key idea is that optimizations at this level should "pay for - /// themselves". So if an optimization increases compile time by 5% or - /// increases code size by 5% for a particular benchmark, that benchmark - /// should also be one which sees a 5% runtime improvement. If the compile - /// time or code size penalties happen on average across a diverse range of - /// LLVM users' benchmarks, then the improvements should as well. 
- /// - /// And no matter what, the compile time needs to not grow superlinearly - /// with the size of input to LLVM so that users can control the runtime of - /// the optimizer in this mode. - /// - /// This is expected to be a good default optimization level for the vast - /// majority of users. - static const OptimizationLevel O2; - /// Optimize for fast execution as much as possible. - /// - /// This mode is significantly more aggressive in trading off compile time - /// and code size to get execution time improvements. The core idea is that - /// this mode should include any optimization that helps execution time on - /// balance across a diverse collection of benchmarks, even if it increases - /// code size or compile time for some benchmarks without corresponding - /// improvements to execution time. - /// - /// Despite being willing to trade more compile time off to get improved - /// execution time, this mode still tries to avoid superlinear growth in - /// order to make even significantly slower compile times at least scale - /// reasonably. This does not preclude very substantial constant factor - /// costs though. - static const OptimizationLevel O3; - /// Similar to \c O2 but tries to optimize for small code size instead of - /// fast execution without triggering significant incremental execution - /// time slowdowns. - /// - /// The logic here is exactly the same as \c O2, but with code size and - /// execution time metrics swapped. - /// - /// A consequence of the different core goal is that this should in general - /// produce substantially smaller executables that still run in - /// a reasonable amount of time. - static const OptimizationLevel Os; - /// A very specialized mode that will optimize for code size at any and all - /// costs. - /// - /// This is useful primarily when there are absolute size limitations and - /// any effort taken to reduce the size is worth it regardless of the - /// execution time impact. 
You should expect this level to produce rather - /// slow, but very small, code. - static const OptimizationLevel Oz; - - bool isOptimizingForSpeed() const { - return SizeLevel == 0 && SpeedLevel > 0; - } - - bool isOptimizingForSize() const { return SizeLevel > 0; } - - bool operator==(const OptimizationLevel &Other) const { - return SizeLevel == Other.SizeLevel && SpeedLevel == Other.SpeedLevel; - } - bool operator!=(const OptimizationLevel &Other) const { - return SizeLevel != Other.SizeLevel || SpeedLevel != Other.SpeedLevel; - } - - unsigned getSpeedupLevel() const { return SpeedLevel; } - - unsigned getSizeLevel() const { return SizeLevel; } - }; - explicit PassBuilder(TargetMachine *TM = nullptr, PipelineTuningOptions PTO = PipelineTuningOptions(), Optional<PGOOptions> PGOOpt = None, @@ -346,6 +197,11 @@ public: ModuleInlinerWrapperPass buildInlinerPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase); + /// Construct the module pipeline that performs inlining with + /// module inliner pass. + ModuleInlinerPass buildModuleInlinerPipeline(OptimizationLevel Level, + ThinOrFullLTOPhase Phase); + /// Construct the core LLVM module optimization pipeline. /// /// This pipeline focuses on optimizing the execution speed of the IR. It diff --git a/llvm/include/llvm/Passes/StandardInstrumentations.h b/llvm/include/llvm/Passes/StandardInstrumentations.h index 2f573585e766..6cab4ce7d138 100644 --- a/llvm/include/llvm/Passes/StandardInstrumentations.h +++ b/llvm/include/llvm/Passes/StandardInstrumentations.h @@ -215,8 +215,6 @@ protected: virtual void handleFiltered(StringRef PassID, std::string &Name) = 0; // Called when an ignored pass is encountered. virtual void handleIgnored(StringRef PassID, std::string &Name) = 0; - // Called to compare the before and after representations of the IR. - virtual bool same(const IRUnitT &Before, const IRUnitT &After) = 0; // Stack of IRs before passes. 
std::vector<IRUnitT> BeforeStack; @@ -269,50 +267,47 @@ protected: void handleAfter(StringRef PassID, std::string &Name, const std::string &Before, const std::string &After, Any) override; - // Called to compare the before and after representations of the IR. - bool same(const std::string &Before, const std::string &After) override; }; -// The following classes hold a representation of the IR for a change -// reporter that uses string comparisons of the basic blocks -// that are created using print (ie, similar to dump()). -// These classes respect the filtering of passes and functions using -// -filter-passes and -filter-print-funcs. -// // Information that needs to be saved for a basic block in order to compare // before and after the pass to determine if it was changed by a pass. -class ChangedBlockData { +template <typename T> class BlockDataT { public: - ChangedBlockData(const BasicBlock &B); - - bool operator==(const ChangedBlockData &That) const { - return Body == That.Body; - } - bool operator!=(const ChangedBlockData &That) const { - return Body != That.Body; + BlockDataT(const BasicBlock &B) : Label(B.getName().str()), Data(B) { + raw_string_ostream SS(Body); + B.print(SS, nullptr, true, true); } + bool operator==(const BlockDataT &That) const { return Body == That.Body; } + bool operator!=(const BlockDataT &That) const { return Body != That.Body; } + // Return the label of the represented basic block. StringRef getLabel() const { return Label; } // Return the string representation of the basic block. 
StringRef getBody() const { return Body; } + // Return the associated data + const T &getData() const { return Data; } + protected: std::string Label; std::string Body; + + // Extra data associated with a basic block + T Data; }; -template <typename IRData> class OrderedChangedData { +template <typename T> class OrderedChangedData { public: // Return the names in the order they were saved std::vector<std::string> &getOrder() { return Order; } const std::vector<std::string> &getOrder() const { return Order; } // Return a map of names to saved representations - StringMap<IRData> &getData() { return Data; } - const StringMap<IRData> &getData() const { return Data; } + StringMap<T> &getData() { return Data; } + const StringMap<T> &getData() const { return Data; } - bool operator==(const OrderedChangedData<IRData> &That) const { + bool operator==(const OrderedChangedData<T> &That) const { return Data == That.getData(); } @@ -321,55 +316,64 @@ public: // with ones that are only in \p Before interspersed based on where they // occur in \p Before. This is used to present the output in an order // based on how the data is ordered in LLVM. - static void - report(const OrderedChangedData &Before, const OrderedChangedData &After, - function_ref<void(const IRData *, const IRData *)> HandlePair); + static void report(const OrderedChangedData &Before, + const OrderedChangedData &After, + function_ref<void(const T *, const T *)> HandlePair); protected: std::vector<std::string> Order; - StringMap<IRData> Data; + StringMap<T> Data; +}; + +// Do not need extra information for patch-style change reporter. +class EmptyData { +public: + EmptyData(const BasicBlock &) {} }; // The data saved for comparing functions. 
-using ChangedFuncData = OrderedChangedData<ChangedBlockData>; +template <typename T> +class FuncDataT : public OrderedChangedData<BlockDataT<T>> { +public: + FuncDataT(std::string S) : EntryBlockName(S) {} + + // Return the name of the entry block + std::string getEntryBlockName() const { return EntryBlockName; } + +protected: + std::string EntryBlockName; +}; -// A map of names to the saved data. -using ChangedIRData = OrderedChangedData<ChangedFuncData>; +// The data saved for comparing IRs. +template <typename T> +class IRDataT : public OrderedChangedData<FuncDataT<T>> {}; -// A class that compares two IRs and does a diff between them. The -// added lines are prefixed with a '+', the removed lines are prefixed -// with a '-' and unchanged lines are prefixed with a space (to have -// things line up). -class ChangedIRComparer { +// Abstract template base class for a class that compares two IRs. The +// class is created with the 2 IRs to compare and then compare is called. +// The static function analyzeIR is used to build up the IR representation. +template <typename T> class IRComparer { public: - ChangedIRComparer(raw_ostream &OS, const ChangedIRData &Before, - const ChangedIRData &After, bool ColourMode) - : Before(Before), After(After), Out(OS), UseColour(ColourMode) {} + IRComparer(const IRDataT<T> &Before, const IRDataT<T> &After) + : Before(Before), After(After) {} - // Compare the 2 IRs. - void compare(Any IR, StringRef Prefix, StringRef PassID, StringRef Name); + // Compare the 2 IRs. \p handleFunctionCompare is called to handle the + // compare of a function. When \p InModule is set, + // this function is being handled as part of comparing a module. + void compare( + bool CompareModule, + std::function<void(bool InModule, unsigned Minor, + const FuncDataT<T> &Before, const FuncDataT<T> &After)> + CompareFunc); // Analyze \p IR and build the IR representation in \p Data. 
- static void analyzeIR(Any IR, ChangedIRData &Data); + static void analyzeIR(Any IR, IRDataT<T> &Data); protected: - // Return the module when that is the appropriate level of - // comparison for \p IR. - static const Module *getModuleForComparison(Any IR); - // Generate the data for \p F into \p Data. - static bool generateFunctionData(ChangedIRData &Data, const Function &F); + static bool generateFunctionData(IRDataT<T> &Data, const Function &F); - // Called to handle the compare of a function. When \p InModule is set, - // this function is being handled as part of comparing a module. - void handleFunctionCompare(StringRef Name, StringRef Prefix, StringRef PassID, - bool InModule, const ChangedFuncData &Before, - const ChangedFuncData &After); - - const ChangedIRData &Before; - const ChangedIRData &After; - raw_ostream &Out; - bool UseColour; + const IRDataT<T> &Before; + const IRDataT<T> &After; }; // A change printer that prints out in-line differences in the basic @@ -378,25 +382,28 @@ protected: // and added, respectively. Changes to the IR that do not affect basic // blocks are not reported as having changed the IR. The option // -print-module-scope does not affect this change reporter. -class InLineChangePrinter : public TextChangeReporter<ChangedIRData> { +class InLineChangePrinter : public TextChangeReporter<IRDataT<EmptyData>> { public: InLineChangePrinter(bool VerboseMode, bool ColourMode) - : TextChangeReporter<ChangedIRData>(VerboseMode), UseColour(ColourMode) {} + : TextChangeReporter<IRDataT<EmptyData>>(VerboseMode), + UseColour(ColourMode) {} ~InLineChangePrinter() override; void registerCallbacks(PassInstrumentationCallbacks &PIC); protected: // Create a representation of the IR. virtual void generateIRRepresentation(Any IR, StringRef PassID, - ChangedIRData &Output) override; + IRDataT<EmptyData> &Output) override; // Called when an interesting IR has changed. 
virtual void handleAfter(StringRef PassID, std::string &Name, - const ChangedIRData &Before, - const ChangedIRData &After, Any) override; - // Called to compare the before and after representations of the IR. - virtual bool same(const ChangedIRData &Before, - const ChangedIRData &After) override; + const IRDataT<EmptyData> &Before, + const IRDataT<EmptyData> &After, Any) override; + + void handleFunctionCompare(StringRef Name, StringRef Prefix, StringRef PassID, + StringRef Divider, bool InModule, unsigned Minor, + const FuncDataT<EmptyData> &Before, + const FuncDataT<EmptyData> &After); bool UseColour; }; @@ -409,6 +416,81 @@ public: void registerCallbacks(PassInstrumentationCallbacks &PIC); }; +// Class that holds transitions between basic blocks. The transitions +// are contained in a map of values to names of basic blocks. +class DCData { +public: + // Fill the map with the transitions from basic block \p B. + DCData(const BasicBlock &B); + + // Return an iterator to the names of the successor blocks. + StringMap<std::string>::const_iterator begin() const { + return Successors.begin(); + } + StringMap<std::string>::const_iterator end() const { + return Successors.end(); + } + + // Return the label of the basic block reached on a transition on \p S. + const StringRef getSuccessorLabel(StringRef S) const { + assert(Successors.count(S) == 1 && "Expected to find successor."); + return Successors.find(S)->getValue(); + } + +protected: + // Add a transition to \p Succ on \p Label + void addSuccessorLabel(StringRef Succ, StringRef Label) { + std::pair<std::string, std::string> SS{Succ.str(), Label.str()}; + Successors.insert(SS); + } + + StringMap<std::string> Successors; +}; + +// A change reporter that builds a website with links to pdf files showing +// dot control flow graphs with changed instructions shown in colour. 
+class DotCfgChangeReporter : public ChangeReporter<IRDataT<DCData>> { +public: + DotCfgChangeReporter(bool Verbose); + ~DotCfgChangeReporter() override; + void registerCallbacks(PassInstrumentationCallbacks &PIC); + +protected: + // Initialize the HTML file and output the header. + bool initializeHTML(); + + // Called on the first IR processed. + void handleInitialIR(Any IR) override; + // Called before and after a pass to get the representation of the IR. + void generateIRRepresentation(Any IR, StringRef PassID, + IRDataT<DCData> &Output) override; + // Called when the pass is not iteresting. + void omitAfter(StringRef PassID, std::string &Name) override; + // Called when an interesting IR has changed. + void handleAfter(StringRef PassID, std::string &Name, + const IRDataT<DCData> &Before, const IRDataT<DCData> &After, + Any) override; + // Called when an interesting pass is invalidated. + void handleInvalidated(StringRef PassID) override; + // Called when the IR or pass is not interesting. + void handleFiltered(StringRef PassID, std::string &Name) override; + // Called when an ignored pass is encountered. + void handleIgnored(StringRef PassID, std::string &Name) override; + + // Generate the pdf file into \p Dir / \p PDFFileName using \p DotFile as + // input and return the html <a> tag with \Text as the content. + static std::string genHTML(StringRef Text, StringRef DotFile, + StringRef PDFFileName); + + void handleFunctionCompare(StringRef Name, StringRef Prefix, StringRef PassID, + StringRef Divider, bool InModule, unsigned Minor, + const FuncDataT<DCData> &Before, + const FuncDataT<DCData> &After); + + unsigned N = 0; + std::unique_ptr<raw_fd_ostream> HTML; +}; + /// This class provides an interface to register all the standard pass /// instrumentations and manages their state (if any). 
class StandardInstrumentations { @@ -421,6 +503,7 @@ class StandardInstrumentations { IRChangedPrinter PrintChangedIR; PseudoProbeVerifier PseudoProbeVerification; InLineChangePrinter PrintChangedDiff; + DotCfgChangeReporter WebsiteChangeReporter; VerifyInstrumentation Verify; bool VerifyEach; @@ -440,8 +523,12 @@ public: extern template class ChangeReporter<std::string>; extern template class TextChangeReporter<std::string>; -extern template class ChangeReporter<ChangedIRData>; -extern template class TextChangeReporter<ChangedIRData>; +extern template class BlockDataT<EmptyData>; +extern template class FuncDataT<EmptyData>; +extern template class IRDataT<EmptyData>; +extern template class ChangeReporter<IRDataT<EmptyData>>; +extern template class TextChangeReporter<IRDataT<EmptyData>>; +extern template class IRComparer<EmptyData>; } // namespace llvm diff --git a/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h b/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h index 8f336c13af61..d3a5d44ce8dd 100644 --- a/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h +++ b/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h @@ -693,8 +693,9 @@ public: /// An iterator over the \c LineCoverageStats objects for lines described by /// a \c CoverageData instance. 
class LineCoverageIterator - : public iterator_facade_base< - LineCoverageIterator, std::forward_iterator_tag, LineCoverageStats> { + : public iterator_facade_base<LineCoverageIterator, + std::forward_iterator_tag, + const LineCoverageStats> { public: LineCoverageIterator(const CoverageData &CD) : LineCoverageIterator(CD, CD.begin()->Line) {} @@ -711,8 +712,6 @@ public: const LineCoverageStats &operator*() const { return Stats; } - LineCoverageStats &operator*() { return Stats; } - LineCoverageIterator &operator++(); LineCoverageIterator getEnd() const { diff --git a/llvm/include/llvm/ProfileData/InstrProf.h b/llvm/include/llvm/ProfileData/InstrProf.h index 08a934e6985f..4395c2abb33e 100644 --- a/llvm/include/llvm/ProfileData/InstrProf.h +++ b/llvm/include/llvm/ProfileData/InstrProf.h @@ -205,9 +205,9 @@ StringRef getFuncNameWithoutPrefix(StringRef PGOFuncName, StringRef FileName = "<unknown>"); /// Given a vector of strings (function PGO names) \c NameStrs, the -/// method generates a combined string \c Result thatis ready to be +/// method generates a combined string \c Result that is ready to be /// serialized. The \c Result string is comprised of three fields: -/// The first field is the legnth of the uncompressed strings, and the +/// The first field is the length of the uncompressed strings, and the /// the second field is the length of the zlib-compressed string. /// Both fields are encoded in ULEB128. 
If \c doCompress is false, the /// third field is the uncompressed strings; otherwise it is the @@ -308,7 +308,8 @@ inline std::error_code make_error_code(instrprof_error E) { class InstrProfError : public ErrorInfo<InstrProfError> { public: - InstrProfError(instrprof_error Err) : Err(Err) { + InstrProfError(instrprof_error Err, const Twine &ErrStr = Twine()) + : Err(Err), Msg(ErrStr.str()) { assert(Err != instrprof_error::success && "Not an error"); } @@ -321,6 +322,7 @@ public: } instrprof_error get() const { return Err; } + const std::string &getMessage() const { return Msg; } /// Consume an Error and return the raw enum value contained within it. The /// Error must either be a success value, or contain a single InstrProfError. @@ -337,6 +339,7 @@ public: private: instrprof_error Err; + std::string Msg; }; class SoftInstrProfErrors { @@ -474,7 +477,8 @@ public: /// is used by the raw and text profile readers. Error addFuncName(StringRef FuncName) { if (FuncName.empty()) - return make_error<InstrProfError>(instrprof_error::malformed); + return make_error<InstrProfError>(instrprof_error::malformed, + "function name is empty"); auto Ins = NameTab.insert(FuncName); if (Ins.second) { MD5NameMap.push_back(std::make_pair( @@ -1104,6 +1108,8 @@ namespace RawInstrProf { // Version 5: Bit 60 of FuncHash is reserved for the flag for the context // sensitive records. // Version 6: Added binary id. +// Version 7: Reorder binary id and include version in signature. +// Version 8: Use relative counter pointer. const uint64_t Version = INSTR_PROF_RAW_VERSION; template <class IntPtrT> inline uint64_t getMagic(); @@ -1142,8 +1148,8 @@ void getMemOPSizeRangeFromOption(StringRef Str, int64_t &RangeStart, // Create a COMDAT variable INSTR_PROF_RAW_VERSION_VAR to make the runtime // aware this is an ir_level profile so it can set the version flag. 
-void createIRLevelProfileFlagVar(Module &M, bool IsCS, - bool InstrEntryBBEnabled); +GlobalVariable *createIRLevelProfileFlagVar(Module &M, bool IsCS, + bool InstrEntryBBEnabled); // Create the variable for the profile file name. void createProfileFileNameVar(Module &M, StringRef InstrProfileOutput); diff --git a/llvm/include/llvm/ProfileData/InstrProfData.inc b/llvm/include/llvm/ProfileData/InstrProfData.inc index 08a642469627..008b8dde5820 100644 --- a/llvm/include/llvm/ProfileData/InstrProfData.inc +++ b/llvm/include/llvm/ProfileData/InstrProfData.inc @@ -75,9 +75,7 @@ INSTR_PROF_DATA(const uint64_t, llvm::Type::getInt64Ty(Ctx), NameRef, \ INSTR_PROF_DATA(const uint64_t, llvm::Type::getInt64Ty(Ctx), FuncHash, \ ConstantInt::get(llvm::Type::getInt64Ty(Ctx), \ Inc->getHash()->getZExtValue())) -INSTR_PROF_DATA(const IntPtrT, llvm::Type::getInt64PtrTy(Ctx), CounterPtr, \ - ConstantExpr::getBitCast(CounterPtr, \ - llvm::Type::getInt64PtrTy(Ctx))) +INSTR_PROF_DATA(const IntPtrT, IntPtrTy, CounterPtr, RelativeCounterPtr) /* This is used to map function pointers for the indirect call targets to * function name hashes during the conversion from raw to merged profile * data. 
@@ -129,15 +127,16 @@ INSTR_PROF_VALUE_NODE(PtrToNodeT, llvm::Type::getInt8PtrTy(Ctx), Next, \ #endif INSTR_PROF_RAW_HEADER(uint64_t, Magic, __llvm_profile_get_magic()) INSTR_PROF_RAW_HEADER(uint64_t, Version, __llvm_profile_get_version()) +INSTR_PROF_RAW_HEADER(uint64_t, BinaryIdsSize, __llvm_write_binary_ids(NULL)) INSTR_PROF_RAW_HEADER(uint64_t, DataSize, DataSize) INSTR_PROF_RAW_HEADER(uint64_t, PaddingBytesBeforeCounters, PaddingBytesBeforeCounters) INSTR_PROF_RAW_HEADER(uint64_t, CountersSize, CountersSize) INSTR_PROF_RAW_HEADER(uint64_t, PaddingBytesAfterCounters, PaddingBytesAfterCounters) INSTR_PROF_RAW_HEADER(uint64_t, NamesSize, NamesSize) -INSTR_PROF_RAW_HEADER(uint64_t, CountersDelta, (uintptr_t)CountersBegin) +INSTR_PROF_RAW_HEADER(uint64_t, CountersDelta, + (uintptr_t)CountersBegin - (uintptr_t)DataBegin) INSTR_PROF_RAW_HEADER(uint64_t, NamesDelta, (uintptr_t)NamesBegin) INSTR_PROF_RAW_HEADER(uint64_t, ValueKindLast, IPVK_Last) -INSTR_PROF_RAW_HEADER(uint64_t, BinaryIdsSize, __llvm_write_binary_ids(NULL)) #undef INSTR_PROF_RAW_HEADER /* INSTR_PROF_RAW_HEADER end */ @@ -646,7 +645,7 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure, (uint64_t)'f' << 16 | (uint64_t)'R' << 8 | (uint64_t)129 /* Raw profile format version (start from 1). */ -#define INSTR_PROF_RAW_VERSION 6 +#define INSTR_PROF_RAW_VERSION 8 /* Indexed profile format version (start from 1). */ #define INSTR_PROF_INDEX_VERSION 7 /* Coverage mapping format version (start from 0). */ diff --git a/llvm/include/llvm/ProfileData/InstrProfReader.h b/llvm/include/llvm/ProfileData/InstrProfReader.h index 501c6f011d53..b62d4ff044a3 100644 --- a/llvm/include/llvm/ProfileData/InstrProfReader.h +++ b/llvm/include/llvm/ProfileData/InstrProfReader.h @@ -71,6 +71,7 @@ public: /// format. Provides an iterator over NamedInstrProfRecords. 
class InstrProfReader { instrprof_error LastError = instrprof_error::success; + std::string LastErrorMsg; public: InstrProfReader() = default; @@ -114,14 +115,21 @@ protected: std::unique_ptr<InstrProfSymtab> Symtab; /// Set the current error and return same. - Error error(instrprof_error Err) { + Error error(instrprof_error Err, const std::string &ErrMsg = "") { LastError = Err; + LastErrorMsg = ErrMsg; if (Err == instrprof_error::success) return Error::success(); - return make_error<InstrProfError>(Err); + return make_error<InstrProfError>(Err, ErrMsg); } - Error error(Error &&E) { return error(InstrProfError::take(std::move(E))); } + Error error(Error &&E) { + handleAllErrors(std::move(E), [&](const InstrProfError &IPE) { + LastError = IPE.get(); + LastErrorMsg = IPE.getMessage(); + }); + return make_error<InstrProfError>(LastError, LastErrorMsg); + } /// Clear the current error and return a successful one. Error success() { return error(instrprof_error::success); } @@ -136,7 +144,7 @@ public: /// Get the current error. Error getError() { if (hasError()) - return make_error<InstrProfError>(LastError); + return make_error<InstrProfError>(LastError, LastErrorMsg); return Error::success(); } @@ -197,7 +205,7 @@ public: /// Reader for the raw instrprof binary format from runtime. /// -/// This format is a raw memory dump of the instrumentation-baed profiling data +/// This format is a raw memory dump of the instrumentation-based profiling data /// from the runtime. It has no index. /// /// Templated on the unsigned type whose size matches pointers on the platform diff --git a/llvm/include/llvm/ProfileData/ProfileCommon.h b/llvm/include/llvm/ProfileData/ProfileCommon.h index f2d9ccc45fdc..ad92af22d92e 100644 --- a/llvm/include/llvm/ProfileData/ProfileCommon.h +++ b/llvm/include/llvm/ProfileData/ProfileCommon.h @@ -66,9 +66,9 @@ public: /// Find the summary entry for a desired percentile of counts. 
static const ProfileSummaryEntry & - getEntryForPercentile(SummaryEntryVector &DS, uint64_t Percentile); - static uint64_t getHotCountThreshold(SummaryEntryVector &DS); - static uint64_t getColdCountThreshold(SummaryEntryVector &DS); + getEntryForPercentile(const SummaryEntryVector &DS, uint64_t Percentile); + static uint64_t getHotCountThreshold(const SummaryEntryVector &DS); + static uint64_t getColdCountThreshold(const SummaryEntryVector &DS); }; class InstrProfSummaryBuilder final : public ProfileSummaryBuilder { @@ -92,8 +92,8 @@ public: void addRecord(const sampleprof::FunctionSamples &FS, bool isCallsiteSample = false); - std::unique_ptr<ProfileSummary> computeSummaryForProfiles( - const StringMap<sampleprof::FunctionSamples> &Profiles); + std::unique_ptr<ProfileSummary> + computeSummaryForProfiles(const sampleprof::SampleProfileMap &Profiles); std::unique_ptr<ProfileSummary> getSummary(); }; diff --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h index 2f71bbc6bbbe..7ac9eccf8ac2 100644 --- a/llvm/include/llvm/ProfileData/SampleProf.h +++ b/llvm/include/llvm/ProfileData/SampleProf.h @@ -29,10 +29,13 @@ #include "llvm/Support/raw_ostream.h" #include <algorithm> #include <cstdint> +#include <list> #include <map> #include <set> +#include <sstream> #include <string> #include <system_error> +#include <unordered_map> #include <utility> namespace llvm { @@ -104,10 +107,10 @@ static inline uint64_t SPMagic(SampleProfileFormat Format = SPF_Binary) { /// current Format uses MD5 to represent the string. static inline StringRef getRepInFormat(StringRef Name, bool UseMD5, std::string &GUIDBuf) { - if (Name.empty()) + if (Name.empty() || !UseMD5) return Name; GUIDBuf = std::to_string(Function::getGUID(Name)); - return UseMD5 ? 
StringRef(GUIDBuf) : Name; + return GUIDBuf; } static inline uint64_t SPVersion() { return 103; } @@ -122,13 +125,14 @@ enum SecType { SecProfileSymbolList = 3, SecFuncOffsetTable = 4, SecFuncMetadata = 5, + SecCSNameTable = 6, // marker for the first type of profile. SecFuncProfileFirst = 32, SecLBRProfile = SecFuncProfileFirst }; static inline std::string getSecName(SecType Type) { - switch (Type) { + switch ((int)Type) { // Avoid -Wcovered-switch-default case SecInValid: return "InvalidSection"; case SecProfSummary: @@ -141,10 +145,13 @@ static inline std::string getSecName(SecType Type) { return "FuncOffsetTableSection"; case SecFuncMetadata: return "FunctionMetadata"; + case SecCSNameTable: + return "CSNameTableSection"; case SecLBRProfile: return "LBRProfileSection"; + default: + return "UnknownSection"; } - llvm_unreachable("A SecType has no name for output"); } // Entry type of section header table used by SampleProfileExtBinaryBaseReader @@ -202,6 +209,13 @@ enum class SecFuncMetadataFlags : uint32_t { SecFlagHasAttribute = (1 << 1) }; +enum class SecFuncOffsetFlags : uint32_t { + SecFlagInvalid = 0, + // Store function offsets in an order of contexts. The order ensures that + // callee contexts of a given context laid out next to it. + SecFlagOrdered = (1 << 0), +}; + // Verify section specific flag is used for the correct section. 
template <class SecFlagType> static inline void verifySecFlag(SecType Type, SecFlagType Flag) { @@ -222,6 +236,8 @@ static inline void verifySecFlag(SecType Type, SecFlagType Flag) { IsFlagLegal = std::is_same<SecFuncMetadataFlags, SecFlagType>(); break; default: + case SecFuncOffsetTable: + IsFlagLegal = std::is_same<SecFuncOffsetFlags, SecFlagType>(); break; } if (!IsFlagLegal) @@ -396,54 +412,123 @@ enum ContextAttributeMask { ContextShouldBeInlined = 0x2, // Leaf of context should be inlined }; +// Represents a context frame with function name and line location +struct SampleContextFrame { + StringRef FuncName; + LineLocation Location; + + SampleContextFrame() : Location(0, 0) {} + + SampleContextFrame(StringRef FuncName, LineLocation Location) + : FuncName(FuncName), Location(Location) {} + + bool operator==(const SampleContextFrame &That) const { + return Location == That.Location && FuncName == That.FuncName; + } + + bool operator!=(const SampleContextFrame &That) const { + return !(*this == That); + } + + std::string toString(bool OutputLineLocation) const { + std::ostringstream OContextStr; + OContextStr << FuncName.str(); + if (OutputLineLocation) { + OContextStr << ":" << Location.LineOffset; + if (Location.Discriminator) + OContextStr << "." << Location.Discriminator; + } + return OContextStr.str(); + } +}; + +static inline hash_code hash_value(const SampleContextFrame &arg) { + return hash_combine(arg.FuncName, arg.Location.LineOffset, + arg.Location.Discriminator); +} + +using SampleContextFrameVector = SmallVector<SampleContextFrame, 10>; +using SampleContextFrames = ArrayRef<SampleContextFrame>; + +struct SampleContextFrameHash { + uint64_t operator()(const SampleContextFrameVector &S) const { + return hash_combine_range(S.begin(), S.end()); + } +}; + // Sample context for FunctionSamples. It consists of the calling context, // the function name and context state. 
Internally sample context is represented -// using StringRef, which is also the input for constructing a `SampleContext`. +// using ArrayRef, which is also the input for constructing a `SampleContext`. // It can accept and represent both full context string as well as context-less // function name. -// Example of full context string (note the wrapping `[]`): -// `[main:3 @ _Z5funcAi:1 @ _Z8funcLeafi]` -// Example of context-less function name (same as AutoFDO): -// `_Z8funcLeafi` +// For a CS profile, a full context vector can look like: +// `main:3 _Z5funcAi:1 _Z8funcLeafi` +// For a base CS profile without calling context, the context vector should only +// contain the leaf frame name. +// For a non-CS profile, the context vector should be empty. class SampleContext { public: SampleContext() : State(UnknownContext), Attributes(ContextNone) {} - SampleContext(StringRef ContextStr, ContextStateMask CState = UnknownContext) - : Attributes(ContextNone) { - setContext(ContextStr, CState); - } - // Promote context by removing top frames (represented by `ContextStrToRemove`). - // Note that with string representation of context, the promotion is effectively - // a substr operation with `ContextStrToRemove` removed from left. - void promoteOnPath(StringRef ContextStrToRemove) { - assert(FullContext.startswith(ContextStrToRemove)); + SampleContext(StringRef Name) + : Name(Name), State(UnknownContext), Attributes(ContextNone) {} - // Remove leading context and frame separator " @ ". - FullContext = FullContext.substr(ContextStrToRemove.size() + 3); - CallingContext = CallingContext.substr(ContextStrToRemove.size() + 3); + SampleContext(SampleContextFrames Context, + ContextStateMask CState = RawContext) + : Attributes(ContextNone) { + assert(!Context.empty() && "Context is empty"); + setContext(Context, CState); } - // Split the top context frame (left-most substr) from context. 
- static std::pair<StringRef, StringRef> - splitContextString(StringRef ContextStr) { - return ContextStr.split(" @ "); + // Give a context string, decode and populate internal states like + // Function name, Calling context and context state. Example of input + // `ContextStr`: `[main:3 @ _Z5funcAi:1 @ _Z8funcLeafi]` + SampleContext(StringRef ContextStr, + std::list<SampleContextFrameVector> &CSNameTable, + ContextStateMask CState = RawContext) + : Attributes(ContextNone) { + assert(!ContextStr.empty()); + // Note that `[]` wrapped input indicates a full context string, otherwise + // it's treated as context-less function name only. + bool HasContext = ContextStr.startswith("["); + if (!HasContext) { + State = UnknownContext; + Name = ContextStr; + } else { + CSNameTable.emplace_back(); + SampleContextFrameVector &Context = CSNameTable.back(); + createCtxVectorFromStr(ContextStr, Context); + setContext(Context, CState); + } } - // Reconstruct a new context with the last k frames, return the context-less - // name if K = 1 - StringRef getContextWithLastKFrames(uint32_t K) { - if (K == 1) - return getNameWithoutContext(); - - size_t I = FullContext.size(); - while (K--) { - I = FullContext.find_last_of(" @ ", I); - if (I == StringRef::npos) - return FullContext; - I -= 2; + /// Create a context vector from a given context string and save it in + /// `Context`. 
+ static void createCtxVectorFromStr(StringRef ContextStr, + SampleContextFrameVector &Context) { + // Remove encapsulating '[' and ']' if any + ContextStr = ContextStr.substr(1, ContextStr.size() - 2); + StringRef ContextRemain = ContextStr; + StringRef ChildContext; + StringRef CalleeName; + while (!ContextRemain.empty()) { + auto ContextSplit = ContextRemain.split(" @ "); + ChildContext = ContextSplit.first; + ContextRemain = ContextSplit.second; + LineLocation CallSiteLoc(0, 0); + decodeContextString(ChildContext, CalleeName, CallSiteLoc); + Context.emplace_back(CalleeName, CallSiteLoc); } - return FullContext.slice(I + 3, StringRef::npos); + } + + // Promote context by removing top frames with the length of + // `ContextFramesToRemove`. Note that with array representation of context, + // the promotion is effectively a slice operation with first + // `ContextFramesToRemove` elements removed from left. + void promoteOnPath(uint32_t ContextFramesToRemove) { + assert(ContextFramesToRemove <= FullContext.size() && + "Cannot remove more than the whole context"); + FullContext = FullContext.drop_front(ContextFramesToRemove); } // Decode context string for a frame to get function name and location. 
@@ -469,7 +554,7 @@ public: } } - operator StringRef() const { return FullContext; } + operator SampleContextFrames() const { return FullContext; } bool hasAttribute(ContextAttributeMask A) { return Attributes & (uint32_t)A; } void setAttribute(ContextAttributeMask A) { Attributes |= (uint32_t)A; } uint32_t getAllAttributes() { return Attributes; } @@ -478,60 +563,114 @@ public: void setState(ContextStateMask S) { State |= (uint32_t)S; } void clearState(ContextStateMask S) { State &= (uint32_t)~S; } bool hasContext() const { return State != UnknownContext; } - bool isBaseContext() const { return CallingContext.empty(); } - StringRef getNameWithoutContext() const { return Name; } - StringRef getCallingContext() const { return CallingContext; } - StringRef getNameWithContext() const { return FullContext; } - -private: - // Give a context string, decode and populate internal states like - // Function name, Calling context and context state. Example of input - // `ContextStr`: `[main:3 @ _Z5funcAi:1 @ _Z8funcLeafi]` - void setContext(StringRef ContextStr, ContextStateMask CState) { - assert(!ContextStr.empty()); - // Note that `[]` wrapped input indicates a full context string, otherwise - // it's treated as context-less function name only. 
- bool HasContext = ContextStr.startswith("["); - if (!HasContext && CState == UnknownContext) { - State = UnknownContext; - Name = FullContext = ContextStr; - } else { - // Assume raw context profile if unspecified - if (CState == UnknownContext) - State = RawContext; - else - State = CState; - - // Remove encapsulating '[' and ']' if any - if (HasContext) - FullContext = ContextStr.substr(1, ContextStr.size() - 2); - else - FullContext = ContextStr; - - // Caller is to the left of callee in context string - auto NameContext = FullContext.rsplit(" @ "); - if (NameContext.second.empty()) { - Name = NameContext.first; - CallingContext = NameContext.second; - } else { - Name = NameContext.second; - CallingContext = NameContext.first; + bool isBaseContext() const { return FullContext.size() == 1; } + StringRef getName() const { return Name; } + SampleContextFrames getContextFrames() const { return FullContext; } + + static std::string getContextString(SampleContextFrames Context, + bool IncludeLeafLineLocation = false) { + std::ostringstream OContextStr; + for (uint32_t I = 0; I < Context.size(); I++) { + if (OContextStr.str().size()) { + OContextStr << " @ "; } + OContextStr << Context[I].toString(I != Context.size() - 1 || + IncludeLeafLineLocation); } + return OContextStr.str(); + } + + std::string toString() const { + if (!hasContext()) + return Name.str(); + return getContextString(FullContext, false); + } + + uint64_t getHashCode() const { + return hasContext() ? hash_value(getContextFrames()) + : hash_value(getName()); + } + + /// Set the name of the function. 
+ void setName(StringRef FunctionName) { + assert(FullContext.empty() && + "setName should only be called for non-CS profile"); + Name = FunctionName; + } + + void setContext(SampleContextFrames Context, + ContextStateMask CState = RawContext) { + assert(CState != UnknownContext); + FullContext = Context; + Name = Context.back().FuncName; + State = CState; + } + + bool operator==(const SampleContext &That) const { + return State == That.State && Name == That.Name && + FullContext == That.FullContext; + } + + bool operator!=(const SampleContext &That) const { return !(*this == That); } + + bool operator<(const SampleContext &That) const { + if (State != That.State) + return State < That.State; + + if (!hasContext()) { + return (Name.compare(That.Name)) == -1; + } + + uint64_t I = 0; + while (I < std::min(FullContext.size(), That.FullContext.size())) { + auto &Context1 = FullContext[I]; + auto &Context2 = That.FullContext[I]; + auto V = Context1.FuncName.compare(Context2.FuncName); + if (V) + return V == -1; + if (Context1.Location != Context2.Location) + return Context1.Location < Context2.Location; + I++; + } + + return FullContext.size() < That.FullContext.size(); + } + + struct Hash { + uint64_t operator()(const SampleContext &Context) const { + return Context.getHashCode(); + } + }; + + bool IsPrefixOf(const SampleContext &That) const { + auto ThisContext = FullContext; + auto ThatContext = That.FullContext; + if (ThatContext.size() < ThisContext.size()) + return false; + ThatContext = ThatContext.take_front(ThisContext.size()); + // Compare Leaf frame first + if (ThisContext.back().FuncName != ThatContext.back().FuncName) + return false; + // Compare leading context + return ThisContext.drop_back() == ThatContext.drop_back(); } - // Full context string including calling context and leaf function name - StringRef FullContext; - // Function name for the associated sample profile +private: + /// Mangled name of the function. 
StringRef Name; - // Calling context (leaf function excluded) for the associated sample profile - StringRef CallingContext; + // Full context including calling context and leaf function name + SampleContextFrames FullContext; // State of the associated sample profile uint32_t State; // Attribute of the associated sample profile uint32_t Attributes; }; +static inline hash_code hash_value(const SampleContext &arg) { + return arg.hasContext() ? hash_value(arg.getContextFrames()) + : hash_value(arg.getName()); +} + class FunctionSamples; class SampleProfileReaderItaniumRemapper; @@ -592,6 +731,20 @@ public: return BodySamples[LineLocation(Index, 0)].merge(S, Weight); } + // Accumulate all body samples to set total samples. + void updateTotalSamples() { + setTotalSamples(0); + for (const auto &I : BodySamples) + addTotalSamples(I.second.getSamples()); + + for (auto &I : CallsiteSamples) { + for (auto &CS : I.second) { + CS.second.updateTotalSamples(); + addTotalSamples(CS.second.getTotalSamples()); + } + } + } + /// Return the number of samples collected at the given location. /// Each location is specified by \p LineOffset and \p Discriminator. /// If the location is not found in profile, return error. @@ -709,10 +862,9 @@ public: /// Optionally scale samples by \p Weight. sampleprof_error merge(const FunctionSamples &Other, uint64_t Weight = 1) { sampleprof_error Result = sampleprof_error::success; - Name = Other.getName(); if (!GUIDToFuncNameMap) GUIDToFuncNameMap = Other.GUIDToFuncNameMap; - if (Context.getNameWithContext().empty()) + if (Context.getName().empty()) Context = Other.getContext(); if (FunctionHash == 0) { // Set the function hash code for the target profile. @@ -758,7 +910,7 @@ public: }; if (isDeclaration(SymbolMap.lookup(getFuncName()))) { // Add to the import list only when it's defined out of module. 
- S.insert(getGUID(Name)); + S.insert(getGUID(getName())); } // Import hot CallTargets, which may not be available in IR because full // profile annotation cannot be done until backend compilation in ThinLTO. @@ -775,18 +927,13 @@ public: } /// Set the name of the function. - void setName(StringRef FunctionName) { Name = FunctionName; } + void setName(StringRef FunctionName) { Context.setName(FunctionName); } /// Return the function name. - StringRef getName() const { return Name; } - - /// Return function name with context. - StringRef getNameWithContext() const { - return FunctionSamples::ProfileIsCS ? Context.getNameWithContext() : Name; - } + StringRef getName() const { return Context.getName(); } /// Return the original function name. - StringRef getFuncName() const { return getFuncName(Name); } + StringRef getFuncName() const { return getFuncName(getName()); } void setFunctionHash(uint64_t Hash) { FunctionHash = Hash; } @@ -913,9 +1060,6 @@ public: void findAllNames(DenseSet<StringRef> &NameSet) const; private: - /// Mangled name of the function. - StringRef Name; - /// CFG hash value for the function. uint64_t FunctionHash = 0; @@ -961,6 +1105,14 @@ private: raw_ostream &operator<<(raw_ostream &OS, const FunctionSamples &FS); +using SampleProfileMap = + std::unordered_map<SampleContext, FunctionSamples, SampleContext::Hash>; + +using NameFunctionSamples = std::pair<SampleContext, const FunctionSamples *>; + +void sortFuncProfiles(const SampleProfileMap &ProfileMap, + std::vector<NameFunctionSamples> &SortedProfiles); + /// Sort a LocationT->SampleT map by LocationT. /// /// It produces a sorted list of <LocationT, SampleT> records by ascending @@ -989,18 +1141,24 @@ private: /// sure ProfileMap's key is consistent with FunctionSample's name/context. class SampleContextTrimmer { public: - SampleContextTrimmer(StringMap<FunctionSamples> &Profiles) - : ProfileMap(Profiles){}; - // Trim and merge cold context profile when requested. 
+ SampleContextTrimmer(SampleProfileMap &Profiles) : ProfileMap(Profiles){}; + // Trim and merge cold context profile when requested. TrimBaseProfileOnly + // should only be effective when TrimColdContext is true. On top of + // TrimColdContext, TrimBaseProfileOnly can be used to specify to trim all + // cold profiles or only cold base profiles. Trimming base profiles only is + // mainly to honor the preinliner decsion. Note that when MergeColdContext is + // true, preinliner decsion is not honored anyway so TrimBaseProfileOnly will + // be ignored. void trimAndMergeColdContextProfiles(uint64_t ColdCountThreshold, bool TrimColdContext, bool MergeColdContext, - uint32_t ColdContextFrameLength); + uint32_t ColdContextFrameLength, + bool TrimBaseProfileOnly); // Canonicalize context profile name and attributes. void canonicalizeContextProfiles(); private: - StringMap<FunctionSamples> &ProfileMap; + SampleProfileMap &ProfileMap; }; /// ProfileSymbolList records the list of function symbols shown up @@ -1045,6 +1203,22 @@ private: }; } // end namespace sampleprof + +using namespace sampleprof; +// Provide DenseMapInfo for SampleContext. 
+template <> struct DenseMapInfo<SampleContext> { + static inline SampleContext getEmptyKey() { return SampleContext(); } + + static inline SampleContext getTombstoneKey() { return SampleContext("@"); } + + static unsigned getHashValue(const SampleContext &Val) { + return Val.getHashCode(); + } + + static bool isEqual(const SampleContext &LHS, const SampleContext &RHS) { + return LHS == RHS; + } +}; } // end namespace llvm #endif // LLVM_PROFILEDATA_SAMPLEPROF_H diff --git a/llvm/include/llvm/ProfileData/SampleProfReader.h b/llvm/include/llvm/ProfileData/SampleProfReader.h index 2d5925bdb2b4..e6d31f1b9098 100644 --- a/llvm/include/llvm/ProfileData/SampleProfReader.h +++ b/llvm/include/llvm/ProfileData/SampleProfReader.h @@ -242,9 +242,11 @@ #include "llvm/Support/SymbolRemappingReader.h" #include <algorithm> #include <cstdint> +#include <list> #include <memory> #include <string> #include <system_error> +#include <unordered_set> #include <vector> namespace llvm { @@ -380,8 +382,8 @@ public: /// The implementaion to read sample profiles from the associated file. virtual std::error_code readImpl() = 0; - /// Print the profile for \p FName on stream \p OS. - void dumpFunctionProfile(StringRef FName, raw_ostream &OS = dbgs()); + /// Print the profile for \p FContext on stream \p OS. + void dumpFunctionProfile(SampleContext FContext, raw_ostream &OS = dbgs()); /// Collect functions with definitions in Module M. 
For reader which /// support loading function profiles on demand, return true when the @@ -407,6 +409,13 @@ public: std::string FGUID; StringRef CanonName = FunctionSamples::getCanonicalFnName(F); CanonName = getRepInFormat(CanonName, useMD5(), FGUID); + auto It = Profiles.find(CanonName); + if (It != Profiles.end()) + return &It->second; + if (!FGUID.empty()) { + assert(useMD5() && "New name should only be generated for md5 profile"); + CanonName = *MD5NameBuffer.insert(FGUID).first; + } return &Profiles[CanonName]; } @@ -429,7 +438,7 @@ public: } /// Return all the profiles. - StringMap<FunctionSamples> &getProfiles() { return Profiles; } + SampleProfileMap &getProfiles() { return Profiles; } /// Report a parse error message. void reportError(int64_t LineNumber, const Twine &Msg) const { @@ -495,7 +504,7 @@ protected: /// The profile of every function executed at runtime is collected /// in the structure FunctionSamples. This maps function objects /// to their corresponding profiles. - StringMap<FunctionSamples> Profiles; + SampleProfileMap Profiles; /// LLVM context used to emit diagnostics. LLVMContext &Ctx; @@ -503,6 +512,10 @@ protected: /// Memory buffer holding the profile file. std::unique_ptr<MemoryBuffer> Buffer; + /// Extra name buffer holding names created on demand. + /// This should only be needed for md5 profiles. + std::unordered_set<std::string> MD5NameBuffer; + /// Profile summary information. std::unique_ptr<ProfileSummary> Summary; @@ -555,6 +568,11 @@ public: /// Return true if \p Buffer is in the format supported by this class. static bool hasFormat(const MemoryBuffer &Buffer); + +private: + /// CSNameTable is used to save full context vectors. This serves as an + /// underlying immutable buffer for all clients. + std::list<SampleContextFrameVector> CSNameTable; }; class SampleProfileReaderBinary : public SampleProfileReader { @@ -626,6 +644,7 @@ protected: /// Read a string indirectly via the name table. 
virtual ErrorOr<StringRef> readStringFromTable(); + virtual ErrorOr<SampleContext> readSampleContextFromTable(); private: std::error_code readSummaryEntry(std::vector<ProfileSummaryEntry> &Entries); @@ -683,6 +702,7 @@ protected: std::error_code readFuncProfiles(); std::error_code readMD5NameTable(); std::error_code readNameTableSec(bool IsMD5); + std::error_code readCSNameTableSec(); std::error_code readProfileSymbolList(); virtual std::error_code readHeader() override; @@ -692,12 +712,19 @@ protected: // placeholder for subclasses to dispatch their own section readers. virtual std::error_code readCustomSection(const SecHdrTableEntry &Entry) = 0; virtual ErrorOr<StringRef> readStringFromTable() override; + virtual ErrorOr<SampleContext> readSampleContextFromTable() override; + ErrorOr<SampleContextFrames> readContextFromTable(); std::unique_ptr<ProfileSymbolList> ProfSymList; - /// The table mapping from function name to the offset of its FunctionSample - /// towards file start. - DenseMap<StringRef, uint64_t> FuncOffsetTable; + /// The table mapping from function context to the offset of its + /// FunctionSample towards file start. + DenseMap<SampleContext, uint64_t> FuncOffsetTable; + + /// Function offset mapping ordered by contexts. + std::unique_ptr<std::vector<std::pair<SampleContext, uint64_t>>> + OrderedFuncOffsets; + /// The set containing the functions to use when compiling a module. DenseSet<StringRef> FuncsToUse; @@ -716,10 +743,16 @@ protected: /// the lifetime of MD5StringBuf is not shorter than that of NameTable. std::unique_ptr<std::vector<std::string>> MD5StringBuf; + /// CSNameTable is used to save full context vectors. This serves as an + /// underlying immutable buffer for all clients. + std::unique_ptr<const std::vector<SampleContextFrameVector>> CSNameTable; + /// If SkipFlatProf is true, skip the sections with /// SecFlagFlat flag. 
bool SkipFlatProf = false; + bool FuncOffsetsOrdered = false; + public: SampleProfileReaderExtBinaryBase(std::unique_ptr<MemoryBuffer> B, LLVMContext &C, SampleProfileFormat Format) @@ -753,6 +786,8 @@ private: virtual std::error_code verifySPMagic(uint64_t Magic) override; virtual std::error_code readCustomSection(const SecHdrTableEntry &Entry) override { + // Update the data reader pointer to the end of the section. + Data = End; return sampleprof_error::success; }; diff --git a/llvm/include/llvm/ProfileData/SampleProfWriter.h b/llvm/include/llvm/ProfileData/SampleProfWriter.h index 107f7a730a3c..773beac24ebc 100644 --- a/llvm/include/llvm/ProfileData/SampleProfWriter.h +++ b/llvm/include/llvm/ProfileData/SampleProfWriter.h @@ -52,7 +52,7 @@ public: /// Write all the sample profiles in the given map of samples. /// /// \returns status code of the file update operation. - virtual std::error_code write(const StringMap<FunctionSamples> &ProfileMap); + virtual std::error_code write(const SampleProfileMap &ProfileMap); raw_ostream &getOutputStream() { return *OutputStream; } @@ -78,12 +78,10 @@ protected: : OutputStream(std::move(OS)) {} /// Write a file header for the profile file. - virtual std::error_code - writeHeader(const StringMap<FunctionSamples> &ProfileMap) = 0; + virtual std::error_code writeHeader(const SampleProfileMap &ProfileMap) = 0; // Write function profiles to the profile file. - virtual std::error_code - writeFuncProfiles(const StringMap<FunctionSamples> &ProfileMap); + virtual std::error_code writeFuncProfiles(const SampleProfileMap &ProfileMap); /// Output stream where to emit the profile to. std::unique_ptr<raw_ostream> OutputStream; @@ -92,7 +90,7 @@ protected: std::unique_ptr<ProfileSummary> Summary; /// Compute summary for this profile. - void computeSummary(const StringMap<FunctionSamples> &ProfileMap); + void computeSummary(const SampleProfileMap &ProfileMap); /// Profile format. 
SampleProfileFormat Format = SPF_None; @@ -107,8 +105,7 @@ protected: SampleProfileWriterText(std::unique_ptr<raw_ostream> &OS) : SampleProfileWriter(OS), Indent(0) {} - std::error_code - writeHeader(const StringMap<FunctionSamples> &ProfileMap) override { + std::error_code writeHeader(const SampleProfileMap &ProfileMap) override { return sampleprof_error::success; } @@ -132,19 +129,22 @@ public: virtual std::error_code writeSample(const FunctionSamples &S) override; protected: + virtual MapVector<StringRef, uint32_t> &getNameTable() { return NameTable; } virtual std::error_code writeMagicIdent(SampleProfileFormat Format); virtual std::error_code writeNameTable(); virtual std::error_code - writeHeader(const StringMap<FunctionSamples> &ProfileMap) override; + writeHeader(const SampleProfileMap &ProfileMap) override; std::error_code writeSummary(); - std::error_code writeNameIdx(StringRef FName, bool IsContextName = false); + virtual std::error_code writeContextIdx(const SampleContext &Context); + std::error_code writeNameIdx(StringRef FName); std::error_code writeBody(const FunctionSamples &S); - inline void stablizeNameTable(std::set<StringRef> &V); + inline void stablizeNameTable(MapVector<StringRef, uint32_t> &NameTable, + std::set<StringRef> &V); MapVector<StringRef, uint32_t> NameTable; - std::unordered_set<std::string> BracketedContextStr; - void addName(StringRef FName, bool IsContextName = false); + void addName(StringRef FName); + virtual void addContext(const SampleContext &Context); void addNames(const FunctionSamples &S); private: @@ -168,6 +168,7 @@ const std::array<SmallVector<SecHdrTableEntry, 8>, NumOfLayout> // DefaultLayout SmallVector<SecHdrTableEntry, 8>({{SecProfSummary, 0, 0, 0, 0}, {SecNameTable, 0, 0, 0, 0}, + {SecCSNameTable, 0, 0, 0, 0}, {SecFuncOffsetTable, 0, 0, 0, 0}, {SecLBRProfile, 0, 0, 0, 0}, {SecProfileSymbolList, 0, 0, 0, 0}, @@ -190,8 +191,7 @@ const std::array<SmallVector<SecHdrTableEntry, 8>, NumOfLayout> class 
SampleProfileWriterExtBinaryBase : public SampleProfileWriterBinary { using SampleProfileWriterBinary::SampleProfileWriterBinary; public: - virtual std::error_code - write(const StringMap<FunctionSamples> &ProfileMap) override; + virtual std::error_code write(const SampleProfileMap &ProfileMap) override; virtual void setToCompressAllSections() override; void setToCompressSection(SecType Type); @@ -246,29 +246,32 @@ protected: addSecFlag(SectionHdrLayout[SectionIdx], Flag); } + virtual void addContext(const SampleContext &Context) override; + // placeholder for subclasses to dispatch their own section writers. virtual std::error_code writeCustomSection(SecType Type) = 0; // Verify the SecLayout is supported by the format. virtual void verifySecLayout(SectionLayout SL) = 0; // specify the order to write sections. - virtual std::error_code - writeSections(const StringMap<FunctionSamples> &ProfileMap) = 0; + virtual std::error_code writeSections(const SampleProfileMap &ProfileMap) = 0; // Dispatch section writer for each section. \p LayoutIdx is the sequence // number indicating where the section is located in SectionHdrLayout. - virtual std::error_code - writeOneSection(SecType Type, uint32_t LayoutIdx, - const StringMap<FunctionSamples> &ProfileMap); + virtual std::error_code writeOneSection(SecType Type, uint32_t LayoutIdx, + const SampleProfileMap &ProfileMap); // Helper function to write name table. virtual std::error_code writeNameTable() override; + virtual std::error_code + writeContextIdx(const SampleContext &Context) override; + std::error_code writeCSNameIdx(const SampleContext &Context); + std::error_code writeCSNameTableSection(); - std::error_code writeFuncMetadata(const StringMap<FunctionSamples> &Profiles); + std::error_code writeFuncMetadata(const SampleProfileMap &Profiles); // Functions to write various kinds of sections. 
- std::error_code - writeNameTableSection(const StringMap<FunctionSamples> &ProfileMap); + std::error_code writeNameTableSection(const SampleProfileMap &ProfileMap); std::error_code writeFuncOffsetTable(); std::error_code writeProfileSymbolListSection(); @@ -289,7 +292,7 @@ private: void allocSecHdrTable(); std::error_code writeSecHdrTable(); virtual std::error_code - writeHeader(const StringMap<FunctionSamples> &ProfileMap) override; + writeHeader(const SampleProfileMap &ProfileMap) override; std::error_code compressAndOutput(); // We will swap the raw_ostream held by LocalBufStream and that @@ -312,12 +315,16 @@ private: // be read. std::vector<SecHdrTableEntry> SecHdrTable; - // FuncOffsetTable maps function name to its profile offset in SecLBRProfile - // section. It is used to load function profile on demand. - MapVector<StringRef, uint64_t> FuncOffsetTable; + // FuncOffsetTable maps function context to its profile offset in + // SecLBRProfile section. It is used to load function profile on demand. + MapVector<SampleContext, uint64_t> FuncOffsetTable; // Whether to use MD5 to represent string. bool UseMD5 = false; + /// CSNameTable maps function context to its offset in SecCSNameTable section. + /// The offset will be used everywhere where the context is referenced. 
+ MapVector<SampleContext, uint32_t> CSNameTable; + ProfileSymbolList *ProfSymList = nullptr; }; @@ -327,13 +334,11 @@ public: : SampleProfileWriterExtBinaryBase(OS) {} private: - std::error_code - writeDefaultLayout(const StringMap<FunctionSamples> &ProfileMap); - std::error_code - writeCtxSplitLayout(const StringMap<FunctionSamples> &ProfileMap); + std::error_code writeDefaultLayout(const SampleProfileMap &ProfileMap); + std::error_code writeCtxSplitLayout(const SampleProfileMap &ProfileMap); virtual std::error_code - writeSections(const StringMap<FunctionSamples> &ProfileMap) override; + writeSections(const SampleProfileMap &ProfileMap) override; virtual std::error_code writeCustomSection(SecType Type) override { return sampleprof_error::success; @@ -380,8 +385,7 @@ class SampleProfileWriterCompactBinary : public SampleProfileWriterBinary { public: virtual std::error_code writeSample(const FunctionSamples &S) override; - virtual std::error_code - write(const StringMap<FunctionSamples> &ProfileMap) override; + virtual std::error_code write(const SampleProfileMap &ProfileMap) override; protected: /// The table mapping from function name to the offset of its FunctionSample @@ -392,7 +396,7 @@ protected: uint64_t TableOffset; virtual std::error_code writeNameTable() override; virtual std::error_code - writeHeader(const StringMap<FunctionSamples> &ProfileMap) override; + writeHeader(const SampleProfileMap &ProfileMap) override; std::error_code writeFuncOffsetTable(); }; diff --git a/llvm/include/llvm/Support/AArch64TargetParser.def b/llvm/include/llvm/Support/AArch64TargetParser.def index ae2fc673c54e..b3cfb71601f1 100644 --- a/llvm/include/llvm/Support/AArch64TargetParser.def +++ b/llvm/include/llvm/Support/AArch64TargetParser.def @@ -58,6 +58,24 @@ AARCH64_ARCH("armv8.7-a", ARMV8_7A, "8.7-A", "v8.7a", AArch64::AEK_RDM | AArch64::AEK_RCPC | AArch64::AEK_DOTPROD | AArch64::AEK_SM4 | AArch64::AEK_SHA3 | AArch64::AEK_BF16 | AArch64::AEK_SHA2 | AArch64::AEK_AES | 
AArch64::AEK_I8MM)) +AARCH64_ARCH("armv9-a", ARMV9A, "9-A", "v9a", + ARMBuildAttrs::CPUArch::v8_A, FK_NEON_FP_ARMV8, + (AArch64::AEK_CRC | AArch64::AEK_FP | + AArch64::AEK_SIMD | AArch64::AEK_RAS | AArch64::AEK_LSE | + AArch64::AEK_RDM | AArch64::AEK_RCPC | AArch64::AEK_DOTPROD | + AArch64::AEK_SVE2)) +AARCH64_ARCH("armv9.1-a", ARMV9_1A, "9.1-A", "v9.1a", + ARMBuildAttrs::CPUArch::v8_A, FK_NEON_FP_ARMV8, + (AArch64::AEK_CRC | AArch64::AEK_FP | + AArch64::AEK_SIMD | AArch64::AEK_RAS | AArch64::AEK_LSE | + AArch64::AEK_RDM | AArch64::AEK_RCPC | AArch64::AEK_DOTPROD | + AArch64::AEK_SVE2)) +AARCH64_ARCH("armv9.2-a", ARMV9_2A, "9.2-A", "v9.2a", + ARMBuildAttrs::CPUArch::v8_A, FK_NEON_FP_ARMV8, + (AArch64::AEK_CRC | AArch64::AEK_FP | + AArch64::AEK_SIMD | AArch64::AEK_RAS | AArch64::AEK_LSE | + AArch64::AEK_RDM | AArch64::AEK_RCPC | AArch64::AEK_DOTPROD | + AArch64::AEK_SVE2)) // For v8-R, we do not enable crypto and align with GCC that enables a more // minimal set of optional architecture extensions. 
AARCH64_ARCH("armv8-r", ARMV8R, "8-R", "v8r", @@ -126,6 +144,11 @@ AARCH64_CPU_NAME("cortex-a53", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, true, (AArch64::AEK_CRC)) AARCH64_CPU_NAME("cortex-a55", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false, (AArch64::AEK_FP16 | AArch64::AEK_DOTPROD | AArch64::AEK_RCPC)) +AARCH64_CPU_NAME("cortex-a510", ARMV9A, FK_NEON_FP_ARMV8, false, + (AArch64::AEK_BF16 | AArch64::AEK_I8MM | AArch64::AEK_SB | + AArch64::AEK_PAUTH | AArch64::AEK_MTE | AArch64::AEK_SSBS | + AArch64::AEK_SVE | AArch64::AEK_SVE2 | AArch64::AEK_SVE2BITPERM | + AArch64::AEK_FP16FML)) AARCH64_CPU_NAME("cortex-a57", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, (AArch64::AEK_CRC)) AARCH64_CPU_NAME("cortex-a65", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false, @@ -155,11 +178,20 @@ AARCH64_CPU_NAME("cortex-a78", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false, AARCH64_CPU_NAME("cortex-a78c", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false, (AArch64::AEK_FP16 | AArch64::AEK_DOTPROD | AArch64::AEK_RCPC | AArch64::AEK_SSBS)) +AARCH64_CPU_NAME("cortex-a710", ARMV9A, FK_NEON_FP_ARMV8, false, + (AArch64::AEK_MTE | AArch64::AEK_PAUTH | AArch64::AEK_FLAGM | + AArch64::AEK_SB | AArch64::AEK_I8MM | AArch64::AEK_FP16FML | + AArch64::AEK_SVE2BITPERM | AArch64::AEK_BF16)) AARCH64_CPU_NAME("cortex-r82", ARMV8R, FK_CRYPTO_NEON_FP_ARMV8, false, (AArch64::AEK_LSE)) AARCH64_CPU_NAME("cortex-x1", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false, (AArch64::AEK_FP16 | AArch64::AEK_DOTPROD | AArch64::AEK_RCPC | AArch64::AEK_SSBS)) +AARCH64_CPU_NAME("cortex-x2", ARMV9A, FK_NEON_FP_ARMV8, false, + (AArch64::AEK_MTE | AArch64::AEK_BF16 | AArch64::AEK_I8MM | + AArch64::AEK_PAUTH | AArch64::AEK_SSBS | AArch64::AEK_SB | + AArch64::AEK_SVE | AArch64::AEK_SVE2 | AArch64::AEK_SVE2BITPERM | + AArch64::AEK_FP16FML)) AARCH64_CPU_NAME("neoverse-e1", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false, (AArch64::AEK_DOTPROD | AArch64::AEK_FP16 | AArch64::AEK_RAS | AArch64::AEK_RCPC | AArch64::AEK_SSBS)) @@ -172,6 +204,10 @@ AARCH64_CPU_NAME("neoverse-n2", 
ARMV8_5A, FK_CRYPTO_NEON_FP_ARMV8, false, AArch64::AEK_I8MM | AArch64::AEK_MTE | AArch64::AEK_RAS | AArch64::AEK_RCPC | AArch64::AEK_SB | AArch64::AEK_SSBS | AArch64::AEK_SVE | AArch64::AEK_SVE2 | AArch64::AEK_SVE2BITPERM)) +AARCH64_CPU_NAME("neoverse-512tvb", ARMV8_4A, FK_CRYPTO_NEON_FP_ARMV8, false, + (AArch64::AEK_RAS | AArch64::AEK_SVE | AArch64::AEK_SSBS | + AArch64::AEK_RCPC | AArch64::AEK_FP16 | AArch64::AEK_BF16 | + AArch64::AEK_DOTPROD )) AARCH64_CPU_NAME("neoverse-v1", ARMV8_4A, FK_CRYPTO_NEON_FP_ARMV8, false, (AArch64::AEK_RAS | AArch64::AEK_SVE | AArch64::AEK_SSBS | AArch64::AEK_RCPC | AArch64::AEK_FP16 | AArch64::AEK_BF16 | diff --git a/llvm/include/llvm/Support/ARMTargetParser.def b/llvm/include/llvm/Support/ARMTargetParser.def index 14b169a6e111..fd08f3e6960c 100644 --- a/llvm/include/llvm/Support/ARMTargetParser.def +++ b/llvm/include/llvm/Support/ARMTargetParser.def @@ -122,6 +122,21 @@ ARM_ARCH("armv8.7-a", ARMV8_7A, "8.7-A", "v8.7a", (ARM::AEK_SEC | ARM::AEK_MP | ARM::AEK_VIRT | ARM::AEK_HWDIVARM | ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP | ARM::AEK_CRC | ARM::AEK_RAS | ARM::AEK_DOTPROD | ARM::AEK_BF16 | ARM::AEK_I8MM)) +ARM_ARCH("armv9-a", ARMV9A, "9-A", "v9a", + ARMBuildAttrs::CPUArch::v8_A, FK_NEON_FP_ARMV8, + (ARM::AEK_SEC | ARM::AEK_MP | ARM::AEK_VIRT | ARM::AEK_HWDIVARM | + ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP | ARM::AEK_CRC | ARM::AEK_RAS | + ARM::AEK_DOTPROD)) +ARM_ARCH("armv9.1-a", ARMV9_1A, "9.1-A", "v9.1a", + ARMBuildAttrs::CPUArch::v8_A, FK_NEON_FP_ARMV8, + (ARM::AEK_SEC | ARM::AEK_MP | ARM::AEK_VIRT | ARM::AEK_HWDIVARM | + ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP | ARM::AEK_CRC | ARM::AEK_RAS | + ARM::AEK_DOTPROD | ARM::AEK_BF16 | ARM::AEK_I8MM)) +ARM_ARCH("armv9.2-a", ARMV9_2A, "9.2-A", "v9.2a", + ARMBuildAttrs::CPUArch::v8_A, FK_NEON_FP_ARMV8, + (ARM::AEK_SEC | ARM::AEK_MP | ARM::AEK_VIRT | ARM::AEK_HWDIVARM | + ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP | ARM::AEK_CRC | ARM::AEK_RAS | + ARM::AEK_DOTPROD | ARM::AEK_BF16 | ARM::AEK_I8MM)) 
ARM_ARCH("armv8-r", ARMV8R, "8-R", "v8r", ARMBuildAttrs::CPUArch::v8_R, FK_NEON_FP_ARMV8, (ARM::AEK_MP | ARM::AEK_VIRT | ARM::AEK_HWDIVARM | ARM::AEK_HWDIVTHUMB | @@ -296,6 +311,9 @@ ARM_CPU_NAME("cortex-a78", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false, (ARM::AEK_FP16 | ARM::AEK_DOTPROD)) ARM_CPU_NAME("cortex-a78c", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false, ARM::AEK_FP16 | ARM::AEK_DOTPROD) +ARM_CPU_NAME("cortex-a710", ARMV9A, FK_NEON_FP_ARMV8, false, + (ARM::AEK_DOTPROD | ARM::AEK_FP16FML | ARM::AEK_BF16 | ARM::AEK_SB | + ARM::AEK_I8MM)) ARM_CPU_NAME("cortex-x1", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false, (ARM::AEK_FP16 | ARM::AEK_DOTPROD)) ARM_CPU_NAME("neoverse-n1", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false, diff --git a/llvm/include/llvm/Support/Allocator.h b/llvm/include/llvm/Support/Allocator.h index 245432debce6..9e8ce4e36197 100644 --- a/llvm/include/llvm/Support/Allocator.h +++ b/llvm/include/llvm/Support/Allocator.h @@ -277,7 +277,7 @@ public: size_t TotalMemory = 0; for (auto I = Slabs.begin(), E = Slabs.end(); I != E; ++I) TotalMemory += computeSlabSize(std::distance(Slabs.begin(), I)); - for (auto &PtrAndSize : CustomSizedSlabs) + for (const auto &PtrAndSize : CustomSizedSlabs) TotalMemory += PtrAndSize.second; return TotalMemory; } diff --git a/llvm/include/llvm/Support/AtomicOrdering.h b/llvm/include/llvm/Support/AtomicOrdering.h index 27ca825cef46..1a0d108300bc 100644 --- a/llvm/include/llvm/Support/AtomicOrdering.h +++ b/llvm/include/llvm/Support/AtomicOrdering.h @@ -133,6 +133,16 @@ inline bool isReleaseOrStronger(AtomicOrdering AO) { return isAtLeastOrStrongerThan(AO, AtomicOrdering::Release); } +/// Return a single atomic ordering that is at least as strong as both the \p AO +/// and \p Other orderings for an atomic operation. 
+inline AtomicOrdering getMergedAtomicOrdering(AtomicOrdering AO, + AtomicOrdering Other) { + if ((AO == AtomicOrdering::Acquire && Other == AtomicOrdering::Release) || + (AO == AtomicOrdering::Release && Other == AtomicOrdering::Acquire)) + return AtomicOrdering::AcquireRelease; + return isStrongerThan(AO, Other) ? AO : Other; +} + inline AtomicOrderingCABI toCABI(AtomicOrdering AO) { static const AtomicOrderingCABI lookup[8] = { /* NotAtomic */ AtomicOrderingCABI::relaxed, diff --git a/llvm/include/llvm/Support/BinaryByteStream.h b/llvm/include/llvm/Support/BinaryByteStream.h index ca5bb5abecfc..7d8b6d2dc43d 100644 --- a/llvm/include/llvm/Support/BinaryByteStream.h +++ b/llvm/include/llvm/Support/BinaryByteStream.h @@ -38,7 +38,7 @@ public: llvm::support::endianness getEndian() const override { return Endian; } - Error readBytes(uint32_t Offset, uint32_t Size, + Error readBytes(uint64_t Offset, uint64_t Size, ArrayRef<uint8_t> &Buffer) override { if (auto EC = checkOffsetForRead(Offset, Size)) return EC; @@ -46,7 +46,7 @@ public: return Error::success(); } - Error readLongestContiguousChunk(uint32_t Offset, + Error readLongestContiguousChunk(uint64_t Offset, ArrayRef<uint8_t> &Buffer) override { if (auto EC = checkOffsetForRead(Offset, 1)) return EC; @@ -54,7 +54,7 @@ public: return Error::success(); } - uint32_t getLength() override { return Data.size(); } + uint64_t getLength() override { return Data.size(); } ArrayRef<uint8_t> data() const { return Data; } @@ -97,19 +97,19 @@ public: return ImmutableStream.getEndian(); } - Error readBytes(uint32_t Offset, uint32_t Size, + Error readBytes(uint64_t Offset, uint64_t Size, ArrayRef<uint8_t> &Buffer) override { return ImmutableStream.readBytes(Offset, Size, Buffer); } - Error readLongestContiguousChunk(uint32_t Offset, + Error readLongestContiguousChunk(uint64_t Offset, ArrayRef<uint8_t> &Buffer) override { return ImmutableStream.readLongestContiguousChunk(Offset, Buffer); } - uint32_t getLength() override { return 
ImmutableStream.getLength(); } + uint64_t getLength() override { return ImmutableStream.getLength(); } - Error writeBytes(uint32_t Offset, ArrayRef<uint8_t> Buffer) override { + Error writeBytes(uint64_t Offset, ArrayRef<uint8_t> Buffer) override { if (Buffer.empty()) return Error::success(); @@ -145,7 +145,7 @@ public: llvm::support::endianness getEndian() const override { return Endian; } - Error readBytes(uint32_t Offset, uint32_t Size, + Error readBytes(uint64_t Offset, uint64_t Size, ArrayRef<uint8_t> &Buffer) override { if (auto EC = checkOffsetForWrite(Offset, Buffer.size())) return EC; @@ -154,11 +154,11 @@ public: return Error::success(); } - void insert(uint32_t Offset, ArrayRef<uint8_t> Bytes) { + void insert(uint64_t Offset, ArrayRef<uint8_t> Bytes) { Data.insert(Data.begin() + Offset, Bytes.begin(), Bytes.end()); } - Error readLongestContiguousChunk(uint32_t Offset, + Error readLongestContiguousChunk(uint64_t Offset, ArrayRef<uint8_t> &Buffer) override { if (auto EC = checkOffsetForWrite(Offset, 1)) return EC; @@ -167,9 +167,9 @@ public: return Error::success(); } - uint32_t getLength() override { return Data.size(); } + uint64_t getLength() override { return Data.size(); } - Error writeBytes(uint32_t Offset, ArrayRef<uint8_t> Buffer) override { + Error writeBytes(uint64_t Offset, ArrayRef<uint8_t> Buffer) override { if (Buffer.empty()) return Error::success(); @@ -182,7 +182,7 @@ public: if (Offset > getLength()) return make_error<BinaryStreamError>(stream_error_code::invalid_offset); - uint32_t RequiredSize = Offset + Buffer.size(); + uint64_t RequiredSize = Offset + Buffer.size(); if (RequiredSize > Data.size()) Data.resize(RequiredSize); @@ -240,19 +240,19 @@ public: return Impl.getEndian(); } - Error readBytes(uint32_t Offset, uint32_t Size, + Error readBytes(uint64_t Offset, uint64_t Size, ArrayRef<uint8_t> &Buffer) override { return Impl.readBytes(Offset, Size, Buffer); } - Error readLongestContiguousChunk(uint32_t Offset, + Error 
readLongestContiguousChunk(uint64_t Offset, ArrayRef<uint8_t> &Buffer) override { return Impl.readLongestContiguousChunk(Offset, Buffer); } - uint32_t getLength() override { return Impl.getLength(); } + uint64_t getLength() override { return Impl.getLength(); } - Error writeBytes(uint32_t Offset, ArrayRef<uint8_t> Data) override { + Error writeBytes(uint64_t Offset, ArrayRef<uint8_t> Data) override { return Impl.writeBytes(Offset, Data); } diff --git a/llvm/include/llvm/Support/BinaryItemStream.h b/llvm/include/llvm/Support/BinaryItemStream.h index 4d27013ce368..eb512bf4721a 100644 --- a/llvm/include/llvm/Support/BinaryItemStream.h +++ b/llvm/include/llvm/Support/BinaryItemStream.h @@ -38,7 +38,7 @@ public: llvm::support::endianness getEndian() const override { return Endian; } - Error readBytes(uint32_t Offset, uint32_t Size, + Error readBytes(uint64_t Offset, uint64_t Size, ArrayRef<uint8_t> &Buffer) override { auto ExpectedIndex = translateOffsetIndex(Offset); if (!ExpectedIndex) @@ -52,7 +52,7 @@ public: return Error::success(); } - Error readLongestContiguousChunk(uint32_t Offset, + Error readLongestContiguousChunk(uint64_t Offset, ArrayRef<uint8_t> &Buffer) override { auto ExpectedIndex = translateOffsetIndex(Offset); if (!ExpectedIndex) @@ -66,7 +66,7 @@ public: computeItemOffsets(); } - uint32_t getLength() override { + uint64_t getLength() override { return ItemEndOffsets.empty() ? 
0 : ItemEndOffsets.back(); } @@ -74,16 +74,16 @@ private: void computeItemOffsets() { ItemEndOffsets.clear(); ItemEndOffsets.reserve(Items.size()); - uint32_t CurrentOffset = 0; + uint64_t CurrentOffset = 0; for (const auto &Item : Items) { - uint32_t Len = Traits::length(Item); + uint64_t Len = Traits::length(Item); assert(Len > 0 && "no empty items"); CurrentOffset += Len; ItemEndOffsets.push_back(CurrentOffset); } } - Expected<uint32_t> translateOffsetIndex(uint32_t Offset) { + Expected<uint32_t> translateOffsetIndex(uint64_t Offset) { // Make sure the offset is somewhere in our items array. if (Offset >= getLength()) return make_error<BinaryStreamError>(stream_error_code::stream_too_short); @@ -98,7 +98,7 @@ private: ArrayRef<T> Items; // Sorted vector of offsets to accelerate lookup. - std::vector<uint32_t> ItemEndOffsets; + std::vector<uint64_t> ItemEndOffsets; }; } // end namespace llvm diff --git a/llvm/include/llvm/Support/BinaryStream.h b/llvm/include/llvm/Support/BinaryStream.h index fcf4398550ee..e87129d8c201 100644 --- a/llvm/include/llvm/Support/BinaryStream.h +++ b/llvm/include/llvm/Support/BinaryStream.h @@ -41,22 +41,22 @@ public: /// Given an offset into the stream and a number of bytes, attempt to /// read the bytes and set the output ArrayRef to point to data owned by the /// stream. - virtual Error readBytes(uint32_t Offset, uint32_t Size, + virtual Error readBytes(uint64_t Offset, uint64_t Size, ArrayRef<uint8_t> &Buffer) = 0; /// Given an offset into the stream, read as much as possible without /// copying any data. - virtual Error readLongestContiguousChunk(uint32_t Offset, + virtual Error readLongestContiguousChunk(uint64_t Offset, ArrayRef<uint8_t> &Buffer) = 0; /// Return the number of bytes of data in this stream. - virtual uint32_t getLength() = 0; + virtual uint64_t getLength() = 0; /// Return the properties of this stream. 
virtual BinaryStreamFlags getFlags() const { return BSF_None; } protected: - Error checkOffsetForRead(uint32_t Offset, uint32_t DataSize) { + Error checkOffsetForRead(uint64_t Offset, uint64_t DataSize) { if (Offset > getLength()) return make_error<BinaryStreamError>(stream_error_code::invalid_offset); if (getLength() < DataSize + Offset) @@ -77,7 +77,7 @@ public: /// Attempt to write the given bytes into the stream at the desired /// offset. This will always necessitate a copy. Cannot shrink or grow the /// stream, only writes into existing allocated space. - virtual Error writeBytes(uint32_t Offset, ArrayRef<uint8_t> Data) = 0; + virtual Error writeBytes(uint64_t Offset, ArrayRef<uint8_t> Data) = 0; /// For buffered streams, commits changes to the backing store. virtual Error commit() = 0; @@ -86,7 +86,7 @@ public: BinaryStreamFlags getFlags() const override { return BSF_Write; } protected: - Error checkOffsetForWrite(uint32_t Offset, uint32_t DataSize) { + Error checkOffsetForWrite(uint64_t Offset, uint64_t DataSize) { if (!(getFlags() & BSF_Append)) return checkOffsetForRead(Offset, DataSize); diff --git a/llvm/include/llvm/Support/BinaryStreamArray.h b/llvm/include/llvm/Support/BinaryStreamArray.h index 148ab85169f2..85d29be26ca9 100644 --- a/llvm/include/llvm/Support/BinaryStreamArray.h +++ b/llvm/include/llvm/Support/BinaryStreamArray.h @@ -153,7 +153,7 @@ private: template <typename ValueType, typename Extractor> class VarStreamArrayIterator : public iterator_facade_base<VarStreamArrayIterator<ValueType, Extractor>, - std::forward_iterator_tag, ValueType> { + std::forward_iterator_tag, const ValueType> { typedef VarStreamArrayIterator<ValueType, Extractor> IterType; typedef VarStreamArray<ValueType, Extractor> ArrayType; @@ -197,11 +197,6 @@ public: return ThisValue; } - ValueType &operator*() { - assert(Array && !HasError); - return ThisValue; - } - IterType &operator+=(unsigned N) { for (unsigned I = 0; I < N; ++I) { // We are done with the current 
record, discard it so that we are diff --git a/llvm/include/llvm/Support/BinaryStreamReader.h b/llvm/include/llvm/Support/BinaryStreamReader.h index 9ad98a89aaf9..29b4b09b848c 100644 --- a/llvm/include/llvm/Support/BinaryStreamReader.h +++ b/llvm/include/llvm/Support/BinaryStreamReader.h @@ -251,16 +251,16 @@ public: } bool empty() const { return bytesRemaining() == 0; } - void setOffset(uint32_t Off) { Offset = Off; } - uint32_t getOffset() const { return Offset; } - uint32_t getLength() const { return Stream.getLength(); } - uint32_t bytesRemaining() const { return getLength() - getOffset(); } + void setOffset(uint64_t Off) { Offset = Off; } + uint64_t getOffset() const { return Offset; } + uint64_t getLength() const { return Stream.getLength(); } + uint64_t bytesRemaining() const { return getLength() - getOffset(); } /// Advance the stream's offset by \p Amount bytes. /// /// \returns a success error code if at least \p Amount bytes remain in the /// stream, otherwise returns an appropriate error code. - Error skip(uint32_t Amount); + Error skip(uint64_t Amount); /// Examine the next byte of the underlying stream without advancing the /// stream's offset. If the stream is empty the behavior is undefined. 
@@ -271,11 +271,11 @@ public: Error padToAlignment(uint32_t Align); std::pair<BinaryStreamReader, BinaryStreamReader> - split(uint32_t Offset) const; + split(uint64_t Offset) const; private: BinaryStreamRef Stream; - uint32_t Offset = 0; + uint64_t Offset = 0; }; } // namespace llvm diff --git a/llvm/include/llvm/Support/BinaryStreamRef.h b/llvm/include/llvm/Support/BinaryStreamRef.h index ba4c3873586d..e0aaab82ffab 100644 --- a/llvm/include/llvm/Support/BinaryStreamRef.h +++ b/llvm/include/llvm/Support/BinaryStreamRef.h @@ -30,12 +30,12 @@ protected: Length = BorrowedImpl.getLength(); } - BinaryStreamRefBase(std::shared_ptr<StreamType> SharedImpl, uint32_t Offset, - Optional<uint32_t> Length) + BinaryStreamRefBase(std::shared_ptr<StreamType> SharedImpl, uint64_t Offset, + Optional<uint64_t> Length) : SharedImpl(SharedImpl), BorrowedImpl(SharedImpl.get()), ViewOffset(Offset), Length(Length) {} - BinaryStreamRefBase(StreamType &BorrowedImpl, uint32_t Offset, - Optional<uint32_t> Length) + BinaryStreamRefBase(StreamType &BorrowedImpl, uint64_t Offset, + Optional<uint64_t> Length) : BorrowedImpl(&BorrowedImpl), ViewOffset(Offset), Length(Length) {} BinaryStreamRefBase(const BinaryStreamRefBase &Other) = default; BinaryStreamRefBase &operator=(const BinaryStreamRefBase &Other) = default; @@ -48,7 +48,7 @@ public: return BorrowedImpl->getEndian(); } - uint32_t getLength() const { + uint64_t getLength() const { if (Length.hasValue()) return *Length; @@ -58,7 +58,7 @@ public: /// Return a new BinaryStreamRef with the first \p N elements removed. If /// this BinaryStreamRef is length-tracking, then the resulting one will be /// too. - RefType drop_front(uint32_t N) const { + RefType drop_front(uint64_t N) const { if (!BorrowedImpl) return RefType(); @@ -76,7 +76,7 @@ public: /// Return a new BinaryStreamRef with the last \p N elements removed. 
If /// this BinaryStreamRef is length-tracking and \p N is greater than 0, then /// this BinaryStreamRef will no longer length-track. - RefType drop_back(uint32_t N) const { + RefType drop_back(uint64_t N) const { if (!BorrowedImpl) return RefType(); @@ -96,26 +96,26 @@ public: } /// Return a new BinaryStreamRef with only the first \p N elements remaining. - RefType keep_front(uint32_t N) const { + RefType keep_front(uint64_t N) const { assert(N <= getLength()); return drop_back(getLength() - N); } /// Return a new BinaryStreamRef with only the last \p N elements remaining. - RefType keep_back(uint32_t N) const { + RefType keep_back(uint64_t N) const { assert(N <= getLength()); return drop_front(getLength() - N); } /// Return a new BinaryStreamRef with the first and last \p N elements /// removed. - RefType drop_symmetric(uint32_t N) const { + RefType drop_symmetric(uint64_t N) const { return drop_front(N).drop_back(N); } /// Return a new BinaryStreamRef with the first \p Offset elements removed, /// and retaining exactly \p Len elements. - RefType slice(uint32_t Offset, uint32_t Len) const { + RefType slice(uint64_t Offset, uint64_t Len) const { return drop_front(Offset).keep_front(Len); } @@ -132,7 +132,7 @@ public: } protected: - Error checkOffsetForRead(uint32_t Offset, uint32_t DataSize) const { + Error checkOffsetForRead(uint64_t Offset, uint64_t DataSize) const { if (Offset > getLength()) return make_error<BinaryStreamError>(stream_error_code::invalid_offset); if (getLength() < DataSize + Offset) @@ -142,8 +142,8 @@ protected: std::shared_ptr<StreamType> SharedImpl; StreamType *BorrowedImpl = nullptr; - uint32_t ViewOffset = 0; - Optional<uint32_t> Length; + uint64_t ViewOffset = 0; + Optional<uint64_t> Length; }; /// BinaryStreamRef is to BinaryStream what ArrayRef is to an Array. 
It @@ -157,15 +157,15 @@ class BinaryStreamRef : public BinaryStreamRefBase<BinaryStreamRef, BinaryStream> { friend BinaryStreamRefBase<BinaryStreamRef, BinaryStream>; friend class WritableBinaryStreamRef; - BinaryStreamRef(std::shared_ptr<BinaryStream> Impl, uint32_t ViewOffset, - Optional<uint32_t> Length) + BinaryStreamRef(std::shared_ptr<BinaryStream> Impl, uint64_t ViewOffset, + Optional<uint64_t> Length) : BinaryStreamRefBase(Impl, ViewOffset, Length) {} public: BinaryStreamRef() = default; BinaryStreamRef(BinaryStream &Stream); - BinaryStreamRef(BinaryStream &Stream, uint32_t Offset, - Optional<uint32_t> Length); + BinaryStreamRef(BinaryStream &Stream, uint64_t Offset, + Optional<uint64_t> Length); explicit BinaryStreamRef(ArrayRef<uint8_t> Data, llvm::support::endianness Endian); explicit BinaryStreamRef(StringRef Data, llvm::support::endianness Endian); @@ -176,8 +176,8 @@ public: BinaryStreamRef &operator=(BinaryStreamRef &&Other) = default; // Use BinaryStreamRef.slice() instead. - BinaryStreamRef(BinaryStreamRef &S, uint32_t Offset, - uint32_t Length) = delete; + BinaryStreamRef(BinaryStreamRef &S, uint64_t Offset, + uint64_t Length) = delete; /// Given an Offset into this StreamRef and a Size, return a reference to a /// buffer owned by the stream. @@ -185,7 +185,7 @@ public: /// \returns a success error code if the entire range of data is within the /// bounds of this BinaryStreamRef's view and the implementation could read /// the data, and an appropriate error code otherwise. - Error readBytes(uint32_t Offset, uint32_t Size, + Error readBytes(uint64_t Offset, uint64_t Size, ArrayRef<uint8_t> &Buffer) const; /// Given an Offset into this BinaryStreamRef, return a reference to the @@ -193,29 +193,28 @@ public: /// /// \returns a success error code if implementation could read the data, /// and an appropriate error code otherwise. 
- Error readLongestContiguousChunk(uint32_t Offset, + Error readLongestContiguousChunk(uint64_t Offset, ArrayRef<uint8_t> &Buffer) const; }; struct BinarySubstreamRef { - uint32_t Offset = 0; // Offset in the parent stream + uint64_t Offset = 0; // Offset in the parent stream BinaryStreamRef StreamData; // Stream Data - BinarySubstreamRef slice(uint32_t Off, uint32_t Size) const { + BinarySubstreamRef slice(uint64_t Off, uint64_t Size) const { BinaryStreamRef SubSub = StreamData.slice(Off, Size); return {Off + Offset, SubSub}; } - BinarySubstreamRef drop_front(uint32_t N) const { + BinarySubstreamRef drop_front(uint64_t N) const { return slice(N, size() - N); } - BinarySubstreamRef keep_front(uint32_t N) const { return slice(0, N); } + BinarySubstreamRef keep_front(uint64_t N) const { return slice(0, N); } - std::pair<BinarySubstreamRef, BinarySubstreamRef> - split(uint32_t Off) const { + std::pair<BinarySubstreamRef, BinarySubstreamRef> split(uint64_t Off) const { return std::make_pair(keep_front(Off), drop_front(Off)); } - uint32_t size() const { return StreamData.getLength(); } + uint64_t size() const { return StreamData.getLength(); } bool empty() const { return size() == 0; } }; @@ -224,10 +223,10 @@ class WritableBinaryStreamRef WritableBinaryStream> { friend BinaryStreamRefBase<WritableBinaryStreamRef, WritableBinaryStream>; WritableBinaryStreamRef(std::shared_ptr<WritableBinaryStream> Impl, - uint32_t ViewOffset, Optional<uint32_t> Length) + uint64_t ViewOffset, Optional<uint64_t> Length) : BinaryStreamRefBase(Impl, ViewOffset, Length) {} - Error checkOffsetForWrite(uint32_t Offset, uint32_t DataSize) const { + Error checkOffsetForWrite(uint64_t Offset, uint64_t DataSize) const { if (!(BorrowedImpl->getFlags() & BSF_Append)) return checkOffsetForRead(Offset, DataSize); @@ -239,8 +238,8 @@ class WritableBinaryStreamRef public: WritableBinaryStreamRef() = default; WritableBinaryStreamRef(WritableBinaryStream &Stream); - 
WritableBinaryStreamRef(WritableBinaryStream &Stream, uint32_t Offset, - Optional<uint32_t> Length); + WritableBinaryStreamRef(WritableBinaryStream &Stream, uint64_t Offset, + Optional<uint64_t> Length); explicit WritableBinaryStreamRef(MutableArrayRef<uint8_t> Data, llvm::support::endianness Endian); WritableBinaryStreamRef(const WritableBinaryStreamRef &Other) = default; @@ -251,8 +250,8 @@ public: WritableBinaryStreamRef &operator=(WritableBinaryStreamRef &&Other) = default; // Use WritableBinaryStreamRef.slice() instead. - WritableBinaryStreamRef(WritableBinaryStreamRef &S, uint32_t Offset, - uint32_t Length) = delete; + WritableBinaryStreamRef(WritableBinaryStreamRef &S, uint64_t Offset, + uint64_t Length) = delete; /// Given an Offset into this WritableBinaryStreamRef and some input data, /// writes the data to the underlying stream. @@ -260,7 +259,7 @@ public: /// \returns a success error code if the data could fit within the underlying /// stream at the specified location and the implementation could write the /// data, and an appropriate error code otherwise. - Error writeBytes(uint32_t Offset, ArrayRef<uint8_t> Data) const; + Error writeBytes(uint64_t Offset, ArrayRef<uint8_t> Data) const; /// Conver this WritableBinaryStreamRef to a read-only BinaryStreamRef. operator BinaryStreamRef() const; diff --git a/llvm/include/llvm/Support/BinaryStreamWriter.h b/llvm/include/llvm/Support/BinaryStreamWriter.h index ceba792e6b26..3054f4ac7ef0 100644 --- a/llvm/include/llvm/Support/BinaryStreamWriter.h +++ b/llvm/include/llvm/Support/BinaryStreamWriter.h @@ -124,7 +124,7 @@ public: /// /// \returns a success error code if the data was successfully written, /// otherwise returns an appropriate error code. - Error writeStreamRef(BinaryStreamRef Ref, uint32_t Size); + Error writeStreamRef(BinaryStreamRef Ref, uint64_t Size); /// Writes the object \p Obj to the underlying stream, as if by using memcpy. 
/// It is up to the caller to ensure that type of \p Obj can be safely copied @@ -178,17 +178,17 @@ public: } /// Splits the Writer into two Writers at a given offset. - std::pair<BinaryStreamWriter, BinaryStreamWriter> split(uint32_t Off) const; + std::pair<BinaryStreamWriter, BinaryStreamWriter> split(uint64_t Off) const; - void setOffset(uint32_t Off) { Offset = Off; } - uint32_t getOffset() const { return Offset; } - uint32_t getLength() const { return Stream.getLength(); } - uint32_t bytesRemaining() const { return getLength() - getOffset(); } + void setOffset(uint64_t Off) { Offset = Off; } + uint64_t getOffset() const { return Offset; } + uint64_t getLength() const { return Stream.getLength(); } + uint64_t bytesRemaining() const { return getLength() - getOffset(); } Error padToAlignment(uint32_t Align); protected: WritableBinaryStreamRef Stream; - uint32_t Offset = 0; + uint64_t Offset = 0; }; } // end namespace llvm diff --git a/llvm/include/llvm/Support/Caching.h b/llvm/include/llvm/Support/Caching.h new file mode 100644 index 000000000000..1e5fea17f708 --- /dev/null +++ b/llvm/include/llvm/Support/Caching.h @@ -0,0 +1,71 @@ +//===- Caching.h - LLVM Local File Cache ------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the CachedFileStream and the localCache function, which +// simplifies caching files on the local filesystem in a directory whose +// contents are managed by a CachePruningPolicy. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SUPPORT_CACHING_H +#define LLVM_SUPPORT_CACHING_H + +#include "llvm/Support/Error.h" + +namespace llvm { + +class MemoryBuffer; + +/// This class wraps an output stream for a file. Most clients should just be +/// able to return an instance of this base class from the stream callback, but +/// if a client needs to perform some action after the stream is written to, +/// that can be done by deriving from this class and overriding the destructor. +class CachedFileStream { +public: + CachedFileStream(std::unique_ptr<raw_pwrite_stream> OS) : OS(std::move(OS)) {} + std::unique_ptr<raw_pwrite_stream> OS; + virtual ~CachedFileStream() = default; +}; + +/// This type defines the callback to add a file that is generated on the fly. +/// +/// Stream callbacks must be thread safe. +using AddStreamFn = + std::function<Expected<std::unique_ptr<CachedFileStream>>(unsigned Task)>; + +/// This is the type of a file cache. To request an item from the cache, pass a +/// unique string as the Key. For hits, the cached file will be added to the +/// link and this function will return AddStreamFn(). For misses, the cache will +/// return a stream callback which must be called at most once to produce +/// content for the stream. The file stream produced by the stream callback will +/// add the file to the link after the stream is written to. +/// +/// Clients generally look like this: +/// +/// if (AddStreamFn AddStream = Cache(Task, Key)) +/// ProduceContent(AddStream); +using FileCache = + std::function<Expected<AddStreamFn>(unsigned Task, StringRef Key)>; + +/// This type defines the callback to add a pre-existing file (e.g. in a cache). +/// +/// Buffer callbacks must be thread safe. 
+using AddBufferFn = + std::function<void(unsigned Task, std::unique_ptr<MemoryBuffer> MB)>; + +/// Create a local file system cache which uses the given cache name, temporary +/// file prefix, cache directory and file callback. This function also creates +/// the cache directory if it does not already exist. The cache name appears in +/// error messages for errors during caching. The temporary file prefix is used +/// in the temporary file naming scheme used when writing files atomically. +Expected<FileCache> localCache(Twine CacheNameRef, Twine TempFilePrefixRef, + Twine CacheDirectoryPathRef, + AddBufferFn AddBuffer); +} // namespace llvm + +#endif diff --git a/llvm/include/llvm/Support/CommandLine.h b/llvm/include/llvm/Support/CommandLine.h index 14d7e21f78b2..2ee02010ff1d 100644 --- a/llvm/include/llvm/Support/CommandLine.h +++ b/llvm/include/llvm/Support/CommandLine.h @@ -316,9 +316,7 @@ public: } bool isInAllSubCommands() const { - return any_of(Subs, [](const SubCommand *SC) { - return SC == &*AllSubCommands; - }); + return llvm::is_contained(Subs, &*AllSubCommands); } //-------------------------------------------------------------------------=== @@ -926,6 +924,9 @@ public: //-------------------------------------------------- // parser<bool> // + +extern template class basic_parser<bool>; + template <> class parser<bool> : public basic_parser<bool> { public: parser(Option &O) : basic_parser(O) {} @@ -949,10 +950,11 @@ public: void anchor() override; }; -extern template class basic_parser<bool>; - //-------------------------------------------------- // parser<boolOrDefault> + +extern template class basic_parser<boolOrDefault>; + template <> class parser<boolOrDefault> : public basic_parser<boolOrDefault> { public: parser(Option &O) : basic_parser(O) {} @@ -974,11 +976,12 @@ public: void anchor() override; }; -extern template class basic_parser<boolOrDefault>; - //-------------------------------------------------- // parser<int> // + +extern template class 
basic_parser<int>; + template <> class parser<int> : public basic_parser<int> { public: parser(Option &O) : basic_parser(O) {} @@ -996,11 +999,12 @@ public: void anchor() override; }; -extern template class basic_parser<int>; - //-------------------------------------------------- // parser<long> // + +extern template class basic_parser<long>; + template <> class parser<long> final : public basic_parser<long> { public: parser(Option &O) : basic_parser(O) {} @@ -1018,11 +1022,12 @@ public: void anchor() override; }; -extern template class basic_parser<long>; - //-------------------------------------------------- // parser<long long> // + +extern template class basic_parser<long long>; + template <> class parser<long long> : public basic_parser<long long> { public: parser(Option &O) : basic_parser(O) {} @@ -1040,11 +1045,12 @@ public: void anchor() override; }; -extern template class basic_parser<long long>; - //-------------------------------------------------- // parser<unsigned> // + +extern template class basic_parser<unsigned>; + template <> class parser<unsigned> : public basic_parser<unsigned> { public: parser(Option &O) : basic_parser(O) {} @@ -1062,11 +1068,12 @@ public: void anchor() override; }; -extern template class basic_parser<unsigned>; - //-------------------------------------------------- // parser<unsigned long> // + +extern template class basic_parser<unsigned long>; + template <> class parser<unsigned long> final : public basic_parser<unsigned long> { public: @@ -1085,11 +1092,12 @@ public: void anchor() override; }; -extern template class basic_parser<unsigned long>; - //-------------------------------------------------- // parser<unsigned long long> // + +extern template class basic_parser<unsigned long long>; + template <> class parser<unsigned long long> : public basic_parser<unsigned long long> { public: @@ -1109,11 +1117,12 @@ public: void anchor() override; }; -extern template class basic_parser<unsigned long long>; - 
//-------------------------------------------------- // parser<double> // + +extern template class basic_parser<double>; + template <> class parser<double> : public basic_parser<double> { public: parser(Option &O) : basic_parser(O) {} @@ -1131,11 +1140,12 @@ public: void anchor() override; }; -extern template class basic_parser<double>; - //-------------------------------------------------- // parser<float> // + +extern template class basic_parser<float>; + template <> class parser<float> : public basic_parser<float> { public: parser(Option &O) : basic_parser(O) {} @@ -1153,11 +1163,12 @@ public: void anchor() override; }; -extern template class basic_parser<float>; - //-------------------------------------------------- // parser<std::string> // + +extern template class basic_parser<std::string>; + template <> class parser<std::string> : public basic_parser<std::string> { public: parser(Option &O) : basic_parser(O) {} @@ -1178,11 +1189,12 @@ public: void anchor() override; }; -extern template class basic_parser<std::string>; - //-------------------------------------------------- // parser<char> // + +extern template class basic_parser<char>; + template <> class parser<char> : public basic_parser<char> { public: parser(Option &O) : basic_parser(O) {} @@ -1203,8 +1215,6 @@ public: void anchor() override; }; -extern template class basic_parser<char>; - //-------------------------------------------------- // PrintOptionDiff // diff --git a/llvm/include/llvm/Support/Compiler.h b/llvm/include/llvm/Support/Compiler.h index 57052b596edb..c5318137ed3d 100644 --- a/llvm/include/llvm/Support/Compiler.h +++ b/llvm/include/llvm/Support/Compiler.h @@ -97,7 +97,7 @@ /// Sadly, this is separate from just rvalue reference support because GCC /// and MSVC implemented this later than everything else. This appears to be /// corrected in MSVC 2019 but not MSVC 2017. 
-#if __has_feature(cxx_rvalue_references) || LLVM_GNUC_PREREQ(4, 8, 1) || \ +#if __has_feature(cxx_rvalue_references) || defined(__GNUC__) || \ LLVM_MSC_PREREQ(1920) #define LLVM_HAS_RVALUE_REFERENCE_THIS 1 #else @@ -123,8 +123,8 @@ /// LLVM_EXTERNAL_VISIBILITY - classes, functions, and variables marked with /// this attribute will be made public and visible outside of any shared library /// they are linked in to. -#if (__has_attribute(visibility) || LLVM_GNUC_PREREQ(4, 0, 0)) && \ - !defined(__MINGW32__) && !defined(__CYGWIN__) && !defined(_WIN32) +#if __has_attribute(visibility) && !defined(__MINGW32__) && \ + !defined(__CYGWIN__) && !defined(_WIN32) #define LLVM_LIBRARY_VISIBILITY __attribute__ ((visibility("hidden"))) #define LLVM_EXTERNAL_VISIBILITY __attribute__ ((visibility("default"))) #else @@ -138,7 +138,7 @@ #define LLVM_PREFETCH(addr, rw, locality) #endif -#if __has_attribute(used) || LLVM_GNUC_PREREQ(3, 1, 0) +#if __has_attribute(used) #define LLVM_ATTRIBUTE_USED __attribute__((__used__)) #else #define LLVM_ATTRIBUTE_USED @@ -182,15 +182,15 @@ // more portable solution: // (void)unused_var_name; // Prefer cast-to-void wherever it is sufficient. -#if __has_attribute(unused) || LLVM_GNUC_PREREQ(3, 1, 0) +#if __has_attribute(unused) #define LLVM_ATTRIBUTE_UNUSED __attribute__((__unused__)) #else #define LLVM_ATTRIBUTE_UNUSED #endif // FIXME: Provide this for PE/COFF targets. 
-#if (__has_attribute(weak) || LLVM_GNUC_PREREQ(4, 0, 0)) && \ - (!defined(__MINGW32__) && !defined(__CYGWIN__) && !defined(_WIN32)) +#if __has_attribute(weak) && !defined(__MINGW32__) && !defined(__CYGWIN__) && \ + !defined(_WIN32) #define LLVM_ATTRIBUTE_WEAK __attribute__((__weak__)) #else #define LLVM_ATTRIBUTE_WEAK @@ -212,7 +212,13 @@ #define LLVM_READONLY #endif -#if __has_builtin(__builtin_expect) || LLVM_GNUC_PREREQ(4, 0, 0) +#if __has_attribute(minsize) +#define LLVM_ATTRIBUTE_MINSIZE __attribute__((minsize)) +#else +#define LLVM_ATTRIBUTE_MINSIZE +#endif + +#if __has_builtin(__builtin_expect) || defined(__GNUC__) #define LLVM_LIKELY(EXPR) __builtin_expect((bool)(EXPR), true) #define LLVM_UNLIKELY(EXPR) __builtin_expect((bool)(EXPR), false) #else @@ -222,7 +228,7 @@ /// LLVM_ATTRIBUTE_NOINLINE - On compilers where we have a directive to do so, /// mark a method "not for inlining". -#if __has_attribute(noinline) || LLVM_GNUC_PREREQ(3, 4, 0) +#if __has_attribute(noinline) #define LLVM_ATTRIBUTE_NOINLINE __attribute__((noinline)) #elif defined(_MSC_VER) #define LLVM_ATTRIBUTE_NOINLINE __declspec(noinline) @@ -231,10 +237,8 @@ #endif /// LLVM_ATTRIBUTE_ALWAYS_INLINE - On compilers where we have a directive to do -/// so, mark a method "always inline" because it is performance sensitive. GCC -/// 3.4 supported this but is buggy in various cases and produces unimplemented -/// errors, just use it in GCC 4.0 and later. -#if __has_attribute(always_inline) || LLVM_GNUC_PREREQ(4, 0, 0) +/// so, mark a method "always inline" because it is performance sensitive. 
+#if __has_attribute(always_inline) #define LLVM_ATTRIBUTE_ALWAYS_INLINE inline __attribute__((always_inline)) #elif defined(_MSC_VER) #define LLVM_ATTRIBUTE_ALWAYS_INLINE __forceinline @@ -242,15 +246,16 @@ #define LLVM_ATTRIBUTE_ALWAYS_INLINE inline #endif -#ifdef __GNUC__ -#define LLVM_ATTRIBUTE_NORETURN __attribute__((noreturn)) -#elif defined(_MSC_VER) -#define LLVM_ATTRIBUTE_NORETURN __declspec(noreturn) +/// LLVM_ATTRIBUTE_NO_DEBUG - On compilers where we have a directive to do +/// so, mark a method "no debug" because debug info makes the debugger +/// experience worse. +#if __has_attribute(nodebug) +#define LLVM_ATTRIBUTE_NODEBUG __attribute__((nodebug)) #else -#define LLVM_ATTRIBUTE_NORETURN +#define LLVM_ATTRIBUTE_NODEBUG #endif -#if __has_attribute(returns_nonnull) || LLVM_GNUC_PREREQ(4, 9, 0) +#if __has_attribute(returns_nonnull) #define LLVM_ATTRIBUTE_RETURNS_NONNULL __attribute__((returns_nonnull)) #elif defined(_MSC_VER) #define LLVM_ATTRIBUTE_RETURNS_NONNULL _Ret_notnull_ @@ -322,15 +327,17 @@ /// LLVM_BUILTIN_UNREACHABLE - On compilers which support it, expands /// to an expression which states that it is undefined behavior for the /// compiler to reach this point. Otherwise is not defined. -#if __has_builtin(__builtin_unreachable) || LLVM_GNUC_PREREQ(4, 5, 0) +#if __has_builtin(__builtin_unreachable) || defined(__GNUC__) # define LLVM_BUILTIN_UNREACHABLE __builtin_unreachable() #elif defined(_MSC_VER) # define LLVM_BUILTIN_UNREACHABLE __assume(false) +#else +# define LLVM_BUILTIN_UNREACHABLE #endif /// LLVM_BUILTIN_TRAP - On compilers which support it, expands to an expression /// which causes the program to exit abnormally. 
-#if __has_builtin(__builtin_trap) || LLVM_GNUC_PREREQ(4, 3, 0) +#if __has_builtin(__builtin_trap) || defined(__GNUC__) # define LLVM_BUILTIN_TRAP __builtin_trap() #elif defined(_MSC_VER) // The __debugbreak intrinsic is supported by MSVC, does not require forward @@ -361,7 +368,7 @@ /// \macro LLVM_ASSUME_ALIGNED /// Returns a pointer with an assumed alignment. -#if __has_builtin(__builtin_assume_aligned) || LLVM_GNUC_PREREQ(4, 7, 0) +#if __has_builtin(__builtin_assume_aligned) || defined(__GNUC__) # define LLVM_ASSUME_ALIGNED(p, a) __builtin_assume_aligned(p, a) #elif defined(LLVM_BUILTIN_UNREACHABLE) # define LLVM_ASSUME_ALIGNED(p, a) \ @@ -549,4 +556,13 @@ void AnnotateIgnoreWritesEnd(const char *file, int line); #define LLVM_ENABLE_EXCEPTIONS 1 #endif +/// \macro LLVM_NO_PROFILE_INSTRUMENT_FUNCTION +/// Disable the profile instrument for a function. +#if __has_attribute(no_profile_instrument_function) +#define LLVM_NO_PROFILE_INSTRUMENT_FUNCTION \ + __attribute__((no_profile_instrument_function)) +#else +#define LLVM_NO_PROFILE_INSTRUMENT_FUNCTION +#endif + #endif diff --git a/llvm/include/llvm/Support/CrashRecoveryContext.h b/llvm/include/llvm/Support/CrashRecoveryContext.h index 498690655fd1..2604ccb38431 100644 --- a/llvm/include/llvm/Support/CrashRecoveryContext.h +++ b/llvm/include/llvm/Support/CrashRecoveryContext.h @@ -99,8 +99,7 @@ public: /// Explicitly trigger a crash recovery in the current process, and /// return failure from RunSafely(). This function does not return. - LLVM_ATTRIBUTE_NORETURN - void HandleExit(int RetCode); + [[noreturn]] void HandleExit(int RetCode); /// Throw again a signal or an exception, after it was catched once by a /// CrashRecoveryContext. 
diff --git a/llvm/include/llvm/Support/DOTGraphTraits.h b/llvm/include/llvm/Support/DOTGraphTraits.h index a73538fa1462..ffa9abe328c8 100644 --- a/llvm/include/llvm/Support/DOTGraphTraits.h +++ b/llvm/include/llvm/Support/DOTGraphTraits.h @@ -65,6 +65,11 @@ public: return false; } + // renderNodesUsingHTML - If the function returns true, nodes will be + // rendered using HTML-like labels which allows colors, etc in the nodes + // and the edge source labels. + static bool renderNodesUsingHTML() { return false; } + /// getNodeLabel - Given a node and a pointer to the top level graph, return /// the label to print in the node. template<typename GraphType> diff --git a/llvm/include/llvm/Support/DataExtractor.h b/llvm/include/llvm/Support/DataExtractor.h index f9335c161563..f4f5905d4bcc 100644 --- a/llvm/include/llvm/Support/DataExtractor.h +++ b/llvm/include/llvm/Support/DataExtractor.h @@ -70,6 +70,9 @@ public: /// the position of the Cursor before the first error was encountered. uint64_t tell() const { return Offset; } + /// Set the cursor to the new offset. This does not impact the error state. + void seek(uint64_t NewOffSet) { Offset = NewOffSet; } + /// Return error contained inside this Cursor, if any. Clears the internal /// Cursor state. Error takeError() { return std::move(Err); } diff --git a/llvm/include/llvm/Support/Debug.h b/llvm/include/llvm/Support/Debug.h index 64b730951bda..2ff978476c79 100644 --- a/llvm/include/llvm/Support/Debug.h +++ b/llvm/include/llvm/Support/Debug.h @@ -78,27 +78,6 @@ void setCurrentDebugTypes(const char **Types, unsigned Count); /// extern bool DebugFlag; -/// \name Verification flags. -/// -/// These flags turns on/off that are expensive and are turned off by default, -/// unless macro EXPENSIVE_CHECKS is defined. The flags allow selectively -/// turning the checks on without need to recompile. -/// \{ - -/// Enables verification of dominator trees. -/// -extern bool VerifyDomInfo; - -/// Enables verification of loop info. 
-/// -extern bool VerifyLoopInfo; - -/// Enables verification of MemorySSA. -/// -extern bool VerifyMemorySSA; - -///\} - /// EnableDebugBuffering - This defaults to false. If true, the debug /// stream will install signal handlers to dump any buffered debug /// output. It allows clients to selectively allow the debug stream diff --git a/llvm/include/llvm/Support/DivisionByConstantInfo.h b/llvm/include/llvm/Support/DivisionByConstantInfo.h new file mode 100644 index 000000000000..5bb326178c3e --- /dev/null +++ b/llvm/include/llvm/Support/DivisionByConstantInfo.h @@ -0,0 +1,38 @@ +//== llvm/Support/DivisonByConstantInfo.h - division by constant -*- C++ -*-==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// This file implements support for optimizing divisions by a constant +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SUPPORT_DIVISON_BY_CONSTANT_INFO_H +#define LLVM_SUPPORT_DIVISON_BY_CONSTANT_INFO_H + +#include "llvm/ADT/APInt.h" + +namespace llvm { + +/// Magic data for optimising signed division by a constant. +struct SignedDivisionByConstantInfo { + static SignedDivisionByConstantInfo get(const APInt &D); + APInt Magic; ///< magic number + unsigned ShiftAmount; ///< shift amount +}; + +/// Magic data for optimising unsigned division by a constant. 
+struct UnsignedDivisonByConstantInfo { + static UnsignedDivisonByConstantInfo get(const APInt &D, + unsigned LeadingZeros = 0); + APInt Magic; ///< magic number + bool IsAdd; ///< add indicator + unsigned ShiftAmount; ///< shift amount +}; + +} // namespace llvm + +#endif diff --git a/llvm/include/llvm/Support/Error.h b/llvm/include/llvm/Support/Error.h index e8f340e452ef..e2002b89ada2 100644 --- a/llvm/include/llvm/Support/Error.h +++ b/llvm/include/llvm/Support/Error.h @@ -257,8 +257,7 @@ private: // of debug prints can cause the function to be too large for inlining. So // it's important that we define this function out of line so that it can't be // inlined. - LLVM_ATTRIBUTE_NORETURN - void fatalUncheckedError() const; + [[noreturn]] void fatalUncheckedError() const; #endif void assertIsChecked() { @@ -314,7 +313,7 @@ private: } friend raw_ostream &operator<<(raw_ostream &OS, const Error &E) { - if (auto P = E.getPtr()) + if (auto *P = E.getPtr()) P->log(OS); else OS << "success"; @@ -374,7 +373,7 @@ class ErrorList final : public ErrorInfo<ErrorList> { public: void log(raw_ostream &OS) const override { OS << "Multiple errors:\n"; - for (auto &ErrPayload : Payloads) { + for (const auto &ErrPayload : Payloads) { ErrPayload->log(OS); OS << "\n"; } @@ -578,6 +577,16 @@ public: return const_cast<Expected<T> *>(this)->get(); } + /// Returns \a takeError() after moving the held T (if any) into \p V. + template <class OtherT> + Error moveInto(OtherT &Value, + std::enable_if_t<std::is_assignable<OtherT &, T &&>::value> * = + nullptr) && { + if (*this) + Value = std::move(get()); + return takeError(); + } + /// Check that this Expected<T> is an error of type ErrT. 
template <typename ErrT> bool errorIsA() const { return HasError && (*getErrorStorage())->template isA<ErrT>(); @@ -688,9 +697,7 @@ private: } #if LLVM_ENABLE_ABI_BREAKING_CHECKS - LLVM_ATTRIBUTE_NORETURN - LLVM_ATTRIBUTE_NOINLINE - void fatalUncheckedExpected() const { + [[noreturn]] LLVM_ATTRIBUTE_NOINLINE void fatalUncheckedExpected() const { dbgs() << "Expected<T> must be checked before access or destruction.\n"; if (HasError) { dbgs() << "Unchecked Expected<T> contained error:\n"; @@ -722,8 +729,7 @@ private: /// Report a serious error, calling any installed error handler. See /// ErrorHandling.h. -LLVM_ATTRIBUTE_NORETURN void report_fatal_error(Error Err, - bool gen_crash_diag = true); +[[noreturn]] void report_fatal_error(Error Err, bool gen_crash_diag = true); /// Report a fatal error if Err is a failure value. /// @@ -1159,7 +1165,7 @@ protected: /// It should only be used in this situation, and should never be used where a /// sensible conversion to std::error_code is available, as attempts to convert /// to/from this error will result in a fatal error. (i.e. it is a programmatic -///error to try to convert such a value). +/// error to try to convert such a value). std::error_code inconvertibleErrorCode(); /// Helper for converting an std::error_code to a Error. 
@@ -1263,13 +1269,20 @@ class FileError final : public ErrorInfo<FileError> { public: void log(raw_ostream &OS) const override { - assert(Err && !FileName.empty() && "Trying to log after takeError()."); + assert(Err && "Trying to log after takeError()."); OS << "'" << FileName << "': "; if (Line.hasValue()) OS << "line " << Line.getValue() << ": "; Err->log(OS); } + std::string messageWithoutFileInfo() const { + std::string Msg; + raw_string_ostream OS(Msg); + Err->log(OS); + return OS.str(); + } + StringRef getFileName() { return FileName; } Error takeError() { return Error(std::move(Err)); } @@ -1283,8 +1296,6 @@ private: FileError(const Twine &F, Optional<size_t> LineNum, std::unique_ptr<ErrorInfoBase> E) { assert(E && "Cannot create FileError from Error success value."); - assert(!F.isTriviallyEmpty() && - "The file name provided to FileError must not be empty."); FileName = F.str(); Err = std::move(E); Line = std::move(LineNum); diff --git a/llvm/include/llvm/Support/ErrorHandling.h b/llvm/include/llvm/Support/ErrorHandling.h index 0ec0242d569d..f980510d37f0 100644 --- a/llvm/include/llvm/Support/ErrorHandling.h +++ b/llvm/include/llvm/Support/ErrorHandling.h @@ -15,15 +15,14 @@ #define LLVM_SUPPORT_ERRORHANDLING_H #include "llvm/Support/Compiler.h" -#include <string> namespace llvm { -class StringRef; + class StringRef; class Twine; /// An error handler callback. typedef void (*fatal_error_handler_t)(void *user_data, - const std::string& reason, + const char *reason, bool gen_crash_diag); /// install_fatal_error_handler - Installs a new error handler to be used @@ -68,14 +67,13 @@ class StringRef; /// standard error, followed by a newline. /// After the error handler is called this function will call abort(), it /// does not return. 
-LLVM_ATTRIBUTE_NORETURN void report_fatal_error(const char *reason, - bool gen_crash_diag = true); -LLVM_ATTRIBUTE_NORETURN void report_fatal_error(const std::string &reason, - bool gen_crash_diag = true); -LLVM_ATTRIBUTE_NORETURN void report_fatal_error(StringRef reason, - bool gen_crash_diag = true); -LLVM_ATTRIBUTE_NORETURN void report_fatal_error(const Twine &reason, - bool gen_crash_diag = true); +/// NOTE: The std::string variant was removed to avoid a <string> dependency. +[[noreturn]] void report_fatal_error(const char *reason, + bool gen_crash_diag = true); +[[noreturn]] void report_fatal_error(StringRef reason, + bool gen_crash_diag = true); +[[noreturn]] void report_fatal_error(const Twine &reason, + bool gen_crash_diag = true); /// Installs a new bad alloc error handler that should be used whenever a /// bad alloc error, e.g. failing malloc/calloc, is encountered by LLVM. @@ -113,13 +111,13 @@ void install_out_of_memory_new_handler(); /// If no error handler is installed (default), throws a bad_alloc exception /// if LLVM is compiled with exception support. Otherwise prints the error /// to standard error and calls abort(). -LLVM_ATTRIBUTE_NORETURN void report_bad_alloc_error(const char *Reason, - bool GenCrashDiag = true); +[[noreturn]] void report_bad_alloc_error(const char *Reason, + bool GenCrashDiag = true); /// This function calls abort(), and prints the optional message to stderr. /// Use the llvm_unreachable macro (that adds location info), instead of /// calling this function directly. 
-LLVM_ATTRIBUTE_NORETURN void +[[noreturn]] void llvm_unreachable_internal(const char *msg = nullptr, const char *file = nullptr, unsigned line = 0); } diff --git a/llvm/include/llvm/Support/ExtensibleRTTI.h b/llvm/include/llvm/Support/ExtensibleRTTI.h index 6b8510ce759f..21055247e932 100644 --- a/llvm/include/llvm/Support/ExtensibleRTTI.h +++ b/llvm/include/llvm/Support/ExtensibleRTTI.h @@ -1,9 +1,8 @@ //===-- llvm/Support/ExtensibleRTTI.h - ExtensibleRTTI support --*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/llvm/include/llvm/Support/FileSystem.h b/llvm/include/llvm/Support/FileSystem.h index 38779ef4a3af..1a049533b82b 100644 --- a/llvm/include/llvm/Support/FileSystem.h +++ b/llvm/include/llvm/Support/FileSystem.h @@ -772,7 +772,8 @@ enum OpenFlags : unsigned { /// The file should be opened in append mode. OF_Append = 4, - /// Delete the file on close. Only makes a difference on windows. + /// The returned handle can be used for deleting the file. Only makes a + /// difference on windows. OF_Delete = 8, /// When a child process is launched, this file should remain open in the @@ -865,6 +866,11 @@ public: // The open file descriptor. int FD = -1; +#ifdef _WIN32 + // Whether we need to manually remove the file on close. + bool RemoveOnClose = false; +#endif + // Keep this with the given name. 
Error keep(const Twine &Name); diff --git a/llvm/include/llvm/Support/FileSystem/UniqueID.h b/llvm/include/llvm/Support/FileSystem/UniqueID.h index 229410c8292e..0d5367236e8d 100644 --- a/llvm/include/llvm/Support/FileSystem/UniqueID.h +++ b/llvm/include/llvm/Support/FileSystem/UniqueID.h @@ -14,7 +14,10 @@ #ifndef LLVM_SUPPORT_FILESYSTEM_UNIQUEID_H #define LLVM_SUPPORT_FILESYSTEM_UNIQUEID_H +#include "llvm/ADT/DenseMapInfo.h" +#include "llvm/ADT/Hashing.h" #include <cstdint> +#include <utility> namespace llvm { namespace sys { @@ -47,6 +50,30 @@ public: } // end namespace fs } // end namespace sys + +// Support UniqueIDs as DenseMap keys. +template <> struct DenseMapInfo<llvm::sys::fs::UniqueID> { + static inline llvm::sys::fs::UniqueID getEmptyKey() { + auto EmptyKey = DenseMapInfo<std::pair<uint64_t, uint64_t>>::getEmptyKey(); + return {EmptyKey.first, EmptyKey.second}; + } + + static inline llvm::sys::fs::UniqueID getTombstoneKey() { + auto TombstoneKey = + DenseMapInfo<std::pair<uint64_t, uint64_t>>::getTombstoneKey(); + return {TombstoneKey.first, TombstoneKey.second}; + } + + static hash_code getHashValue(const llvm::sys::fs::UniqueID &Tag) { + return hash_value(std::make_pair(Tag.getDevice(), Tag.getFile())); + } + + static bool isEqual(const llvm::sys::fs::UniqueID &LHS, + const llvm::sys::fs::UniqueID &RHS) { + return LHS == RHS; + } +}; + } // end namespace llvm #endif // LLVM_SUPPORT_FILESYSTEM_UNIQUEID_H diff --git a/llvm/include/llvm/Support/FormatVariadic.h b/llvm/include/llvm/Support/FormatVariadic.h index 094b054f773f..89575f01b717 100644 --- a/llvm/include/llvm/Support/FormatVariadic.h +++ b/llvm/include/llvm/Support/FormatVariadic.h @@ -94,7 +94,7 @@ public: continue; } - auto W = Adapters[R.Index]; + auto *W = Adapters[R.Index]; FmtAlign Align(*W, R.Where, R.Align, R.Pad); Align.format(S, R.Options); diff --git a/llvm/include/llvm/Support/GenericDomTreeConstruction.h b/llvm/include/llvm/Support/GenericDomTreeConstruction.h index 
d306ebe99bc1..e504a0eddeba 100644 --- a/llvm/include/llvm/Support/GenericDomTreeConstruction.h +++ b/llvm/include/llvm/Support/GenericDomTreeConstruction.h @@ -78,7 +78,7 @@ struct SemiNCAInfo { using UpdateT = typename DomTreeT::UpdateType; using UpdateKind = typename DomTreeT::UpdateKind; struct BatchUpdateInfo { - // Note: Updates inside PreViewCFG are aleady legalized. + // Note: Updates inside PreViewCFG are already legalized. BatchUpdateInfo(GraphDiffT &PreViewCFG, GraphDiffT *PostViewCFG = nullptr) : PreViewCFG(PreViewCFG), PostViewCFG(PostViewCFG), NumLegalized(PreViewCFG.getNumLegalizedUpdates()) {} @@ -430,7 +430,6 @@ struct SemiNCAInfo { // is unreachable. This is because we are still going to only visit each // unreachable node once, we may just visit it in two directions, // depending on how lucky we get. - SmallPtrSet<NodePtr, 4> ConnectToExitBlock; for (const NodePtr I : nodes(DT.Parent)) { if (SNCA.NodeToInfo.count(I) == 0) { LLVM_DEBUG(dbgs() @@ -457,7 +456,6 @@ struct SemiNCAInfo { LLVM_DEBUG(dbgs() << "\t\t\tFound a new furthest away node " << "(non-trivial root): " << BlockNamePrinter(FurthestAway) << "\n"); - ConnectToExitBlock.insert(FurthestAway); Roots.push_back(FurthestAway); LLVM_DEBUG(dbgs() << "\t\t\tPrev DFSNum: " << Num << ", new DFSNum: " << NewNum << "\n\t\t\tRemoving DFS info\n"); diff --git a/llvm/include/llvm/Support/GraphWriter.h b/llvm/include/llvm/Support/GraphWriter.h index b886bf45f474..11a31bf40160 100644 --- a/llvm/include/llvm/Support/GraphWriter.h +++ b/llvm/include/llvm/Support/GraphWriter.h @@ -66,6 +66,7 @@ template<typename GraphType> class GraphWriter { raw_ostream &O; const GraphType &G; + bool RenderUsingHTML = false; using DOTTraits = DOTGraphTraits<GraphType>; using GTraits = GraphTraits<GraphType>; @@ -86,6 +87,9 @@ class GraphWriter { child_iterator EE = GTraits::child_end(Node); bool hasEdgeSourceLabels = false; + if (RenderUsingHTML) + O << "</tr><tr>"; + for (unsigned i = 0; EI != EE && i != 64; ++EI, ++i) { 
std::string label = DTraits.getEdgeSourceLabel(Node, EI); @@ -94,14 +98,22 @@ class GraphWriter { hasEdgeSourceLabels = true; - if (i) - O << "|"; + if (RenderUsingHTML) + O << "<td colspan=\"1\" port=\"s" << i << "\">" << label << "</td>"; + else { + if (i) + O << "|"; - O << "<s" << i << ">" << DOT::EscapeString(label); + O << "<s" << i << ">" << DOT::EscapeString(label); + } } - if (EI != EE && hasEdgeSourceLabels) - O << "|<s64>truncated..."; + if (EI != EE && hasEdgeSourceLabels) { + if (RenderUsingHTML) + O << "<td colspan=\"1\" port=\"s64\">truncated...</td>"; + else + O << "|<s64>truncated..."; + } return hasEdgeSourceLabels; } @@ -109,6 +121,7 @@ class GraphWriter { public: GraphWriter(raw_ostream &o, const GraphType &g, bool SN) : O(o), G(g) { DTraits = DOTTraits(SN); + RenderUsingHTML = DTraits.renderNodesUsingHTML(); } void writeGraph(const std::string &Title = "") { @@ -163,12 +176,39 @@ public: void writeNode(NodeRef Node) { std::string NodeAttributes = DTraits.getNodeAttributes(Node, G); - O << "\tNode" << static_cast<const void*>(Node) << " [shape=record,"; + O << "\tNode" << static_cast<const void *>(Node) << " [shape="; + if (RenderUsingHTML) + O << "none,"; + else + O << "record,"; + if (!NodeAttributes.empty()) O << NodeAttributes << ","; - O << "label=\"{"; + O << "label="; + + if (RenderUsingHTML) { + // Count the numbewr of edges out of the node to determine how + // many columns to span (max 64) + unsigned ColSpan = 0; + child_iterator EI = GTraits::child_begin(Node); + child_iterator EE = GTraits::child_end(Node); + for (; EI != EE && ColSpan != 64; ++EI, ++ColSpan) + ; + if (ColSpan == 0) + ColSpan = 1; + // Include truncated messages when counting. 
+ if (EI != EE) + ++ColSpan; + O << "<<table border=\"0\" cellborder=\"1\" cellspacing=\"0\"" + << " cellpadding=\"0\"><tr><td align=\"text\" colspan=\"" << ColSpan + << "\">"; + } else + O << "\"{"; if (!DTraits.renderGraphFromBottomUp()) { - O << DOT::EscapeString(DTraits.getNodeLabel(Node, G)); + if (RenderUsingHTML) + O << DTraits.getNodeLabel(Node, G) << "</td>"; + else + O << DOT::EscapeString(DTraits.getNodeLabel(Node, G)); // If we should include the address of the node in the label, do so now. std::string Id = DTraits.getNodeIdentifierLabel(Node, G); @@ -185,15 +225,25 @@ public: bool hasEdgeSourceLabels = getEdgeSourceLabels(EdgeSourceLabels, Node); if (hasEdgeSourceLabels) { - if (!DTraits.renderGraphFromBottomUp()) O << "|"; - - O << "{" << EdgeSourceLabels.str() << "}"; - - if (DTraits.renderGraphFromBottomUp()) O << "|"; + if (!DTraits.renderGraphFromBottomUp()) + if (!RenderUsingHTML) + O << "|"; + + if (RenderUsingHTML) + O << EdgeSourceLabels.str(); + else + O << "{" << EdgeSourceLabels.str() << "}"; + + if (DTraits.renderGraphFromBottomUp()) + if (!RenderUsingHTML) + O << "|"; } if (DTraits.renderGraphFromBottomUp()) { - O << DOT::EscapeString(DTraits.getNodeLabel(Node, G)); + if (RenderUsingHTML) + O << DTraits.getNodeLabel(Node, G); + else + O << DOT::EscapeString(DTraits.getNodeLabel(Node, G)); // If we should include the address of the node in the label, do so now. std::string Id = DTraits.getNodeIdentifierLabel(Node, G); @@ -215,12 +265,17 @@ public: << DOT::EscapeString(DTraits.getEdgeDestLabel(Node, i)); } - if (i != e) - O << "|<d64>truncated..."; - O << "}"; + if (RenderUsingHTML) + O << "<td colspan=\"1\">... 
truncated</td>"; + else if (i != e) + O << "|<d64>truncated...}"; } - O << "}\"];\n"; // Finish printing the "node" line + if (RenderUsingHTML) + O << "</tr></table>>"; + else + O << "}\""; + O << "];\n"; // Finish printing the "node" line // Output all of the edges now child_iterator EI = GTraits::child_begin(Node); diff --git a/llvm/include/llvm/Support/HashBuilder.h b/llvm/include/llvm/Support/HashBuilder.h new file mode 100644 index 000000000000..bf93a0d22da7 --- /dev/null +++ b/llvm/include/llvm/Support/HashBuilder.h @@ -0,0 +1,438 @@ +//===- llvm/Support/HashBuilder.h - Convenient hashing interface-*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements an interface allowing to conveniently build hashes of +// various data types, without relying on the underlying hasher type to know +// about hashed data types. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SUPPORT_HASHBUILDER_H +#define LLVM_SUPPORT_HASHBUILDER_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/Hashing.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/type_traits.h" + +#include <iterator> +#include <utility> + +namespace llvm { + +namespace hashbuilder_detail { +/// Trait to indicate whether a type's bits can be hashed directly (after +/// endianness correction). +template <typename U> +struct IsHashableData + : std::integral_constant<bool, is_integral_or_enum<U>::value> {}; + +} // namespace hashbuilder_detail + +/// Declares the hasher member, and functions forwarding directly to the hasher. 
+template <typename HasherT> class HashBuilderBase { +public: + HasherT &getHasher() { return Hasher; } + + /// Forward to `HasherT::update(ArrayRef<uint8_t>)`. + /// + /// This may not take the size of `Data` into account. + /// Users of this function should pay attention to respect endianness + /// contraints. + void update(ArrayRef<uint8_t> Data) { this->getHasher().update(Data); } + + /// Forward to `HasherT::update(ArrayRef<uint8_t>)`. + /// + /// This may not take the size of `Data` into account. + /// Users of this function should pay attention to respect endianness + /// contraints. + void update(StringRef Data) { + update(makeArrayRef(reinterpret_cast<const uint8_t *>(Data.data()), + Data.size())); + } + + /// Forward to `HasherT::final()` if available. + template <typename HasherT_ = HasherT> StringRef final() { + return this->getHasher().final(); + } + + /// Forward to `HasherT::result()` if available. + template <typename HasherT_ = HasherT> StringRef result() { + return this->getHasher().result(); + } + +protected: + explicit HashBuilderBase(HasherT &Hasher) : Hasher(Hasher) {} + + template <typename... ArgTypes> + explicit HashBuilderBase(ArgTypes &&...Args) + : OptionalHasher(in_place, std::forward<ArgTypes>(Args)...), + Hasher(*OptionalHasher) {} + +private: + Optional<HasherT> OptionalHasher; + HasherT &Hasher; +}; + +/// Implementation of the `HashBuilder` interface. +/// +/// `support::endianness::native` is not supported. `HashBuilder` is +/// expected to canonicalize `support::endianness::native` to one of +/// `support::endianness::big` or `support::endianness::little`. +template <typename HasherT, support::endianness Endianness> +class HashBuilderImpl : public HashBuilderBase<HasherT> { + static_assert(Endianness != support::endianness::native, + "HashBuilder should canonicalize endianness"); + +public: + explicit HashBuilderImpl(HasherT &Hasher) + : HashBuilderBase<HasherT>(Hasher) {} + template <typename... 
ArgTypes> + explicit HashBuilderImpl(ArgTypes &&...Args) + : HashBuilderBase<HasherT>(Args...) {} + + /// Implement hashing for hashable data types, e.g. integral or enum values. + template <typename T> + std::enable_if_t<hashbuilder_detail::IsHashableData<T>::value, + HashBuilderImpl &> + add(T Value) { + return adjustForEndiannessAndAdd(Value); + } + + /// Support hashing `ArrayRef`. + /// + /// `Value.size()` is taken into account to ensure cases like + /// ``` + /// builder.add({1}); + /// builder.add({2, 3}); + /// ``` + /// and + /// ``` + /// builder.add({1, 2}); + /// builder.add({3}); + /// ``` + /// do not collide. + template <typename T> HashBuilderImpl &add(ArrayRef<T> Value) { + // As of implementation time, simply calling `addRange(Value)` would also go + // through the `update` fast path. But that would rely on the implementation + // details of `ArrayRef::begin()` and `ArrayRef::end()`. Explicitly call + // `update` to guarantee the fast path. + add(Value.size()); + if (hashbuilder_detail::IsHashableData<T>::value && + Endianness == support::endian::system_endianness()) { + this->update( + makeArrayRef(reinterpret_cast<const uint8_t *>(Value.begin()), + Value.size() * sizeof(T))); + } else { + for (auto &V : Value) + add(V); + } + return *this; + } + + /// Support hashing `StringRef`. + /// + /// `Value.size()` is taken into account to ensure cases like + /// ``` + /// builder.add("a"); + /// builder.add("bc"); + /// ``` + /// and + /// ``` + /// builder.add("ab"); + /// builder.add("c"); + /// ``` + /// do not collide. + HashBuilderImpl &add(StringRef Value) { + // As of implementation time, simply calling `addRange(Value)` would also go + // through `update`. But that would rely on the implementation of + // `StringRef::begin()` and `StringRef::end()`. Explicitly call `update` to + // guarantee the fast path. 
+ add(Value.size()); + this->update(makeArrayRef(reinterpret_cast<const uint8_t *>(Value.begin()), + Value.size())); + return *this; + } + + template <typename T> + using HasAddHashT = + decltype(addHash(std::declval<HashBuilderImpl &>(), std::declval<T &>())); + /// Implement hashing for user-defined `struct`s. + /// + /// Any user-define `struct` can participate in hashing via `HashBuilder` by + /// providing a `addHash` templated function. + /// + /// ``` + /// template <typename HasherT, support::endianness Endianness> + /// void addHash(HashBuilder<HasherT, Endianness> &HBuilder, + /// const UserDefinedStruct &Value); + /// ``` + /// + /// For example: + /// ``` + /// struct SimpleStruct { + /// char c; + /// int i; + /// }; + /// + /// template <typename HasherT, support::endianness Endianness> + /// void addHash(HashBuilderImpl<HasherT, Endianness> &HBuilder, + /// const SimpleStruct &Value) { + /// HBuilder.add(Value.c); + /// HBuilder.add(Value.i); + /// } + /// ``` + /// + /// To avoid endianness issues, specializations of `addHash` should + /// generally rely on exising `add`, `addRange`, and `addRangeElements` + /// functions. If directly using `update`, an implementation must correctly + /// handle endianness. + /// + /// ``` + /// struct __attribute__ ((packed)) StructWithFastHash { + /// int I; + /// char C; + /// + /// // If possible, we want to hash both `I` and `C` in a single + /// // `update` call for performance concerns. + /// template <typename HasherT, support::endianness Endianness> + /// friend void addHash(HashBuilderImpl<HasherT, Endianness> &HBuilder, + /// const StructWithFastHash &Value) { + /// if (Endianness == support::endian::system_endianness()) { + /// HBuilder.update(makeArrayRef( + /// reinterpret_cast<const uint8_t *>(&Value), sizeof(Value))); + /// } else { + /// // Rely on existing `add` methods to handle endianness. 
+ /// HBuilder.add(Value.I); + /// HBuilder.add(Value.C); + /// } + /// } + /// }; + /// ``` + /// + /// To avoid collisions, specialization of `addHash` for variable-size + /// types must take the size into account. + /// + /// For example: + /// ``` + /// struct CustomContainer { + /// private: + /// size_t Size; + /// int Elements[100]; + /// + /// public: + /// CustomContainer(size_t Size) : Size(Size) { + /// for (size_t I = 0; I != Size; ++I) + /// Elements[I] = I; + /// } + /// template <typename HasherT, support::endianness Endianness> + /// friend void addHash(HashBuilderImpl<HasherT, Endianness> &HBuilder, + /// const CustomContainer &Value) { + /// if (Endianness == support::endian::system_endianness()) { + /// HBuilder.update(makeArrayRef( + /// reinterpret_cast<const uint8_t *>(&Value.Size), + /// sizeof(Value.Size) + Value.Size * sizeof(Value.Elements[0]))); + /// } else { + /// // `addRange` will take care of encoding the size. + /// HBuilder.addRange(&Value.Elements[0], &Value.Elements[0] + + /// Value.Size); + /// } + /// } + /// }; + /// ``` + template <typename T> + std::enable_if_t<is_detected<HasAddHashT, T>::value && + !hashbuilder_detail::IsHashableData<T>::value, + HashBuilderImpl &> + add(const T &Value) { + addHash(*this, Value); + return *this; + } + + template <typename T1, typename T2> + HashBuilderImpl &add(const std::pair<T1, T2> &Value) { + add(Value.first); + add(Value.second); + return *this; + } + + template <typename... Ts> HashBuilderImpl &add(const std::tuple<Ts...> &Arg) { + return addTupleHelper(Arg, typename std::index_sequence_for<Ts...>()); + } + + /// A convenenience variadic helper. + /// It simply iterates over its arguments, in order. + /// ``` + /// add(Arg1, Arg2); + /// ``` + /// is equivalent to + /// ``` + /// add(Arg1) + /// add(Arg2) + /// ``` + template <typename T, typename... 
Ts> + typename std::enable_if<(sizeof...(Ts) >= 1), HashBuilderImpl &>::type + add(const T &FirstArg, const Ts &...Args) { + add(FirstArg); + add(Args...); + return *this; + } + + template <typename ForwardIteratorT> + HashBuilderImpl &addRange(ForwardIteratorT First, ForwardIteratorT Last) { + add(std::distance(First, Last)); + return addRangeElements(First, Last); + } + + template <typename RangeT> HashBuilderImpl &addRange(const RangeT &Range) { + return addRange(adl_begin(Range), adl_end(Range)); + } + + template <typename ForwardIteratorT> + HashBuilderImpl &addRangeElements(ForwardIteratorT First, + ForwardIteratorT Last) { + return addRangeElementsImpl( + First, Last, + typename std::iterator_traits<ForwardIteratorT>::iterator_category()); + } + + template <typename RangeT> + HashBuilderImpl &addRangeElements(const RangeT &Range) { + return addRangeElements(adl_begin(Range), adl_end(Range)); + } + + template <typename T> + using HasByteSwapT = decltype(support::endian::byte_swap( + std::declval<T &>(), support::endianness::little)); + /// Adjust `Value` for the target endianness and add it to the hash. + template <typename T> + std::enable_if_t<is_detected<HasByteSwapT, T>::value, HashBuilderImpl &> + adjustForEndiannessAndAdd(const T &Value) { + T SwappedValue = support::endian::byte_swap(Value, Endianness); + this->update(makeArrayRef(reinterpret_cast<const uint8_t *>(&SwappedValue), + sizeof(SwappedValue))); + return *this; + } + +private: + template <typename... Ts, std::size_t... Indices> + HashBuilderImpl &addTupleHelper(const std::tuple<Ts...> &Arg, + std::index_sequence<Indices...>) { + add(std::get<Indices>(Arg)...); + return *this; + } + + // FIXME: Once available, specialize this function for `contiguous_iterator`s, + // and use it for `ArrayRef` and `StringRef`. 
+ template <typename ForwardIteratorT> + HashBuilderImpl &addRangeElementsImpl(ForwardIteratorT First, + ForwardIteratorT Last, + std::forward_iterator_tag) { + for (auto It = First; It != Last; ++It) + add(*It); + return *this; + } + + template <typename T> + std::enable_if_t<hashbuilder_detail::IsHashableData<T>::value && + Endianness == support::endian::system_endianness(), + HashBuilderImpl &> + addRangeElementsImpl(T *First, T *Last, std::forward_iterator_tag) { + this->update(makeArrayRef(reinterpret_cast<const uint8_t *>(First), + (Last - First) * sizeof(T))); + return *this; + } +}; + +/// Interface to help hash various types through a hasher type. +/// +/// Via provided specializations of `add`, `addRange`, and `addRangeElements` +/// functions, various types (e.g. `ArrayRef`, `StringRef`, etc.) can be hashed +/// without requiring any knowledge of hashed types from the hasher type. +/// +/// The only method expected from the templated hasher type `HasherT` is: +/// * void update(ArrayRef<uint8_t> Data) +/// +/// Additionally, the following methods will be forwarded to the hasher type: +/// * decltype(std::declval<HasherT &>().final()) final() +/// * decltype(std::declval<HasherT &>().result()) result() +/// +/// From a user point of view, the interface provides the following: +/// * `template<typename T> add(const T &Value)` +/// The `add` function implements hashing of various types. +/// * `template <typename ItT> void addRange(ItT First, ItT Last)` +/// The `addRange` function is designed to aid hashing a range of values. +/// It explicitly adds the size of the range in the hash. +/// * `template <typename ItT> void addRangeElements(ItT First, ItT Last)` +/// The `addRangeElements` function is also designed to aid hashing a range of +/// values. In contrast to `addRange`, it **ignores** the size of the range, +/// behaving as if elements were added one at a time with `add`. 
+/// +/// User-defined `struct` types can participate in this interface by providing +/// an `addHash` templated function. See the associated template specialization +/// for details. +/// +/// This interface does not impose requirements on the hasher +/// `update(ArrayRef<uint8_t> Data)` method. We want to avoid collisions for +/// variable-size types; for example for +/// ``` +/// builder.add({1}); +/// builder.add({2, 3}); +/// ``` +/// and +/// ``` +/// builder.add({1, 2}); +/// builder.add({3}); +/// ``` +/// . Thus, specializations of `add` and `addHash` for variable-size types must +/// not assume that the hasher type considers the size as part of the hash; they +/// must explicitly add the size to the hash. See for example specializations +/// for `ArrayRef` and `StringRef`. +/// +/// Additionally, since types are eventually forwarded to the hasher's +/// `void update(ArrayRef<uint8_t>)` method, endianness plays a role in the hash +/// computation (for example when computing `add((int)123)`). +/// Specifiying a non-`native` `Endianness` template parameter allows to compute +/// stable hash across platforms with different endianness. +template <class HasherT, support::endianness Endianness> +using HashBuilder = + HashBuilderImpl<HasherT, (Endianness == support::endianness::native + ? support::endian::system_endianness() + : Endianness)>; + +namespace hashbuilder_detail { +class HashCodeHasher { +public: + HashCodeHasher() : Code(0) {} + void update(ArrayRef<uint8_t> Data) { + hash_code DataCode = hash_value(Data); + Code = hash_combine(Code, DataCode); + } + hash_code Code; +}; + +using HashCodeHashBuilder = HashBuilder<hashbuilder_detail::HashCodeHasher, + support::endianness::native>; +} // namespace hashbuilder_detail + +/// Provide a default implementation of `hash_value` when `addHash(const T &)` +/// is supported. 
+template <typename T> +std::enable_if_t< + is_detected<hashbuilder_detail::HashCodeHashBuilder::HasAddHashT, T>::value, + hash_code> +hash_value(const T &Value) { + hashbuilder_detail::HashCodeHashBuilder HBuilder; + HBuilder.add(Value); + return HBuilder.getHasher().Code; +} +} // end namespace llvm + +#endif // LLVM_SUPPORT_HASHBUILDER_H diff --git a/llvm/include/llvm/Support/JSON.h b/llvm/include/llvm/Support/JSON.h index c753cee60ec1..469f50be40e0 100644 --- a/llvm/include/llvm/Support/JSON.h +++ b/llvm/include/llvm/Support/JSON.h @@ -234,7 +234,7 @@ inline bool operator!=(const Array &L, const Array &R) { return !(L == R); } /// Each Value is one of the JSON kinds: /// null (nullptr_t) /// boolean (bool) -/// number (double or int64) +/// number (double, int64 or uint64) /// string (StringRef) /// array (json::Array) /// object (json::Object) @@ -342,9 +342,20 @@ public: Value(T B) : Type(T_Boolean) { create<bool>(B); } - // Integers (except boolean). Must be non-narrowing convertible to int64_t. + + // Unsigned 64-bit long integers. + template <typename T, + typename = std::enable_if_t<std::is_same<T, uint64_t>::value>, + bool = false, bool = false> + Value(T V) : Type(T_UINT64) { + create<uint64_t>(uint64_t{V}); + } + + // Integers (except boolean and uint64_t). + // Must be non-narrowing convertible to int64_t. 
template <typename T, typename = std::enable_if_t<std::is_integral<T>::value>, - typename = std::enable_if_t<!std::is_same<T, bool>::value>> + typename = std::enable_if_t<!std::is_same<T, bool>::value>, + typename = std::enable_if_t<!std::is_same<T, uint64_t>::value>> Value(T I) : Type(T_Integer) { create<int64_t>(int64_t{I}); } @@ -382,6 +393,7 @@ public: return Boolean; case T_Double: case T_Integer: + case T_UINT64: return Number; case T_String: case T_StringRef: @@ -410,6 +422,8 @@ public: return as<double>(); if (LLVM_LIKELY(Type == T_Integer)) return as<int64_t>(); + if (LLVM_LIKELY(Type == T_UINT64)) + return as<uint64_t>(); return llvm::None; } // Succeeds if the Value is a Number, and exactly representable as int64_t. @@ -425,6 +439,16 @@ public: } return llvm::None; } + llvm::Optional<uint64_t> getAsUINT64() const { + if (Type == T_UINT64) + return as<uint64_t>(); + else if (Type == T_Integer) { + int64_t N = as<int64_t>(); + if (N >= 0) + return as<uint64_t>(); + } + return llvm::None; + } llvm::Optional<llvm::StringRef> getAsString() const { if (Type == T_String) return llvm::StringRef(as<std::string>()); @@ -467,11 +491,12 @@ private: friend class OStream; - enum ValueType : char { + enum ValueType : char16_t { T_Null, T_Boolean, T_Double, T_Integer, + T_UINT64, T_StringRef, T_String, T_Object, @@ -479,8 +504,9 @@ private: }; // All members mutable, see moveFrom(). 
mutable ValueType Type; - mutable llvm::AlignedCharArrayUnion<bool, double, int64_t, llvm::StringRef, - std::string, json::Array, json::Object> + mutable llvm::AlignedCharArrayUnion<bool, double, int64_t, uint64_t, + llvm::StringRef, std::string, json::Array, + json::Object> Union; friend bool operator==(const Value &, const Value &); }; @@ -683,6 +709,14 @@ inline bool fromJSON(const Value &E, bool &Out, Path P) { P.report("expected boolean"); return false; } +inline bool fromJSON(const Value &E, uint64_t &Out, Path P) { + if (auto S = E.getAsUINT64()) { + Out = *S; + return true; + } + P.report("expected uint64_t"); + return false; +} inline bool fromJSON(const Value &E, std::nullptr_t &Out, Path P) { if (auto S = E.getAsNull()) { Out = *S; diff --git a/llvm/include/llvm/Support/KnownBits.h b/llvm/include/llvm/Support/KnownBits.h index cfec5796493f..1f32760a6fd1 100644 --- a/llvm/include/llvm/Support/KnownBits.h +++ b/llvm/include/llvm/Support/KnownBits.h @@ -60,7 +60,7 @@ public: } /// Returns true if we don't know any bits. - bool isUnknown() const { return Zero.isNullValue() && One.isNullValue(); } + bool isUnknown() const { return Zero.isZero() && One.isZero(); } /// Resets the known state of all bits. void resetAll() { @@ -71,13 +71,13 @@ public: /// Returns true if value is all zero. bool isZero() const { assert(!hasConflict() && "KnownBits conflict!"); - return Zero.isAllOnesValue(); + return Zero.isAllOnes(); } /// Returns true if value is all one bits. bool isAllOnes() const { assert(!hasConflict() && "KnownBits conflict!"); - return One.isAllOnesValue(); + return One.isAllOnes(); } /// Make all bits known to be zero and discard any previous information. @@ -99,10 +99,12 @@ public: bool isNonNegative() const { return Zero.isSignBitSet(); } /// Returns true if this value is known to be non-zero. - bool isNonZero() const { return !One.isNullValue(); } + bool isNonZero() const { return !One.isZero(); } /// Returns true if this value is known to be positive. 
- bool isStrictlyPositive() const { return Zero.isSignBitSet() && !One.isNullValue(); } + bool isStrictlyPositive() const { + return Zero.isSignBitSet() && !One.isZero(); + } /// Make this value negative. void makeNegative() { @@ -280,6 +282,10 @@ public: return getBitWidth() - Zero.countPopulation(); } + unsigned countMaxActiveBits() const { + return getBitWidth() - countMinLeadingZeros(); + } + /// Create known bits from a known constant. static KnownBits makeConstant(const APInt &C) { return KnownBits(~C, C); @@ -292,7 +298,7 @@ public: /// Return true if LHS and RHS have no common bits set. static bool haveNoCommonBitsSet(const KnownBits &LHS, const KnownBits &RHS) { - return (LHS.Zero | RHS.Zero).isAllOnesValue(); + return (LHS.Zero | RHS.Zero).isAllOnes(); } /// Compute known bits resulting from adding LHS, RHS and a 1-bit Carry. @@ -304,7 +310,8 @@ public: KnownBits RHS); /// Compute known bits resulting from multiplying LHS and RHS. - static KnownBits mul(const KnownBits &LHS, const KnownBits &RHS); + static KnownBits mul(const KnownBits &LHS, const KnownBits &RHS, + bool SelfMultiply = false); /// Compute known bits from sign-extended multiply-hi. static KnownBits mulhs(const KnownBits &LHS, const KnownBits &RHS); diff --git a/llvm/include/llvm/Support/MD5.h b/llvm/include/llvm/Support/MD5.h index 3b2d5b974d0b..3b960cd4fd88 100644 --- a/llvm/include/llvm/Support/MD5.h +++ b/llvm/include/llvm/Support/MD5.h @@ -39,18 +39,6 @@ template <unsigned N> class SmallString; template <typename T> class ArrayRef; class MD5 { - // Any 32-bit or wider unsigned integer data type will do. - typedef uint32_t MD5_u32plus; - - MD5_u32plus a = 0x67452301; - MD5_u32plus b = 0xefcdab89; - MD5_u32plus c = 0x98badcfe; - MD5_u32plus d = 0x10325476; - MD5_u32plus hi = 0; - MD5_u32plus lo = 0; - uint8_t buffer[64]; - MD5_u32plus block[16]; - public: struct MD5Result { std::array<uint8_t, 16> Bytes; @@ -90,6 +78,14 @@ public: /// Finishes off the hash and puts the result in result. 
void final(MD5Result &Result); + /// Finishes off the hash, and returns a reference to the 16-byte hash data. + StringRef final(); + + /// Finishes off the hash, and returns a reference to the 16-byte hash data. + /// This is suitable for getting the MD5 at any time without invalidating the + /// internal state, so that more calls can be made into `update`. + StringRef result(); + /// Translates the bytes in \p Res to a hex string that is /// deposited into \p Str. The result will be of length 32. static void stringifyResult(MD5Result &Result, SmallString<32> &Str); @@ -98,6 +94,23 @@ public: static std::array<uint8_t, 16> hash(ArrayRef<uint8_t> Data); private: + // Any 32-bit or wider unsigned integer data type will do. + typedef uint32_t MD5_u32plus; + + // Internal State + struct { + MD5_u32plus a = 0x67452301; + MD5_u32plus b = 0xefcdab89; + MD5_u32plus c = 0x98badcfe; + MD5_u32plus d = 0x10325476; + MD5_u32plus hi = 0; + MD5_u32plus lo = 0; + uint8_t buffer[64]; + MD5_u32plus block[16]; + } InternalState; + + MD5Result Result; + const uint8_t *body(ArrayRef<uint8_t> Data); }; diff --git a/llvm/include/llvm/Support/MSP430AttributeParser.h b/llvm/include/llvm/Support/MSP430AttributeParser.h new file mode 100644 index 000000000000..bc9b21494470 --- /dev/null +++ b/llvm/include/llvm/Support/MSP430AttributeParser.h @@ -0,0 +1,44 @@ +//===-- MSP430AttributeParser.h - MSP430 Attribute Parser -------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file contains support routines for parsing MSP430 ELF build attributes. 
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SUPPORT_MSP430ATTRIBUTEPARSER_H +#define LLVM_SUPPORT_MSP430ATTRIBUTEPARSER_H + +#include "llvm/Support/ELFAttributeParser.h" +#include "llvm/Support/MSP430Attributes.h" + +namespace llvm { +class MSP430AttributeParser : public ELFAttributeParser { + struct DisplayHandler { + MSP430Attrs::AttrType Attribute; + Error (MSP430AttributeParser::*Routine)(MSP430Attrs::AttrType); + }; + static const std::array<DisplayHandler, 4> DisplayRoutines; + + Error parseISA(MSP430Attrs::AttrType Tag); + Error parseCodeModel(MSP430Attrs::AttrType Tag); + Error parseDataModel(MSP430Attrs::AttrType Tag); + Error parseEnumSize(MSP430Attrs::AttrType Tag); + + Error handler(uint64_t Tag, bool &Handled) override; + +public: + MSP430AttributeParser(ScopedPrinter *SW) + : ELFAttributeParser(SW, MSP430Attrs::getMSP430AttributeTags(), + "mspabi") {} + MSP430AttributeParser() + : ELFAttributeParser(MSP430Attrs::getMSP430AttributeTags(), "mspabi") {} +}; +} // namespace llvm + +#endif diff --git a/llvm/include/llvm/Support/MSP430Attributes.h b/llvm/include/llvm/Support/MSP430Attributes.h new file mode 100644 index 000000000000..fccd65e844c3 --- /dev/null +++ b/llvm/include/llvm/Support/MSP430Attributes.h @@ -0,0 +1,44 @@ +//===-- MSP430Attributes.h - MSP430 Attributes ------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===-----------------------------------------------------------------------===// +/// +/// \file +/// This file contains enumerations for MSP430 ELF build attributes as +/// defined in the MSP430 ELF psABI specification. 
+/// +/// MSP430 ELF psABI specification +/// +/// https://www.ti.com/lit/pdf/slaa534 +/// +//===----------------------------------------------------------------------===// +#ifndef LLVM_SUPPORT_MSP430ATTRIBUTES_H +#define LLVM_SUPPORT_MSP430ATTRIBUTES_H + +#include "llvm/Support/ELFAttributes.h" + +namespace llvm { +namespace MSP430Attrs { + +const TagNameMap &getMSP430AttributeTags(); + +enum AttrType : unsigned { + // Attribute types in ELF/.MSP430.attributes. + TagISA = 4, + TagCodeModel = 6, + TagDataModel = 8, + TagEnumSize = 10 +}; + +enum ISA { ISAMSP430 = 1, ISAMSP430X = 2 }; +enum CodeModel { CMSmall = 1, CMLarge = 2 }; +enum DataModel { DMSmall = 1, DMLarge = 2, DMRestricted = 3 }; +enum EnumSize { ESSmall = 1, ESInteger = 2, ESDontCare = 3 }; + +} // namespace MSP430Attrs +} // namespace llvm + +#endif diff --git a/llvm/include/llvm/Support/MachineValueType.h b/llvm/include/llvm/Support/MachineValueType.h index 31f2d5a48183..ce10a4c58dfe 100644 --- a/llvm/include/llvm/Support/MachineValueType.h +++ b/llvm/include/llvm/Support/MachineValueType.h @@ -270,9 +270,10 @@ namespace llvm { funcref = 175, // WebAssembly's funcref type externref = 176, // WebAssembly's externref type x86amx = 177, // This is an X86 AMX value + i64x8 = 178, // 8 Consecutive GPRs (AArch64) FIRST_VALUETYPE = 1, // This is always the beginning of the list. - LAST_VALUETYPE = x86amx, // This always remains at the end of the list. + LAST_VALUETYPE = i64x8, // This always remains at the end of the list. VALUETYPE_SIZE = LAST_VALUETYPE + 1, // This is the current maximum for LAST_VALUETYPE. 
@@ -987,6 +988,7 @@ namespace llvm { case nxv16f16: case nxv8f32: case nxv4f64: return TypeSize::Scalable(256); + case i64x8: case v512i1: case v64i8: case v32i16: @@ -1403,51 +1405,61 @@ namespace llvm { /// SimpleValueType Iteration /// @{ static auto all_valuetypes() { - return seq_inclusive(MVT::FIRST_VALUETYPE, MVT::LAST_VALUETYPE); + return enum_seq_inclusive(MVT::FIRST_VALUETYPE, MVT::LAST_VALUETYPE, + force_iteration_on_noniterable_enum); } static auto integer_valuetypes() { - return seq_inclusive(MVT::FIRST_INTEGER_VALUETYPE, - MVT::LAST_INTEGER_VALUETYPE); + return enum_seq_inclusive(MVT::FIRST_INTEGER_VALUETYPE, + MVT::LAST_INTEGER_VALUETYPE, + force_iteration_on_noniterable_enum); } static auto fp_valuetypes() { - return seq_inclusive(MVT::FIRST_FP_VALUETYPE, MVT::LAST_FP_VALUETYPE); + return enum_seq_inclusive(MVT::FIRST_FP_VALUETYPE, MVT::LAST_FP_VALUETYPE, + force_iteration_on_noniterable_enum); } static auto vector_valuetypes() { - return seq_inclusive(MVT::FIRST_VECTOR_VALUETYPE, - MVT::LAST_VECTOR_VALUETYPE); + return enum_seq_inclusive(MVT::FIRST_VECTOR_VALUETYPE, + MVT::LAST_VECTOR_VALUETYPE, + force_iteration_on_noniterable_enum); } static auto fixedlen_vector_valuetypes() { - return seq_inclusive(MVT::FIRST_FIXEDLEN_VECTOR_VALUETYPE, - MVT::LAST_FIXEDLEN_VECTOR_VALUETYPE); + return enum_seq_inclusive(MVT::FIRST_FIXEDLEN_VECTOR_VALUETYPE, + MVT::LAST_FIXEDLEN_VECTOR_VALUETYPE, + force_iteration_on_noniterable_enum); } static auto scalable_vector_valuetypes() { - return seq_inclusive(MVT::FIRST_SCALABLE_VECTOR_VALUETYPE, - MVT::LAST_SCALABLE_VECTOR_VALUETYPE); + return enum_seq_inclusive(MVT::FIRST_SCALABLE_VECTOR_VALUETYPE, + MVT::LAST_SCALABLE_VECTOR_VALUETYPE, + force_iteration_on_noniterable_enum); } static auto integer_fixedlen_vector_valuetypes() { - return seq_inclusive(MVT::FIRST_INTEGER_FIXEDLEN_VECTOR_VALUETYPE, - MVT::LAST_INTEGER_FIXEDLEN_VECTOR_VALUETYPE); + return enum_seq_inclusive(MVT::FIRST_INTEGER_FIXEDLEN_VECTOR_VALUETYPE, + 
MVT::LAST_INTEGER_FIXEDLEN_VECTOR_VALUETYPE, + force_iteration_on_noniterable_enum); } static auto fp_fixedlen_vector_valuetypes() { - return seq_inclusive(MVT::FIRST_FP_FIXEDLEN_VECTOR_VALUETYPE, - MVT::LAST_FP_FIXEDLEN_VECTOR_VALUETYPE); + return enum_seq_inclusive(MVT::FIRST_FP_FIXEDLEN_VECTOR_VALUETYPE, + MVT::LAST_FP_FIXEDLEN_VECTOR_VALUETYPE, + force_iteration_on_noniterable_enum); } static auto integer_scalable_vector_valuetypes() { - return seq_inclusive(MVT::FIRST_INTEGER_SCALABLE_VECTOR_VALUETYPE, - MVT::LAST_INTEGER_SCALABLE_VECTOR_VALUETYPE); + return enum_seq_inclusive(MVT::FIRST_INTEGER_SCALABLE_VECTOR_VALUETYPE, + MVT::LAST_INTEGER_SCALABLE_VECTOR_VALUETYPE, + force_iteration_on_noniterable_enum); } static auto fp_scalable_vector_valuetypes() { - return seq_inclusive(MVT::FIRST_FP_SCALABLE_VECTOR_VALUETYPE, - MVT::LAST_FP_SCALABLE_VECTOR_VALUETYPE); + return enum_seq_inclusive(MVT::FIRST_FP_SCALABLE_VECTOR_VALUETYPE, + MVT::LAST_FP_SCALABLE_VECTOR_VALUETYPE, + force_iteration_on_noniterable_enum); } /// @} }; diff --git a/llvm/include/llvm/Support/Memory.h b/llvm/include/llvm/Support/Memory.h index 31e0abbcdb61..d7d60371d315 100644 --- a/llvm/include/llvm/Support/Memory.h +++ b/llvm/include/llvm/Support/Memory.h @@ -37,7 +37,7 @@ namespace sys { /// The size as it was allocated. This is always greater or equal to the /// size that was originally requested. size_t allocatedSize() const { return AllocatedSize; } - + private: void *Address; ///< Address of first byte of memory area size_t AllocatedSize; ///< Size, in bytes of the memory area @@ -148,13 +148,22 @@ namespace sys { return *this; } ~OwningMemoryBlock() { - Memory::releaseMappedMemory(M); + if (M.base()) + Memory::releaseMappedMemory(M); } void *base() const { return M.base(); } /// The size as it was allocated. This is always greater or equal to the /// size that was originally requested. 
size_t allocatedSize() const { return M.allocatedSize(); } MemoryBlock getMemoryBlock() const { return M; } + std::error_code release() { + std::error_code EC; + if (M.base()) { + EC = Memory::releaseMappedMemory(M); + M = MemoryBlock(); + } + return EC; + } private: MemoryBlock M; }; diff --git a/llvm/include/llvm/Support/PGOOptions.h b/llvm/include/llvm/Support/PGOOptions.h new file mode 100644 index 000000000000..2141e2159c0c --- /dev/null +++ b/llvm/include/llvm/Support/PGOOptions.h @@ -0,0 +1,65 @@ +//===------ PGOOptions.h -- PGO option tunables ----------------*- C++ -*--===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// Define option tunables for PGO. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SUPPORT_PGOOPTIONS_H +#define LLVM_SUPPORT_PGOOPTIONS_H + +#include "llvm/Support/Error.h" + +namespace llvm { + +/// A struct capturing PGO tunables. 
+struct PGOOptions { + enum PGOAction { NoAction, IRInstr, IRUse, SampleUse }; + enum CSPGOAction { NoCSAction, CSIRInstr, CSIRUse }; + PGOOptions(std::string ProfileFile = "", std::string CSProfileGenFile = "", + std::string ProfileRemappingFile = "", PGOAction Action = NoAction, + CSPGOAction CSAction = NoCSAction, + bool DebugInfoForProfiling = false, + bool PseudoProbeForProfiling = false) + : ProfileFile(ProfileFile), CSProfileGenFile(CSProfileGenFile), + ProfileRemappingFile(ProfileRemappingFile), Action(Action), + CSAction(CSAction), DebugInfoForProfiling(DebugInfoForProfiling || + (Action == SampleUse && + !PseudoProbeForProfiling)), + PseudoProbeForProfiling(PseudoProbeForProfiling) { + // Note, we do allow ProfileFile.empty() for Action=IRUse LTO can + // callback with IRUse action without ProfileFile. + + // If there is a CSAction, PGOAction cannot be IRInstr or SampleUse. + assert(this->CSAction == NoCSAction || + (this->Action != IRInstr && this->Action != SampleUse)); + + // For CSIRInstr, CSProfileGenFile also needs to be nonempty. + assert(this->CSAction != CSIRInstr || !this->CSProfileGenFile.empty()); + + // If CSAction is CSIRUse, PGOAction needs to be IRUse as they share + // a profile. + assert(this->CSAction != CSIRUse || this->Action == IRUse); + + // If neither Action nor CSAction, DebugInfoForProfiling or + // PseudoProbeForProfiling needs to be true. 
+ assert(this->Action != NoAction || this->CSAction != NoCSAction || + this->DebugInfoForProfiling || this->PseudoProbeForProfiling); + } + std::string ProfileFile; + std::string CSProfileGenFile; + std::string ProfileRemappingFile; + PGOAction Action; + CSPGOAction CSAction; + bool DebugInfoForProfiling; + bool PseudoProbeForProfiling; +}; +} // namespace llvm + +#endif diff --git a/llvm/include/llvm/Support/Parallel.h b/llvm/include/llvm/Support/Parallel.h index 28d171d45256..5c3b26d5754c 100644 --- a/llvm/include/llvm/Support/Parallel.h +++ b/llvm/include/llvm/Support/Parallel.h @@ -40,7 +40,10 @@ class Latch { public: explicit Latch(uint32_t Count = 0) : Count(Count) {} - ~Latch() { sync(); } + ~Latch() { + // Ensure at least that sync() was called. + assert(Count == 0); + } void inc() { std::lock_guard<std::mutex> lock(Mutex); diff --git a/llvm/include/llvm/Support/Path.h b/llvm/include/llvm/Support/Path.h index af70e086a1b6..da5095714f48 100644 --- a/llvm/include/llvm/Support/Path.h +++ b/llvm/include/llvm/Support/Path.h @@ -25,7 +25,29 @@ namespace llvm { namespace sys { namespace path { -enum class Style { windows, posix, native }; +enum class Style { + native, + posix, + windows_slash, + windows_backslash, + windows = windows_backslash, // deprecated +}; + +/// Check if \p S uses POSIX path rules. +constexpr bool is_style_posix(Style S) { + if (S == Style::posix) + return true; + if (S != Style::native) + return false; +#if defined(_WIN32) + return false; +#else + return true; +#endif +} + +/// Check if \p S uses Windows path rules. +constexpr bool is_style_windows(Style S) { return !is_style_posix(S); } /// @name Lexical Component Iterator /// @{ @@ -174,6 +196,21 @@ bool replace_path_prefix(SmallVectorImpl<char> &Path, StringRef OldPrefix, StringRef NewPrefix, Style style = Style::native); +/// Remove redundant leading "./" pieces and consecutive separators. +/// +/// @param path Input path. +/// @result The cleaned-up \a path. 
+StringRef remove_leading_dotslash(StringRef path, Style style = Style::native); + +/// In-place remove any './' and optionally '../' components from a path. +/// +/// @param path processed path +/// @param remove_dot_dot specify if '../' (except for leading "../") should be +/// removed +/// @result True if path was changed +bool remove_dots(SmallVectorImpl<char> &path, bool remove_dot_dot = false, + Style style = Style::native); + /// Append to path. /// /// @code @@ -212,7 +249,7 @@ void append(SmallVectorImpl<char> &path, const_iterator begin, /// Convert path to the native form. This is used to give paths to users and /// operating system calls in the platform's normal way. For example, on Windows -/// all '/' are converted to '\'. +/// all '/' are converted to '\'. On Unix, it converts all '\' to '/'. /// /// @param path A path that is transformed to native format. /// @param result Holds the result of the transformation. @@ -226,6 +263,17 @@ void native(const Twine &path, SmallVectorImpl<char> &result, /// @param path A path that is transformed to native format. void native(SmallVectorImpl<char> &path, Style style = Style::native); +/// For Windows path styles, convert path to use the preferred path separators. +/// For other styles, do nothing. +/// +/// @param path A path that is transformed to preferred format. +inline void make_preferred(SmallVectorImpl<char> &path, + Style style = Style::native) { + if (!is_style_windows(style)) + return; + native(path, style); +} + /// Replaces backslashes with slashes if Windows. /// /// @param path processed path @@ -499,21 +547,6 @@ bool is_absolute_gnu(const Twine &path, Style style = Style::native); /// @result True if the path is relative, false if it is not. bool is_relative(const Twine &path, Style style = Style::native); -/// Remove redundant leading "./" pieces and consecutive separators. -/// -/// @param path Input path. -/// @result The cleaned-up \a path. 
-StringRef remove_leading_dotslash(StringRef path, Style style = Style::native); - -/// In-place remove any './' and optionally '../' components from a path. -/// -/// @param path processed path -/// @param remove_dot_dot specify if '../' (except for leading "../") should be -/// removed -/// @result True if path was changed -bool remove_dots(SmallVectorImpl<char> &path, bool remove_dot_dot = false, - Style style = Style::native); - } // end namespace path } // end namespace sys } // end namespace llvm diff --git a/llvm/include/llvm/Support/Process.h b/llvm/include/llvm/Support/Process.h index 6687e5e7ff9a..ee03efeed9b2 100644 --- a/llvm/include/llvm/Support/Process.h +++ b/llvm/include/llvm/Support/Process.h @@ -214,12 +214,10 @@ public: /// In that case, the control flow will resume after RunSafely(), like for a /// crash, rather than exiting the current process. /// Use \arg NoCleanup for calling _exit() instead of exit(). - LLVM_ATTRIBUTE_NORETURN - static void Exit(int RetCode, bool NoCleanup = false); + [[noreturn]] static void Exit(int RetCode, bool NoCleanup = false); private: - LLVM_ATTRIBUTE_NORETURN - static void ExitNoCleanup(int RetCode); + [[noreturn]] static void ExitNoCleanup(int RetCode); }; } diff --git a/llvm/include/llvm/Support/RISCVISAInfo.h b/llvm/include/llvm/Support/RISCVISAInfo.h new file mode 100644 index 000000000000..7110de601123 --- /dev/null +++ b/llvm/include/llvm/Support/RISCVISAInfo.h @@ -0,0 +1,89 @@ +//===-- RISCVISAInfo.h - RISCV ISA Information ------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SUPPORT_RISCVISAINFO_H +#define LLVM_SUPPORT_RISCVISAINFO_H + +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Error.h" + +#include <map> +#include <string> +#include <vector> + +namespace llvm { +struct RISCVExtensionInfo { + std::string ExtName; + unsigned MajorVersion; + unsigned MinorVersion; +}; + +class RISCVISAInfo { +public: + RISCVISAInfo(const RISCVISAInfo &) = delete; + RISCVISAInfo &operator=(const RISCVISAInfo &) = delete; + + static bool compareExtension(const std::string &LHS, const std::string &RHS); + + /// Helper class for OrderedExtensionMap. + struct ExtensionComparator { + bool operator()(const std::string &LHS, const std::string &RHS) const { + return compareExtension(LHS, RHS); + } + }; + + /// OrderedExtensionMap is std::map, it's specialized to keep entries + /// in canonical order of extension. + typedef std::map<std::string, RISCVExtensionInfo, ExtensionComparator> + OrderedExtensionMap; + + /// Parse RISCV ISA info from arch string. + static llvm::Expected<std::unique_ptr<RISCVISAInfo>> + parseArchString(StringRef Arch, bool EnableExperimentalExtension, + bool ExperimentalExtensionVersionCheck = true); + + /// Parse RISCV ISA info from feature vector. + static llvm::Expected<std::unique_ptr<RISCVISAInfo>> + parseFeatures(unsigned XLen, const std::vector<std::string> &Features); + + /// Convert RISCV ISA info to a feature vector. 
+ void toFeatures(std::vector<StringRef> &Features, + std::function<StringRef(const Twine &)> StrAlloc) const; + + const OrderedExtensionMap &getExtensions() const { return Exts; }; + + unsigned getXLen() const { return XLen; }; + unsigned getFLen() const { return FLen; }; + + bool hasExtension(StringRef Ext) const; + std::string toString() const; + + static bool isSupportedExtensionFeature(StringRef Ext); + static bool isSupportedExtension(StringRef Ext); + static bool isSupportedExtension(StringRef Ext, unsigned MajorVersion, + unsigned MinorVersion); + +private: + RISCVISAInfo(unsigned XLen) : XLen(XLen), FLen(0) {} + + unsigned XLen; + unsigned FLen; + + OrderedExtensionMap Exts; + + void addExtension(StringRef ExtName, unsigned MajorVersion, + unsigned MinorVersion); + + void updateFLen(); +}; + +} // namespace llvm + +#endif diff --git a/llvm/include/llvm/Support/RISCVTargetParser.def b/llvm/include/llvm/Support/RISCVTargetParser.def index 6a06f9258105..f658cdb91c6b 100644 --- a/llvm/include/llvm/Support/RISCVTargetParser.def +++ b/llvm/include/llvm/Support/RISCVTargetParser.def @@ -19,9 +19,17 @@ PROC(ROCKET_RV32, {"rocket-rv32"}, FK_NONE, {""}) PROC(ROCKET_RV64, {"rocket-rv64"}, FK_64BIT, {""}) PROC(SIFIVE_732, {"sifive-7-rv32"}, FK_NONE, {""}) PROC(SIFIVE_764, {"sifive-7-rv64"}, FK_64BIT, {""}) +PROC(SIFIVE_E20, {"sifive-e20"}, FK_NONE, {"rv32imc"}) +PROC(SIFIVE_E21, {"sifive-e21"}, FK_NONE, {"rv32imac"}) +PROC(SIFIVE_E24, {"sifive-e24"}, FK_NONE, {"rv32imafc"}) PROC(SIFIVE_E31, {"sifive-e31"}, FK_NONE, {"rv32imac"}) -PROC(SIFIVE_U54, {"sifive-u54"}, FK_64BIT, {"rv64gc"}) +PROC(SIFIVE_E34, {"sifive-e34"}, FK_NONE, {"rv32imafc"}) PROC(SIFIVE_E76, {"sifive-e76"}, FK_NONE, {"rv32imafc"}) +PROC(SIFIVE_S21, {"sifive-s21"}, FK_64BIT, {"rv64imac"}) +PROC(SIFIVE_S51, {"sifive-s51"}, FK_64BIT, {"rv64imac"}) +PROC(SIFIVE_S54, {"sifive-s54"}, FK_64BIT, {"rv64gc"}) +PROC(SIFIVE_S76, {"sifive-s76"}, FK_64BIT, {"rv64gc"}) +PROC(SIFIVE_U54, {"sifive-u54"}, FK_64BIT, 
{"rv64gc"}) PROC(SIFIVE_U74, {"sifive-u74"}, FK_64BIT, {"rv64gc"}) #undef PROC diff --git a/llvm/include/llvm/Support/Signposts.h b/llvm/include/llvm/Support/Signposts.h index bc6abba0a0e1..dabbba6f89d1 100644 --- a/llvm/include/llvm/Support/Signposts.h +++ b/llvm/include/llvm/Support/Signposts.h @@ -1,9 +1,8 @@ //===-- llvm/Support/Signposts.h - Interval debug annotations ---*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -18,17 +17,8 @@ #define LLVM_SUPPORT_SIGNPOSTS_H #include "llvm/ADT/StringRef.h" -#include "llvm/Config/llvm-config.h" #include <memory> -#if LLVM_SUPPORT_XCODE_SIGNPOSTS -#include <Availability.h> -#include <os/signpost.h> -#endif - -#define SIGNPOSTS_AVAILABLE() \ - __builtin_available(macos 10.14, iOS 12, tvOS 12, watchOS 5, *) - namespace llvm { class SignpostEmitterImpl; @@ -45,33 +35,8 @@ public: /// Begin a signposted interval for a given object. void startInterval(const void *O, StringRef Name); - -#if LLVM_SUPPORT_XCODE_SIGNPOSTS - os_log_t &getLogger() const; - os_signpost_id_t getSignpostForObject(const void *O); -#endif - - /// A macro to take advantage of the special format string handling - /// in the os_signpost API. The format string substitution is - /// deferred to the log consumer and done outside of the - /// application. -#if LLVM_SUPPORT_XCODE_SIGNPOSTS -#define SIGNPOST_EMITTER_START_INTERVAL(SIGNPOST_EMITTER, O, ...) 
\ - do { \ - if ((SIGNPOST_EMITTER).isEnabled()) \ - if (SIGNPOSTS_AVAILABLE()) \ - os_signpost_interval_begin((SIGNPOST_EMITTER).getLogger(), \ - (SIGNPOST_EMITTER).getSignpostForObject(O), \ - "LLVM Timers", __VA_ARGS__); \ - } while (0) -#else -#define SIGNPOST_EMITTER_START_INTERVAL(SIGNPOST_EMITTER, O, ...) \ - do { \ - } while (0) -#endif - /// End a signposted interval for a given object. - void endInterval(const void *O); + void endInterval(const void *O, StringRef Name); }; } // end namespace llvm diff --git a/llvm/include/llvm/Support/TargetOpcodes.def b/llvm/include/llvm/Support/TargetOpcodes.def index fbe0d1a55bfc..b34b885ddc35 100644 --- a/llvm/include/llvm/Support/TargetOpcodes.def +++ b/llvm/include/llvm/Support/TargetOpcodes.def @@ -652,6 +652,9 @@ HANDLE_TARGET_OPCODE(G_UMAX) /// Generic integer absolute value. HANDLE_TARGET_OPCODE(G_ABS) +HANDLE_TARGET_OPCODE(G_LROUND) +HANDLE_TARGET_OPCODE(G_LLROUND) + /// Generic BRANCH instruction. This is an unconditional branch. HANDLE_TARGET_OPCODE(G_BR) diff --git a/llvm/include/llvm/Support/TargetSelect.h b/llvm/include/llvm/Support/TargetSelect.h index 9ffb84c4a570..e57614cea758 100644 --- a/llvm/include/llvm/Support/TargetSelect.h +++ b/llvm/include/llvm/Support/TargetSelect.h @@ -41,6 +41,10 @@ extern "C" { #define LLVM_DISASSEMBLER(TargetName) \ void LLVMInitialize##TargetName##Disassembler(); #include "llvm/Config/Disassemblers.def" + +// Declare all of the available TargetMCA initialization functions. +#define LLVM_TARGETMCA(TargetName) void LLVMInitialize##TargetName##TargetMCA(); +#include "llvm/Config/TargetMCAs.def" } namespace llvm { @@ -159,6 +163,14 @@ namespace llvm { return true; #endif } + + /// InitializeAllTargetMCAs - The main program should call + /// this function to initialize the target CustomBehaviour and + /// InstrPostProcess classes. 
+ inline void InitializeAllTargetMCAs() { +#define LLVM_TARGETMCA(TargetName) LLVMInitialize##TargetName##TargetMCA(); +#include "llvm/Config/TargetMCAs.def" + } } #endif diff --git a/llvm/include/llvm/Support/TypeSize.h b/llvm/include/llvm/Support/TypeSize.h index 30bbbd7db8c9..7d1274735a37 100644 --- a/llvm/include/llvm/Support/TypeSize.h +++ b/llvm/include/llvm/Support/TypeSize.h @@ -229,7 +229,6 @@ public: bool isZero() const { return !Value; } bool isNonZero() const { return !isZero(); } explicit operator bool() const { return isNonZero(); } - ScalarTy getValue() const { return Value; } ScalarTy getValue(unsigned Dim) const { return Dim == UnivariateDim ? Value : 0; } @@ -250,7 +249,7 @@ public: //===----------------------------------------------------------------------===// // LinearPolySize - base class for fixed- or scalable sizes. -// ^ ^ +// ^ ^ // | | // | +----- ElementCount - Leaf class to represent an element count // | (vscale x unsigned) @@ -294,7 +293,7 @@ public: static LeafTy getNull() { return get(0, false); } /// Returns the minimum value this size can represent. - ScalarTy getKnownMinValue() const { return this->getValue(); } + ScalarTy getKnownMinValue() const { return this->Value; } /// Returns whether the size is scaled by a runtime quantity (vscale). 
bool isScalable() const { return this->UnivariateDim == ScalableDim; } /// A return value of true indicates we know at compile time that the number @@ -500,8 +499,7 @@ inline raw_ostream &operator<<(raw_ostream &OS, return OS; } -template <typename T> struct DenseMapInfo; -template <> struct DenseMapInfo<ElementCount> { +template <> struct DenseMapInfo<ElementCount, void> { static inline ElementCount getEmptyKey() { return ElementCount::getScalable(~0U); } diff --git a/llvm/include/llvm/Support/VersionTuple.h b/llvm/include/llvm/Support/VersionTuple.h index a48ae0bf52bd..1a1072d228f1 100644 --- a/llvm/include/llvm/Support/VersionTuple.h +++ b/llvm/include/llvm/Support/VersionTuple.h @@ -17,6 +17,7 @@ #include "llvm/ADT/DenseMapInfo.h" #include "llvm/ADT/Hashing.h" #include "llvm/ADT/Optional.h" +#include "llvm/Support/HashBuilder.h" #include <string> #include <tuple> @@ -164,6 +165,12 @@ public: return llvm::hash_combine(VT.Major, VT.Minor, VT.Subminor, VT.Build); } + template <typename HasherT, llvm::support::endianness Endianness> + friend void addHash(HashBuilderImpl<HasherT, Endianness> &HBuilder, + const VersionTuple &VT) { + HBuilder.add(VT.Major, VT.Minor, VT.Subminor, VT.Build); + } + /// Retrieve a string representation of the version number. std::string getAsString() const; diff --git a/llvm/include/llvm/Support/VirtualFileSystem.h b/llvm/include/llvm/Support/VirtualFileSystem.h index 323e6719645d..10d2389ee079 100644 --- a/llvm/include/llvm/Support/VirtualFileSystem.h +++ b/llvm/include/llvm/Support/VirtualFileSystem.h @@ -121,6 +121,14 @@ public: /// Closes the file. virtual std::error_code close() = 0; + + // Get the same file with a different path. + static ErrorOr<std::unique_ptr<File>> + getWithPath(ErrorOr<std::unique_ptr<File>> Result, const Twine &P); + +protected: + // Set the file's underlying path. + virtual void setPath(const Twine &Path) {} }; /// A member of a directory, yielded by a directory_iterator. 
@@ -596,6 +604,17 @@ class RedirectingFileSystemParser; /// contain multiple path components (e.g. /path/to/file). However, any /// directory in such a path that contains more than one child must be uniquely /// represented by a 'directory' entry. +/// +/// When the 'use-external-name' field is set, calls to \a vfs::File::status() +/// give the external (remapped) filesystem name instead of the name the file +/// was accessed by. This is an intentional leak through the \a +/// RedirectingFileSystem abstraction layer. It enables clients to discover +/// (and use) the external file location when communicating with users or tools +/// that don't use the same VFS overlay. +/// +/// FIXME: 'use-external-name' causes behaviour that's inconsistent with how +/// "real" filesystems behave. Maybe there should be a separate channel for +/// this information. class RedirectingFileSystem : public vfs::FileSystem { public: enum EntryKind { EK_Directory, EK_DirectoryRemap, EK_File }; @@ -746,6 +765,12 @@ private: /// with the given error code on a path associated with the provided Entry. bool shouldFallBackToExternalFS(std::error_code EC, Entry *E = nullptr) const; + /// Get the File status, or error, from the underlying external file system. + /// This returns the status with the originally requested name, while looking + /// up the entry using the canonical path. + ErrorOr<Status> getExternalStatus(const Twine &CanonicalPath, + const Twine &OriginalPath) const; + // In a RedirectingFileSystem, keys can be specified in Posix or Windows // style (or even a mixture of both), so this comparison helper allows // slashes (representing a root) to match backslashes (and vice versa). Note @@ -777,12 +802,7 @@ private: /// Whether to perform case-sensitive comparisons. /// /// Currently, case-insensitive matching only works correctly with ASCII. 
- bool CaseSensitive = -#ifdef _WIN32 - false; -#else - true; -#endif + bool CaseSensitive = is_style_posix(sys::path::Style::native); /// IsRelativeOverlay marks whether a ExternalContentsPrefixDir path must /// be prefixed in every 'external-contents' when reading from YAML files. @@ -808,7 +828,8 @@ private: Entry *From) const; /// Get the status for a path with the provided \c LookupResult. - ErrorOr<Status> status(const Twine &Path, const LookupResult &Result); + ErrorOr<Status> status(const Twine &CanonicalPath, const Twine &OriginalPath, + const LookupResult &Result); public: /// Looks up \p Path in \c Roots and returns a LookupResult giving the diff --git a/llvm/include/llvm/Support/Windows/WindowsSupport.h b/llvm/include/llvm/Support/Windows/WindowsSupport.h index a45eeaba4ad5..917822678e97 100644 --- a/llvm/include/llvm/Support/Windows/WindowsSupport.h +++ b/llvm/include/llvm/Support/Windows/WindowsSupport.h @@ -68,10 +68,10 @@ llvm::VersionTuple GetWindowsOSVersion(); bool MakeErrMsg(std::string *ErrMsg, const std::string &prefix); // Include GetLastError() in a fatal error message. 
-LLVM_ATTRIBUTE_NORETURN inline void ReportLastErrorFatal(const char *Msg) { +[[noreturn]] inline void ReportLastErrorFatal(const char *Msg) { std::string ErrMsg; MakeErrMsg(&ErrMsg, Msg); - llvm::report_fatal_error(ErrMsg); + llvm::report_fatal_error(Twine(ErrMsg)); } template <typename HandleTraits> diff --git a/llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h b/llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h index 757a3c0c8a71..aca717a9f6cb 100644 --- a/llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h +++ b/llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h @@ -31,6 +31,8 @@ namespace X86Disassembler { #define XOP9_MAP_SYM x86DisassemblerXOP9Opcodes #define XOPA_MAP_SYM x86DisassemblerXOPAOpcodes #define THREEDNOW_MAP_SYM x86Disassembler3DNowOpcodes +#define MAP5_SYM x86DisassemblerMap5Opcodes +#define MAP6_SYM x86DisassemblerMap6Opcodes #define INSTRUCTIONS_STR "x86DisassemblerInstrSpecifiers" #define CONTEXTS_STR "x86DisassemblerContexts" @@ -42,6 +44,8 @@ namespace X86Disassembler { #define XOP9_MAP_STR "x86DisassemblerXOP9Opcodes" #define XOPA_MAP_STR "x86DisassemblerXOPAOpcodes" #define THREEDNOW_MAP_STR "x86Disassembler3DNowOpcodes" +#define MAP5_STR "x86DisassemblerMap5Opcodes" +#define MAP6_STR "x86DisassemblerMap6Opcodes" // Attributes of an instruction that must be known before the opcode can be // processed correctly. Most of these indicate the presence of particular @@ -292,7 +296,9 @@ enum OpcodeType { XOP8_MAP = 4, XOP9_MAP = 5, XOPA_MAP = 6, - THREEDNOW_MAP = 7 + THREEDNOW_MAP = 7, + MAP5 = 8, + MAP6 = 9 }; // The following structs are used for the hierarchical decode table. 
After diff --git a/llvm/include/llvm/Support/X86TargetParser.def b/llvm/include/llvm/Support/X86TargetParser.def index ffcc2238e3ce..4443d822d3e8 100644 --- a/llvm/include/llvm/Support/X86TargetParser.def +++ b/llvm/include/llvm/Support/X86TargetParser.def @@ -91,54 +91,59 @@ X86_CPU_SUBTYPE(AMDFAM19H_ZNVER3, "znver3") X86_CPU_SUBTYPE(INTEL_COREI7_ROCKETLAKE, "rocketlake") #undef X86_CPU_SUBTYPE - -// This macro is used for cpu types present in compiler-rt/libgcc. +// This macro is used for cpu types present in compiler-rt/libgcc. The third +// parameter PRIORITY is as required by the attribute 'target' checking. Note +// that not all are supported/prioritized by GCC, so synchronization with GCC's +// implementation may require changing some existing values. +// +// We cannot just re-sort the list though because its order is dictated by the +// order of bits in CodeGenFunction::GetX86CpuSupportsMask. #ifndef X86_FEATURE_COMPAT -#define X86_FEATURE_COMPAT(ENUM, STR) X86_FEATURE(ENUM, STR) +#define X86_FEATURE_COMPAT(ENUM, STR, PRIORITY) X86_FEATURE(ENUM, STR) #endif #ifndef X86_FEATURE #define X86_FEATURE(ENUM, STR) #endif -X86_FEATURE_COMPAT(CMOV, "cmov") -X86_FEATURE_COMPAT(MMX, "mmx") -X86_FEATURE_COMPAT(POPCNT, "popcnt") -X86_FEATURE_COMPAT(SSE, "sse") -X86_FEATURE_COMPAT(SSE2, "sse2") -X86_FEATURE_COMPAT(SSE3, "sse3") -X86_FEATURE_COMPAT(SSSE3, "ssse3") -X86_FEATURE_COMPAT(SSE4_1, "sse4.1") -X86_FEATURE_COMPAT(SSE4_2, "sse4.2") -X86_FEATURE_COMPAT(AVX, "avx") -X86_FEATURE_COMPAT(AVX2, "avx2") -X86_FEATURE_COMPAT(SSE4_A, "sse4a") -X86_FEATURE_COMPAT(FMA4, "fma4") -X86_FEATURE_COMPAT(XOP, "xop") -X86_FEATURE_COMPAT(FMA, "fma") -X86_FEATURE_COMPAT(AVX512F, "avx512f") -X86_FEATURE_COMPAT(BMI, "bmi") -X86_FEATURE_COMPAT(BMI2, "bmi2") -X86_FEATURE_COMPAT(AES, "aes") -X86_FEATURE_COMPAT(PCLMUL, "pclmul") -X86_FEATURE_COMPAT(AVX512VL, "avx512vl") -X86_FEATURE_COMPAT(AVX512BW, "avx512bw") -X86_FEATURE_COMPAT(AVX512DQ, "avx512dq") -X86_FEATURE_COMPAT(AVX512CD, 
"avx512cd") -X86_FEATURE_COMPAT(AVX512ER, "avx512er") -X86_FEATURE_COMPAT(AVX512PF, "avx512pf") -X86_FEATURE_COMPAT(AVX512VBMI, "avx512vbmi") -X86_FEATURE_COMPAT(AVX512IFMA, "avx512ifma") -X86_FEATURE_COMPAT(AVX5124VNNIW, "avx5124vnniw") -X86_FEATURE_COMPAT(AVX5124FMAPS, "avx5124fmaps") -X86_FEATURE_COMPAT(AVX512VPOPCNTDQ, "avx512vpopcntdq") -X86_FEATURE_COMPAT(AVX512VBMI2, "avx512vbmi2") -X86_FEATURE_COMPAT(GFNI, "gfni") -X86_FEATURE_COMPAT(VPCLMULQDQ, "vpclmulqdq") -X86_FEATURE_COMPAT(AVX512VNNI, "avx512vnni") -X86_FEATURE_COMPAT(AVX512BITALG, "avx512bitalg") -X86_FEATURE_COMPAT(AVX512BF16, "avx512bf16") -X86_FEATURE_COMPAT(AVX512VP2INTERSECT, "avx512vp2intersect") +X86_FEATURE_COMPAT(CMOV, "cmov", 0) +X86_FEATURE_COMPAT(MMX, "mmx", 1) +X86_FEATURE_COMPAT(POPCNT, "popcnt", 9) +X86_FEATURE_COMPAT(SSE, "sse", 2) +X86_FEATURE_COMPAT(SSE2, "sse2", 3) +X86_FEATURE_COMPAT(SSE3, "sse3", 4) +X86_FEATURE_COMPAT(SSSE3, "ssse3", 5) +X86_FEATURE_COMPAT(SSE4_1, "sse4.1", 7) +X86_FEATURE_COMPAT(SSE4_2, "sse4.2", 8) +X86_FEATURE_COMPAT(AVX, "avx", 12) +X86_FEATURE_COMPAT(AVX2, "avx2", 18) +X86_FEATURE_COMPAT(SSE4_A, "sse4a", 6) +X86_FEATURE_COMPAT(FMA4, "fma4", 14) +X86_FEATURE_COMPAT(XOP, "xop", 15) +X86_FEATURE_COMPAT(FMA, "fma", 16) +X86_FEATURE_COMPAT(AVX512F, "avx512f", 19) +X86_FEATURE_COMPAT(BMI, "bmi", 13) +X86_FEATURE_COMPAT(BMI2, "bmi2", 17) +X86_FEATURE_COMPAT(AES, "aes", 10) +X86_FEATURE_COMPAT(PCLMUL, "pclmul", 11) +X86_FEATURE_COMPAT(AVX512VL, "avx512vl", 20) +X86_FEATURE_COMPAT(AVX512BW, "avx512bw", 21) +X86_FEATURE_COMPAT(AVX512DQ, "avx512dq", 22) +X86_FEATURE_COMPAT(AVX512CD, "avx512cd", 23) +X86_FEATURE_COMPAT(AVX512ER, "avx512er", 24) +X86_FEATURE_COMPAT(AVX512PF, "avx512pf", 25) +X86_FEATURE_COMPAT(AVX512VBMI, "avx512vbmi", 26) +X86_FEATURE_COMPAT(AVX512IFMA, "avx512ifma", 27) +X86_FEATURE_COMPAT(AVX5124VNNIW, "avx5124vnniw", 28) +X86_FEATURE_COMPAT(AVX5124FMAPS, "avx5124fmaps", 29) +X86_FEATURE_COMPAT(AVX512VPOPCNTDQ, "avx512vpopcntdq", 30) 
+X86_FEATURE_COMPAT(AVX512VBMI2, "avx512vbmi2", 31) +X86_FEATURE_COMPAT(GFNI, "gfni", 32) +X86_FEATURE_COMPAT(VPCLMULQDQ, "vpclmulqdq", 33) +X86_FEATURE_COMPAT(AVX512VNNI, "avx512vnni", 34) +X86_FEATURE_COMPAT(AVX512BITALG, "avx512bitalg", 35) +X86_FEATURE_COMPAT(AVX512BF16, "avx512bf16", 36) +X86_FEATURE_COMPAT(AVX512VP2INTERSECT, "avx512vp2intersect", 37) // Features below here are not in libgcc/compiler-rt. X86_FEATURE (3DNOW, "3dnow") X86_FEATURE (3DNOWA, "3dnowa") @@ -153,6 +158,7 @@ X86_FEATURE (CLWB, "clwb") X86_FEATURE (CLZERO, "clzero") X86_FEATURE (CMPXCHG16B, "cx16") X86_FEATURE (CMPXCHG8B, "cx8") +X86_FEATURE (CRC32, "crc32") X86_FEATURE (ENQCMD, "enqcmd") X86_FEATURE (F16C, "f16c") X86_FEATURE (FSGSBASE, "fsgsbase") @@ -193,6 +199,7 @@ X86_FEATURE (XSAVEC, "xsavec") X86_FEATURE (XSAVEOPT, "xsaveopt") X86_FEATURE (XSAVES, "xsaves") X86_FEATURE (HRESET, "hreset") +X86_FEATURE (AVX512FP16, "avx512fp16") X86_FEATURE (AVXVNNI, "avxvnni") // These features aren't really CPU features, but the frontend can set them. 
X86_FEATURE (RETPOLINE_EXTERNAL_THUNK, "retpoline-external-thunk") @@ -202,3 +209,49 @@ X86_FEATURE (LVI_CFI, "lvi-cfi") X86_FEATURE (LVI_LOAD_HARDENING, "lvi-load-hardening") #undef X86_FEATURE_COMPAT #undef X86_FEATURE + +#ifndef CPU_SPECIFIC +#define CPU_SPECIFIC(NAME, MANGLING, FEATURES) +#endif + +#ifndef CPU_SPECIFIC_ALIAS +#define CPU_SPECIFIC_ALIAS(NEW_NAME, NAME) +#endif + +CPU_SPECIFIC("generic", 'A', "") +CPU_SPECIFIC("pentium", 'B', "") +CPU_SPECIFIC("pentium_pro", 'C', "+cmov") +CPU_SPECIFIC("pentium_mmx", 'D', "+mmx") +CPU_SPECIFIC("pentium_ii", 'E', "+cmov,+mmx") +CPU_SPECIFIC("pentium_iii", 'H', "+cmov,+mmx,+sse") +CPU_SPECIFIC_ALIAS("pentium_iii_no_xmm_regs", "pentium_iii") +CPU_SPECIFIC("pentium_4", 'J', "+cmov,+mmx,+sse,+sse2") +CPU_SPECIFIC("pentium_m", 'K', "+cmov,+mmx,+sse,+sse2") +CPU_SPECIFIC("pentium_4_sse3", 'L', "+cmov,+mmx,+sse,+sse2,+sse3") +CPU_SPECIFIC("core_2_duo_ssse3", 'M', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3") +CPU_SPECIFIC("core_2_duo_sse4_1", 'N', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1") +CPU_SPECIFIC("atom", 'O', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+movbe") +CPU_SPECIFIC("atom_sse4_2", 'c', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+popcnt") +CPU_SPECIFIC("core_i7_sse4_2", 'P', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+popcnt") +CPU_SPECIFIC("core_aes_pclmulqdq", 'Q', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+popcnt") +CPU_SPECIFIC("atom_sse4_2_movbe", 'd', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt") +CPU_SPECIFIC("goldmont", 'i', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt") +CPU_SPECIFIC("sandybridge", 'R', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+popcnt,+avx") +CPU_SPECIFIC_ALIAS("core_2nd_gen_avx", "sandybridge") +CPU_SPECIFIC("ivybridge", 'S', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+popcnt,+f16c,+avx") +CPU_SPECIFIC_ALIAS("core_3rd_gen_avx", "ivybridge") +CPU_SPECIFIC("haswell", 'V', 
"+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2") +CPU_SPECIFIC_ALIAS("core_4th_gen_avx", "haswell") +CPU_SPECIFIC("core_4th_gen_avx_tsx", 'W', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2") +CPU_SPECIFIC("broadwell", 'X', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2,+adx") +CPU_SPECIFIC_ALIAS("core_5th_gen_avx", "broadwell") +CPU_SPECIFIC("core_5th_gen_avx_tsx", 'Y', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2,+adx") +CPU_SPECIFIC("knl", 'Z', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2,+avx512f,+adx,+avx512er,+avx512pf,+avx512cd") +CPU_SPECIFIC_ALIAS("mic_avx512", "knl") +CPU_SPECIFIC("skylake", 'b', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2,+adx,+mpx") +CPU_SPECIFIC( "skylake_avx512", 'a', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2,+avx512dq,+avx512f,+adx,+avx512cd,+avx512bw,+avx512vl,+clwb") +CPU_SPECIFIC("cannonlake", 'e', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2,+avx512dq,+avx512f,+adx,+avx512ifma,+avx512cd,+avx512bw,+avx512vl,+avx512vbmi") +CPU_SPECIFIC("knm", 'j', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2,+avx512f,+adx,+avx512er,+avx512pf,+avx512cd,+avx5124fmaps,+avx5124vnniw,+avx512vpopcntdq") + +#undef CPU_SPECIFIC_ALIAS +#undef CPU_SPECIFIC diff --git a/llvm/include/llvm/Support/X86TargetParser.h b/llvm/include/llvm/Support/X86TargetParser.h index ed02066933a7..bfa3e23dbd9d 100644 --- a/llvm/include/llvm/Support/X86TargetParser.h +++ b/llvm/include/llvm/Support/X86TargetParser.h @@ -13,6 +13,7 @@ #ifndef LLVM_SUPPORT_X86TARGETPARSER_H #define 
LLVM_SUPPORT_X86TARGETPARSER_H +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" @@ -154,6 +155,9 @@ void getFeaturesForCPU(StringRef CPU, SmallVectorImpl<StringRef> &Features); void updateImpliedFeatures(StringRef Feature, bool Enabled, StringMap<bool> &Features); +uint64_t getCpuSupportsMask(ArrayRef<StringRef> FeatureStrs); +unsigned getFeaturePriority(ProcessorFeatures Feat); + } // namespace X86 } // namespace llvm diff --git a/llvm/include/llvm/Support/YAMLTraits.h b/llvm/include/llvm/Support/YAMLTraits.h index 9ac9eb300983..bea232e6e000 100644 --- a/llvm/include/llvm/Support/YAMLTraits.h +++ b/llvm/include/llvm/Support/YAMLTraits.h @@ -1641,7 +1641,7 @@ void IO::processKeyWithDefault(const char *Key, Optional<T> &Val, // usually None. bool IsNone = false; if (!outputting()) - if (auto *Node = dyn_cast<ScalarNode>(((Input *)this)->getCurrentNode())) + if (const auto *Node = dyn_cast<ScalarNode>(((Input *)this)->getCurrentNode())) // We use rtrim to ignore possible white spaces that might exist when a // comment is present on the same line. IsNone = Node->getRawValue().rtrim(' ') == "<none>"; diff --git a/llvm/include/llvm/Support/raw_ostream.h b/llvm/include/llvm/Support/raw_ostream.h index c669c2babad9..98c26ef0b1e5 100644 --- a/llvm/include/llvm/Support/raw_ostream.h +++ b/llvm/include/llvm/Support/raw_ostream.h @@ -330,6 +330,8 @@ public: // changeColor() has no effect until enable_colors(true) is called. virtual void enable_colors(bool enable) { ColorEnabled = enable; } + bool colors_enabled() const { return ColorEnabled; } + /// Tie this stream to the specified stream. Replaces any existing tied-to /// stream. Specifying a nullptr unties the stream. 
void tie(raw_ostream *TieTo) { TiedStream = TieTo; } @@ -719,7 +721,11 @@ class buffer_unique_ostream : public raw_svector_ostream { public: buffer_unique_ostream(std::unique_ptr<raw_ostream> OS) - : raw_svector_ostream(Buffer), OS(std::move(OS)) {} + : raw_svector_ostream(Buffer), OS(std::move(OS)) { + // Turn off buffering on OS, which we now own, to avoid allocating a buffer + // when the destructor writes only to be immediately flushed again. + this->OS->SetUnbuffered(); + } ~buffer_unique_ostream() override { *OS << str(); } }; diff --git a/llvm/include/llvm/TableGen/DirectiveEmitter.h b/llvm/include/llvm/TableGen/DirectiveEmitter.h index 5c4a736eb107..d73b9ae49235 100644 --- a/llvm/include/llvm/TableGen/DirectiveEmitter.h +++ b/llvm/include/llvm/TableGen/DirectiveEmitter.h @@ -152,7 +152,7 @@ public: } return C; }); - N.erase(std::remove(N.begin(), N.end(), '_'), N.end()); + llvm::erase_value(N, '_'); return N; } diff --git a/llvm/include/llvm/TableGen/Error.h b/llvm/include/llvm/TableGen/Error.h index a0e23aca211e..da0132b10f4f 100644 --- a/llvm/include/llvm/TableGen/Error.h +++ b/llvm/include/llvm/TableGen/Error.h @@ -22,13 +22,10 @@ namespace llvm { void PrintNote(const Twine &Msg); void PrintNote(ArrayRef<SMLoc> NoteLoc, const Twine &Msg); -LLVM_ATTRIBUTE_NORETURN void PrintFatalNote(const Twine &Msg); -LLVM_ATTRIBUTE_NORETURN void PrintFatalNote(ArrayRef<SMLoc> ErrorLoc, - const Twine &Msg); -LLVM_ATTRIBUTE_NORETURN void PrintFatalNote(const Record *Rec, - const Twine &Msg); -LLVM_ATTRIBUTE_NORETURN void PrintFatalNote(const RecordVal *RecVal, - const Twine &Msg); +[[noreturn]] void PrintFatalNote(const Twine &Msg); +[[noreturn]] void PrintFatalNote(ArrayRef<SMLoc> ErrorLoc, const Twine &Msg); +[[noreturn]] void PrintFatalNote(const Record *Rec, const Twine &Msg); +[[noreturn]] void PrintFatalNote(const RecordVal *RecVal, const Twine &Msg); void PrintWarning(const Twine &Msg); void PrintWarning(ArrayRef<SMLoc> WarningLoc, const Twine &Msg); @@ -40,13 
+37,10 @@ void PrintError(const char *Loc, const Twine &Msg); void PrintError(const Record *Rec, const Twine &Msg); void PrintError(const RecordVal *RecVal, const Twine &Msg); -LLVM_ATTRIBUTE_NORETURN void PrintFatalError(const Twine &Msg); -LLVM_ATTRIBUTE_NORETURN void PrintFatalError(ArrayRef<SMLoc> ErrorLoc, - const Twine &Msg); -LLVM_ATTRIBUTE_NORETURN void PrintFatalError(const Record *Rec, - const Twine &Msg); -LLVM_ATTRIBUTE_NORETURN void PrintFatalError(const RecordVal *RecVal, - const Twine &Msg); +[[noreturn]] void PrintFatalError(const Twine &Msg); +[[noreturn]] void PrintFatalError(ArrayRef<SMLoc> ErrorLoc, const Twine &Msg); +[[noreturn]] void PrintFatalError(const Record *Rec, const Twine &Msg); +[[noreturn]] void PrintFatalError(const RecordVal *RecVal, const Twine &Msg); void CheckAssert(SMLoc Loc, Init *Condition, Init *Message); diff --git a/llvm/include/llvm/TableGen/Record.h b/llvm/include/llvm/TableGen/Record.h index 713d9375448c..5869a5cf0423 100644 --- a/llvm/include/llvm/TableGen/Record.h +++ b/llvm/include/llvm/TableGen/Record.h @@ -39,6 +39,9 @@ #include <vector> namespace llvm { +namespace detail { +struct RecordContext; +} // namespace detail class ListRecTy; struct MultiClass; @@ -100,7 +103,7 @@ inline raw_ostream &operator<<(raw_ostream &OS, const RecTy &Ty) { /// 'bit' - Represent a single bit class BitRecTy : public RecTy { - static BitRecTy Shared; + friend detail::RecordContext; BitRecTy() : RecTy(BitRecTyKind) {} @@ -109,7 +112,7 @@ public: return RT->getRecTyKind() == BitRecTyKind; } - static BitRecTy *get() { return &Shared; } + static BitRecTy *get(); std::string getAsString() const override { return "bit"; } @@ -140,7 +143,7 @@ public: /// 'int' - Represent an integer value of no particular size class IntRecTy : public RecTy { - static IntRecTy Shared; + friend detail::RecordContext; IntRecTy() : RecTy(IntRecTyKind) {} @@ -149,7 +152,7 @@ public: return RT->getRecTyKind() == IntRecTyKind; } - static IntRecTy *get() { return 
&Shared; } + static IntRecTy *get(); std::string getAsString() const override { return "int"; } @@ -158,7 +161,7 @@ public: /// 'string' - Represent an string value class StringRecTy : public RecTy { - static StringRecTy Shared; + friend detail::RecordContext; StringRecTy() : RecTy(StringRecTyKind) {} @@ -167,7 +170,7 @@ public: return RT->getRecTyKind() == StringRecTyKind; } - static StringRecTy *get() { return &Shared; } + static StringRecTy *get(); std::string getAsString() const override; @@ -200,7 +203,7 @@ public: /// 'dag' - Represent a dag fragment class DagRecTy : public RecTy { - static DagRecTy Shared; + friend detail::RecordContext; DagRecTy() : RecTy(DagRecTyKind) {} @@ -209,7 +212,7 @@ public: return RT->getRecTyKind() == DagRecTyKind; } - static DagRecTy *get() { return &Shared; } + static DagRecTy *get(); std::string getAsString() const override; }; @@ -221,6 +224,7 @@ public: class RecordRecTy final : public RecTy, public FoldingSetNode, public TrailingObjects<RecordRecTy, Record *> { friend class Record; + friend detail::RecordContext; unsigned NumClasses; @@ -437,6 +441,8 @@ public: /// '?' - Represents an uninitialized value. class UnsetInit : public Init { + friend detail::RecordContext; + UnsetInit() : Init(IK_UnsetInit) {} public: @@ -468,9 +474,11 @@ public: /// 'true'/'false' - Represent a concrete initializer for a bit. 
class BitInit final : public TypedInit { + friend detail::RecordContext; + bool Value; - explicit BitInit(bool V) : TypedInit(IK_BitInit, BitRecTy::get()), Value(V) {} + explicit BitInit(bool V, RecTy *T) : TypedInit(IK_BitInit, T), Value(V) {} public: BitInit(const BitInit &) = delete; @@ -637,7 +645,7 @@ public: } StringRef getValue() const { return Value; } - StringFormat getFormat() const { return Format; } + StringFormat getFormat() const { return Format; } bool hasCodeFormat() const { return Format == SF_Code; } Init *convertInitializerTo(RecTy *Ty) const override; @@ -1414,6 +1422,7 @@ private: SMLoc Loc; // Source location of definition of name. PointerIntPair<RecTy *, 2, FieldKind> TyAndKind; Init *Value; + bool IsUsed = false; public: RecordVal(Init *N, RecTy *T, FieldKind K); @@ -1458,6 +1467,11 @@ public: /// Set the value and source location of the field. bool setValue(Init *V, SMLoc NewLoc); + /// Whether this value is used. Useful for reporting warnings, for example + /// when a template argument is unused. + void setUsed(bool Used) { IsUsed = Used; } + bool isUsed() const { return IsUsed; } + void dump() const; /// Print the value to an output stream, possibly with a semicolon. @@ -1483,8 +1497,6 @@ public: }; private: - static unsigned LastID; - Init *Name; // Location where record was instantiated, followed by the location of // multiclass prototypes used. @@ -1515,8 +1527,8 @@ public: // Constructs a record. explicit Record(Init *N, ArrayRef<SMLoc> locs, RecordKeeper &records, bool Anonymous = false, bool Class = false) - : Name(N), Locs(locs.begin(), locs.end()), TrackedRecords(records), - ID(LastID++), IsAnonymous(Anonymous), IsClass(Class) { + : Name(N), Locs(locs.begin(), locs.end()), TrackedRecords(records), + ID(getNewUID()), IsAnonymous(Anonymous), IsClass(Class) { checkName(); } @@ -1528,12 +1540,12 @@ public: // ID number. Don't copy CorrespondingDefInit either, since it's owned by the // original record. 
All other fields can be copied normally. Record(const Record &O) - : Name(O.Name), Locs(O.Locs), TemplateArgs(O.TemplateArgs), - Values(O.Values), Assertions(O.Assertions), SuperClasses(O.SuperClasses), - TrackedRecords(O.TrackedRecords), ID(LastID++), - IsAnonymous(O.IsAnonymous), IsClass(O.IsClass) { } + : Name(O.Name), Locs(O.Locs), TemplateArgs(O.TemplateArgs), + Values(O.Values), Assertions(O.Assertions), + SuperClasses(O.SuperClasses), TrackedRecords(O.TrackedRecords), + ID(getNewUID()), IsAnonymous(O.IsAnonymous), IsClass(O.IsClass) {} - static unsigned getNewUID() { return LastID++; } + static unsigned getNewUID(); unsigned getID() const { return ID; } @@ -1632,6 +1644,7 @@ public: } void checkRecordAssertions(); + void checkUnusedTemplateArgs(); bool isSubClassOf(const Record *R) const { for (const auto &SCPair : SuperClasses) diff --git a/llvm/include/llvm/Target/GenericOpcodes.td b/llvm/include/llvm/Target/GenericOpcodes.td index e3e1d5fc3c65..72c974834a2f 100644 --- a/llvm/include/llvm/Target/GenericOpcodes.td +++ b/llvm/include/llvm/Target/GenericOpcodes.td @@ -225,6 +225,18 @@ def G_FREEZE : GenericInstruction { let hasSideEffects = false; } +def G_LROUND: GenericInstruction { + let OutOperandList = (outs type0:$dst); + let InOperandList = (ins type1:$src); + let hasSideEffects = false; +} + +def G_LLROUND: GenericInstruction { + let OutOperandList = (outs type0:$dst); + let InOperandList = (ins type1:$src); + let hasSideEffects = false; +} + //------------------------------------------------------------------------------ // Binary ops. 
//------------------------------------------------------------------------------ diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td index f35156d59849..e2d3dbdda88a 100644 --- a/llvm/include/llvm/Target/GlobalISel/Combine.td +++ b/llvm/include/llvm/Target/GlobalISel/Combine.td @@ -130,7 +130,13 @@ def extending_loads : GICombineRule< (match (wip_match_opcode G_LOAD, G_SEXTLOAD, G_ZEXTLOAD):$root, [{ return Helper.matchCombineExtendingLoads(*${root}, ${matchinfo}); }]), (apply [{ Helper.applyCombineExtendingLoads(*${root}, ${matchinfo}); }])>; -def combines_for_extload: GICombineGroup<[extending_loads]>; + +def load_and_mask : GICombineRule< + (defs root:$root, build_fn_matchinfo:$matchinfo), + (match (wip_match_opcode G_AND):$root, + [{ return Helper.matchCombineLoadWithAndMask(*${root}, ${matchinfo}); }]), + (apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])>; +def combines_for_extload: GICombineGroup<[extending_loads, load_and_mask]>; def sext_trunc_sextload : GICombineRule< (defs root:$d), @@ -197,6 +203,12 @@ def reduce_shl_of_extend : GICombineRule< [{ return Helper.matchCombineShlOfExtend(*${mi}, ${matchinfo}); }]), (apply [{ Helper.applyCombineShlOfExtend(*${mi}, ${matchinfo}); }])>; +def narrow_binop_feeding_and : GICombineRule< + (defs root:$root, build_fn_matchinfo:$matchinfo), + (match (wip_match_opcode G_AND):$root, + [{ return Helper.matchNarrowBinopFeedingAnd(*${root}, ${matchinfo}); }]), + (apply [{ Helper.applyBuildFnNoErase(*${root}, ${matchinfo}); }])>; + // [us]itofp(undef) = 0, because the result value is bounded. 
def undef_to_fp_zero : GICombineRule< (defs root:$root), @@ -275,7 +287,7 @@ def select_constant_cmp: GICombineRule< def right_identity_zero: GICombineRule< (defs root:$root), (match (wip_match_opcode G_SUB, G_ADD, G_OR, G_XOR, G_SHL, G_ASHR, G_LSHR, - G_PTR_ADD):$root, + G_PTR_ADD, G_ROTL, G_ROTR):$root, [{ return Helper.matchConstantOp(${root}->getOperand(2), 0); }]), (apply [{ return Helper.replaceSingleDefInstWithOperand(*${root}, 1); }]) >; @@ -507,6 +519,13 @@ def fabs_fabs_fold: GICombineRule< (apply [{ return Helper.replaceSingleDefInstWithReg(*${root}, ${matchinfo}); }]) >; +// Fold (fabs (fneg x)) -> (fabs x). +def fabs_fneg_fold: GICombineRule < + (defs root:$root, build_fn_matchinfo:$matchinfo), + (match (wip_match_opcode G_FABS):$root, + [{ return Helper.matchCombineFAbsOfFNeg(*${root}, ${matchinfo}); }]), + (apply [{ Helper.applyBuildFnNoErase(*${root}, ${matchinfo}); }])>; + // Fold (unmerge cst) -> cst1, cst2, ... def unmerge_cst_matchinfo : GIDefMatchData<"SmallVector<APInt, 8>">; def unmerge_cst : GICombineRule< @@ -588,6 +607,14 @@ def load_or_combine : GICombineRule< [{ return Helper.matchLoadOrCombine(*${root}, ${info}); }]), (apply [{ Helper.applyBuildFn(*${root}, ${info}); }])>; + +def truncstore_merge_matcdata : GIDefMatchData<"MergeTruncStoresInfo">; +def truncstore_merge : GICombineRule< + (defs root:$root, truncstore_merge_matcdata:$info), + (match (wip_match_opcode G_STORE):$root, + [{ return Helper.matchTruncStoreMerge(*${root}, ${info}); }]), + (apply [{ Helper.applyTruncStoreMerge(*${root}, ${info}); }])>; + def extend_through_phis_matchdata: GIDefMatchData<"MachineInstr*">; def extend_through_phis : GICombineRule< (defs root:$root, extend_through_phis_matchdata:$matchinfo), @@ -638,6 +665,18 @@ def icmp_to_true_false_known_bits : GICombineRule< [{ return Helper.matchICmpToTrueFalseKnownBits(*${d}, ${matchinfo}); }]), (apply [{ Helper.replaceInstWithConstant(*${d}, ${matchinfo}); }])>; +def icmp_to_lhs_known_bits : GICombineRule< + 
(defs root:$root, build_fn_matchinfo:$info), + (match (wip_match_opcode G_ICMP):$root, + [{ return Helper.matchICmpToLHSKnownBits(*${root}, ${info}); }]), + (apply [{ Helper.applyBuildFn(*${root}, ${info}); }])>; + +def and_or_disjoint_mask : GICombineRule< + (defs root:$root, build_fn_matchinfo:$info), + (match (wip_match_opcode G_AND):$root, + [{ return Helper.matchAndOrDisjointMask(*${root}, ${info}); }]), + (apply [{ Helper.applyBuildFnNoErase(*${root}, ${info}); }])>; + def bitfield_extract_from_and : GICombineRule< (defs root:$root, build_fn_matchinfo:$info), (match (wip_match_opcode G_AND):$root, @@ -652,8 +691,31 @@ def bitfield_extract_from_sext_inreg : GICombineRule< [{ return Helper.matchBitfieldExtractFromSExtInReg(*${root}, ${info}); }]), (apply [{ Helper.applyBuildFn(*${root}, ${info}); }])>; +def bitfield_extract_from_shr : GICombineRule< + (defs root:$root, build_fn_matchinfo:$info), + (match (wip_match_opcode G_ASHR, G_LSHR):$root, + [{ return Helper.matchBitfieldExtractFromShr(*${root}, ${info}); }]), + (apply [{ Helper.applyBuildFn(*${root}, ${info}); }])>; + +def bitfield_extract_from_shr_and : GICombineRule< + (defs root:$root, build_fn_matchinfo:$info), + (match (wip_match_opcode G_ASHR, G_LSHR):$root, + [{ return Helper.matchBitfieldExtractFromShrAnd(*${root}, ${info}); }]), + (apply [{ Helper.applyBuildFn(*${root}, ${info}); }])>; + def form_bitfield_extract : GICombineGroup<[bitfield_extract_from_sext_inreg, - bitfield_extract_from_and]>; + bitfield_extract_from_and, + bitfield_extract_from_shr, + bitfield_extract_from_shr_and]>; + +def udiv_by_const : GICombineRule< + (defs root:$root), + (match (wip_match_opcode G_UDIV):$root, + [{ return Helper.matchUDivByConst(*${root}); }]), + (apply [{ Helper.applyUDivByConst(*${root}); }])>; + +def intdiv_combines : GICombineGroup<[udiv_by_const]>; + def reassoc_ptradd : GICombineRule< (defs root:$root, build_fn_matchinfo:$matchinfo), (match (wip_match_opcode G_PTR_ADD):$root, @@ -669,6 +731,26 @@ 
def constant_fold : GICombineRule< [{ return Helper.matchConstantFold(*${d}, ${matchinfo}); }]), (apply [{ Helper.replaceInstWithConstant(*${d}, ${matchinfo}); }])>; +def mulo_by_2: GICombineRule< + (defs root:$root, build_fn_matchinfo:$matchinfo), + (match (wip_match_opcode G_UMULO, G_SMULO):$root, + [{ return Helper.matchMulOBy2(*${root}, ${matchinfo}); }]), + (apply [{ Helper.applyBuildFnNoErase(*${root}, ${matchinfo}); }])>; + +def mulh_to_lshr : GICombineRule< + (defs root:$root), + (match (wip_match_opcode G_UMULH):$root, + [{ return Helper.matchUMulHToLShr(*${root}); }]), + (apply [{ Helper.applyUMulHToLShr(*${root}); }])>; + +def mulh_combines : GICombineGroup<[mulh_to_lshr]>; + +def redundant_neg_operands: GICombineRule< + (defs root:$root, build_fn_matchinfo:$matchinfo), + (match (wip_match_opcode G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FMAD, G_FMA):$root, + [{ return Helper.matchRedundantNegOperands(*${root}, ${matchinfo}); }]), + (apply [{ Helper.applyBuildFnNoErase(*${root}, ${matchinfo}); }])>; + // FIXME: These should use the custom predicate feature once it lands. 
def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero, undef_to_negative_one, @@ -685,13 +767,14 @@ def identity_combines : GICombineGroup<[select_same_val, right_identity_zero, fneg_fneg_fold, right_identity_one]>; def const_combines : GICombineGroup<[constant_fp_op, const_ptradd_to_i2p, - overlapping_and]>; + overlapping_and, mulo_by_2]>; def known_bits_simplifications : GICombineGroup<[ redundant_and, redundant_sext_inreg, redundant_or, urem_pow2_to_mask, - zext_trunc_fold, icmp_to_true_false_known_bits]>; + zext_trunc_fold, icmp_to_true_false_known_bits, icmp_to_lhs_known_bits]>; -def width_reduction_combines : GICombineGroup<[reduce_shl_of_extend]>; +def width_reduction_combines : GICombineGroup<[reduce_shl_of_extend, + narrow_binop_feeding_and]>; def phi_combines : GICombineGroup<[extend_through_phis]>; @@ -713,8 +796,10 @@ def all_combines : GICombineGroup<[trivial_combines, insert_vec_elt_combines, unmerge_zext_to_zext, merge_unmerge, trunc_ext_fold, trunc_shl, const_combines, xor_of_and_with_same_reg, ptr_add_with_zero, shift_immed_chain, shift_of_shifted_logic_chain, load_or_combine, - div_rem_to_divrem, funnel_shift_combines, form_bitfield_extract, - constant_fold]>; + truncstore_merge, div_rem_to_divrem, funnel_shift_combines, + form_bitfield_extract, constant_fold, fabs_fneg_fold, + intdiv_combines, mulh_combines, redundant_neg_operands, + and_or_disjoint_mask ]>; // A combine group used to for prelegalizer combiners at -O0. 
The combines in // this group have been selected based on experiments to balance code size and diff --git a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td index 8a5052401e9b..12eee24b578f 100644 --- a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td +++ b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td @@ -144,6 +144,8 @@ def : GINodeEquiv<G_FMAXNUM_IEEE, fmaxnum_ieee>; def : GINodeEquiv<G_READCYCLECOUNTER, readcyclecounter>; def : GINodeEquiv<G_ROTR, rotr>; def : GINodeEquiv<G_ROTL, rotl>; +def : GINodeEquiv<G_LROUND, lround>; +def : GINodeEquiv<G_LLROUND, llround>; def : GINodeEquiv<G_STRICT_FADD, strict_fadd>; def : GINodeEquiv<G_STRICT_FSUB, strict_fsub>; diff --git a/llvm/include/llvm/Target/Target.td b/llvm/include/llvm/Target/Target.td index e9720d765167..7ae690b83770 100644 --- a/llvm/include/llvm/Target/Target.td +++ b/llvm/include/llvm/Target/Target.td @@ -306,6 +306,9 @@ class RegisterClass<string namespace, list<ValueType> regTypes, int alignment, // the assembly matcher will provide a function to map from diagnostic types // to message strings. string DiagnosticString = ""; + + // Target-specific flags. This becomes the TSFlags field in TargetRegisterClass. + bits<8> TSFlags = 0; } // The memberList in a RegisterClass is a dag of set operations. TableGen @@ -650,6 +653,25 @@ class Instruction : InstructionEncoding { /// instruction selection predicates. FastISel cannot handle such cases, but /// SelectionDAG can. bit FastISelShouldIgnore = false; + + /// HasPositionOrder: Indicate tablegen to sort the instructions by record + /// ID, so that instruction that is defined earlier can be sorted earlier + /// in the assembly matching table. + bit HasPositionOrder = false; +} + +/// Defines a Pat match between compressed and uncompressed instruction. +/// The relationship and helper function generation are handled by +/// CompressInstEmitter backend. 
+class CompressPat<dag input, dag output, list<Predicate> predicates = []> { + /// Uncompressed instruction description. + dag Input = input; + /// Compressed instruction description. + dag Output = output; + /// Predicates that must be true for this to match. + list<Predicate> Predicates = predicates; + /// Duplicate match when tied operand is just different. + bit isCompressOnly = false; } /// Defines an additional encoding that disassembles to the given instruction diff --git a/llvm/include/llvm/Target/TargetLoweringObjectFile.h b/llvm/include/llvm/Target/TargetLoweringObjectFile.h index 93bfdd20e082..752032d3d04d 100644 --- a/llvm/include/llvm/Target/TargetLoweringObjectFile.h +++ b/llvm/include/llvm/Target/TargetLoweringObjectFile.h @@ -15,6 +15,7 @@ #define LLVM_TARGET_TARGETLOWERINGOBJECTFILE_H #include "llvm/MC/MCObjectFileInfo.h" +#include "llvm/MC/MCRegister.h" #include <cstdint> namespace llvm { @@ -219,6 +220,14 @@ public: return SupportDebugThreadLocalLocation; } + /// Returns the register used as static base in RWPI variants. + virtual const MCRegister getStaticBase() const { return MCRegister::NoRegister; } + + /// Get the target specific RWPI relocation. 
+ virtual const MCExpr *getIndirectSymViaRWPI(const MCSymbol *Sym) const { + return nullptr; + } + /// Get the target specific PC relative GOT entry relocation virtual const MCExpr *getIndirectSymViaGOTPCRel(const GlobalValue *GV, const MCSymbol *Sym, diff --git a/llvm/include/llvm/Target/TargetMachine.h b/llvm/include/llvm/Target/TargetMachine.h index dd17af4a642a..acfb265a9ff9 100644 --- a/llvm/include/llvm/Target/TargetMachine.h +++ b/llvm/include/llvm/Target/TargetMachine.h @@ -13,6 +13,7 @@ #ifndef LLVM_TARGET_TARGETMACHINE_H #define LLVM_TARGET_TARGETMACHINE_H +#include "llvm/ADT/Optional.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" #include "llvm/IR/DataLayout.h" @@ -20,9 +21,11 @@ #include "llvm/Pass.h" #include "llvm/Support/CodeGen.h" #include "llvm/Support/Error.h" +#include "llvm/Support/PGOOptions.h" #include "llvm/Target/CGPassBuilderOption.h" #include "llvm/Target/TargetOptions.h" #include <string> +#include <utility> namespace llvm { @@ -110,6 +113,9 @@ protected: // Can only create subclasses. unsigned RequireStructuredCFG : 1; unsigned O0WantsFastISel : 1; + // PGO related tunables. + Optional<PGOOptions> PGOOption = None; + public: const TargetOptions DefaultOptions; mutable TargetOptions Options; @@ -303,6 +309,9 @@ public: return false; } + void setPGOOption(Optional<PGOOptions> PGOOpt) { PGOOption = PGOOpt; } + const Optional<PGOOptions> &getPGOOption() const { return PGOOption; } + /// If the specified generic pointer could be assumed as a pointer to a /// specific address space, return that address space. /// @@ -311,6 +320,18 @@ public: /// properties. virtual unsigned getAssumedAddrSpace(const Value *V) const { return -1; } + /// If the specified predicate checks whether a generic pointer falls within + /// a specified address space, return that generic pointer and the address + /// space being queried. 
+ /// + /// Such predicates could be specified in @llvm.assume intrinsics for the + /// optimizer to assume that the given generic pointer always falls within + /// the address space based on that predicate. + virtual std::pair<const Value *, unsigned> + getPredicatedAddrSpace(const Value *V) const { + return std::make_pair(nullptr, -1); + } + /// Get a \c TargetIRAnalysis appropriate for the target. /// /// This is used to construct the new pass manager's target IR analysis pass, @@ -464,6 +485,10 @@ public: virtual bool useIPRA() const { return false; } + + /// The default variant to use in unqualified `asm` instructions. + /// If this returns 0, `asm "$(foo$|bar$)"` will evaluate to `asm "foo"`. + virtual int unqualifiedInlineAsmVariant() const { return 0; } }; /// Helper method for getting the code model, returning Default if diff --git a/llvm/include/llvm/Target/TargetOptions.h b/llvm/include/llvm/Target/TargetOptions.h index e5bea9041479..912f6d1c153a 100644 --- a/llvm/include/llvm/Target/TargetOptions.h +++ b/llvm/include/llvm/Target/TargetOptions.h @@ -110,12 +110,23 @@ namespace llvm { DisableWithDiag // Disable the abort but emit a diagnostic on failure. }; + /// Indicates when and how the Swift async frame pointer bit should be set. + enum class SwiftAsyncFramePointerMode { + /// Determine whether to set the bit statically or dynamically based + /// on the deployment target. + DeploymentBased, + /// Always set the bit. + Always, + /// Never set the bit. 
+ Never, + }; + class TargetOptions { public: TargetOptions() : UnsafeFPMath(false), NoInfsFPMath(false), NoNaNsFPMath(false), NoTrappingFPMath(true), NoSignedZerosFPMath(false), - EnableAIXExtendedAltivecABI(false), + ApproxFuncFPMath(false), EnableAIXExtendedAltivecABI(false), HonorSignDependentRoundingFPMathOption(false), NoZerosInBSS(false), GuaranteedTailCallOpt(false), StackSymbolOrdering(true), EnableFastISel(false), EnableGlobalISel(false), UseInitArray(false), @@ -129,7 +140,7 @@ namespace llvm { EnableMachineFunctionSplitter(false), SupportsDefaultOutlining(false), EmitAddrsig(false), EmitCallSiteInfo(false), SupportsDebugEntryValues(false), EnableDebugEntryValues(false), - PseudoProbeForProfiling(false), ValueTrackingVariableLocations(false), + ValueTrackingVariableLocations(false), ForceDwarfFrameSection(false), XRayOmitFunctionIndex(false), DebugStrictDwarf(false), FPDenormalMode(DenormalMode::IEEE, DenormalMode::IEEE) {} @@ -172,9 +183,15 @@ namespace llvm { /// argument or result as insignificant. unsigned NoSignedZerosFPMath : 1; + /// ApproxFuncFPMath - This flag is enabled when the + /// -enable-approx-func-fp-math is specified on the command line. This + /// specifies that optimizations are allowed to substitute math functions + /// with approximate calculations + unsigned ApproxFuncFPMath : 1; + /// EnableAIXExtendedAltivecABI - This flag returns true when -vec-extabi is /// specified. The code generator is then able to use both volatile and - /// nonvolitle vector regisers. When false, the code generator only uses + /// nonvolitle vector registers. When false, the code generator only uses /// volatile vector registers which is the default setting on AIX. unsigned EnableAIXExtendedAltivecABI : 1; @@ -219,6 +236,11 @@ namespace llvm { /// selection fails to lower/select an instruction. GlobalISelAbortMode GlobalISelAbort = GlobalISelAbortMode::Enable; + /// Control when and how the Swift async frame pointer bit should + /// be set. 
+ SwiftAsyncFramePointerMode SwiftAsyncFramePointer = + SwiftAsyncFramePointerMode::Always; + /// UseInitArray - Use .init_array instead of .ctors for static /// constructors. unsigned UseInitArray : 1; @@ -305,9 +327,6 @@ namespace llvm { /// production. bool ShouldEmitDebugEntryValues() const; - /// Emit pseudo probes into the binary for sample profiling - unsigned PseudoProbeForProfiling : 1; - // When set to true, use experimental new debug variable location tracking, // which seeks to follow the values of variables rather than their location, // post isel. @@ -328,6 +347,9 @@ namespace llvm { /// passed on the command line. std::string StackUsageOutput; + /// If greater than 0, override TargetLoweringBase::PrefLoopAlignment. + unsigned LoopAlignment = 0; + /// FloatABIType - This setting is set by -float-abi=xxx option is specfied /// on the command line. This setting may either be Default, Soft, or Hard. /// Default selects the target's default behavior. Soft selects the ABI for diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td index 44ec2250a9c5..d8ef7c49a5f9 100644 --- a/llvm/include/llvm/Target/TargetSelectionDAG.td +++ b/llvm/include/llvm/Target/TargetSelectionDAG.td @@ -297,10 +297,6 @@ def SDTAtomicLoad : SDTypeProfile<1, 1, [ SDTCisInt<0>, SDTCisPtrTy<1> ]>; -def SDTConvertOp : SDTypeProfile<1, 5, [ //cvtss, su, us, uu, ff, fs, fu, sf, su - SDTCisVT<2, OtherVT>, SDTCisVT<3, OtherVT>, SDTCisPtrTy<4>, SDTCisPtrTy<5> -]>; - class SDCallSeqStart<list<SDTypeConstraint> constraints> : SDTypeProfile<0, 2, constraints>; class SDCallSeqEnd<list<SDTypeConstraint> constraints> : @@ -1050,6 +1046,10 @@ def extloadvi32 : PatFrag<(ops node:$ptr), (extload node:$ptr)> { let IsLoad = true; let ScalarMemoryVT = i32; } +def extloadvf16 : PatFrag<(ops node:$ptr), (extload node:$ptr)> { + let IsLoad = true; + let ScalarMemoryVT = f16; +} def extloadvf32 : PatFrag<(ops node:$ptr), (extload node:$ptr)> { let IsLoad 
= true; let ScalarMemoryVT = f32; @@ -1472,7 +1472,7 @@ def any_fsetccs : PatFrags<(ops node:$lhs, node:$rhs, node:$pred), [(strict_fsetccs node:$lhs, node:$rhs, node:$pred), (setcc node:$lhs, node:$rhs, node:$pred)]>; -multiclass binary_atomic_op_ord<SDNode atomic_op> { +multiclass binary_atomic_op_ord { def NAME#_monotonic : PatFrag<(ops node:$ptr, node:$val), (!cast<SDPatternOperator>(NAME) node:$ptr, node:$val)> { let IsAtomic = true; @@ -1500,7 +1500,7 @@ multiclass binary_atomic_op_ord<SDNode atomic_op> { } } -multiclass ternary_atomic_op_ord<SDNode atomic_op> { +multiclass ternary_atomic_op_ord { def NAME#_monotonic : PatFrag<(ops node:$ptr, node:$cmp, node:$val), (!cast<SDPatternOperator>(NAME) node:$ptr, node:$cmp, node:$val)> { let IsAtomic = true; @@ -1550,10 +1550,10 @@ multiclass binary_atomic_op<SDNode atomic_op, bit IsInt = 1> { let MemoryVT = !if(IsInt, i64, f64); } - defm NAME#_8 : binary_atomic_op_ord<atomic_op>; - defm NAME#_16 : binary_atomic_op_ord<atomic_op>; - defm NAME#_32 : binary_atomic_op_ord<atomic_op>; - defm NAME#_64 : binary_atomic_op_ord<atomic_op>; + defm NAME#_8 : binary_atomic_op_ord; + defm NAME#_16 : binary_atomic_op_ord; + defm NAME#_32 : binary_atomic_op_ord; + defm NAME#_64 : binary_atomic_op_ord; } multiclass ternary_atomic_op<SDNode atomic_op> { @@ -1578,10 +1578,10 @@ multiclass ternary_atomic_op<SDNode atomic_op> { let MemoryVT = i64; } - defm NAME#_8 : ternary_atomic_op_ord<atomic_op>; - defm NAME#_16 : ternary_atomic_op_ord<atomic_op>; - defm NAME#_32 : ternary_atomic_op_ord<atomic_op>; - defm NAME#_64 : ternary_atomic_op_ord<atomic_op>; + defm NAME#_8 : ternary_atomic_op_ord; + defm NAME#_16 : ternary_atomic_op_ord; + defm NAME#_32 : ternary_atomic_op_ord; + defm NAME#_64 : ternary_atomic_op_ord; } defm atomic_load_add : binary_atomic_op<atomic_load_add>; diff --git a/llvm/include/llvm/TextAPI/Architecture.h b/llvm/include/llvm/TextAPI/Architecture.h index 3cd8a3a19e96..978359995074 100644 --- 
a/llvm/include/llvm/TextAPI/Architecture.h +++ b/llvm/include/llvm/TextAPI/Architecture.h @@ -10,8 +10,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_TEXTAPI_MACHO_ARCHITECTURE_H -#define LLVM_TEXTAPI_MACHO_ARCHITECTURE_H +#ifndef LLVM_TEXTAPI_ARCHITECTURE_H +#define LLVM_TEXTAPI_ARCHITECTURE_H #include <cstdint> #include <utility> @@ -54,4 +54,4 @@ raw_ostream &operator<<(raw_ostream &OS, Architecture Arch); } // end namespace MachO. } // end namespace llvm. -#endif // LLVM_TEXTAPI_MACHO_ARCHITECTURE_H +#endif // LLVM_TEXTAPI_ARCHITECTURE_H diff --git a/llvm/include/llvm/TextAPI/ArchitectureSet.h b/llvm/include/llvm/TextAPI/ArchitectureSet.h index e9b374e4f69f..f17cb74c9183 100644 --- a/llvm/include/llvm/TextAPI/ArchitectureSet.h +++ b/llvm/include/llvm/TextAPI/ArchitectureSet.h @@ -10,8 +10,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_TEXTAPI_MACHO_ARCHITECTURESET_H -#define LLVM_TEXTAPI_MACHO_ARCHITECTURESET_H +#ifndef LLVM_TEXTAPI_ARCHITECTURESET_H +#define LLVM_TEXTAPI_ARCHITECTURESET_H #include "llvm/TextAPI/Architecture.h" #include <cstddef> @@ -168,4 +168,4 @@ raw_ostream &operator<<(raw_ostream &OS, ArchitectureSet Set); } // end namespace MachO. } // end namespace llvm. 
-#endif // LLVM_TEXTAPI_MACHO_ARCHITECTURESET_H +#endif // LLVM_TEXTAPI_ARCHITECTURESET_H diff --git a/llvm/include/llvm/TextAPI/InterfaceFile.h b/llvm/include/llvm/TextAPI/InterfaceFile.h index d17c0c1c5b47..03a541454e1a 100644 --- a/llvm/include/llvm/TextAPI/InterfaceFile.h +++ b/llvm/include/llvm/TextAPI/InterfaceFile.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_TEXTAPI_MACHO_INTERFACEFILE_H -#define LLVM_TEXTAPI_MACHO_INTERFACEFILE_H +#ifndef LLVM_TEXTAPI_INTERFACEFILE_H +#define LLVM_TEXTAPI_INTERFACEFILE_H #include "llvm/ADT/BitmaskEnum.h" #include "llvm/ADT/DenseMap.h" @@ -445,7 +445,7 @@ bool operator==(const DenseMapBase<DerivedT, SymbolsMapKey, MachO::Symbol *, KeyInfoT, BucketT> &RHS) { if (LHS.size() != RHS.size()) return false; - for (auto KV : LHS) { + for (const auto &KV : LHS) { auto I = RHS.find(KV.first); if (I == RHS.end() || *I->second != *KV.second) return false; @@ -456,4 +456,4 @@ bool operator==(const DenseMapBase<DerivedT, SymbolsMapKey, MachO::Symbol *, } // end namespace MachO. } // end namespace llvm. -#endif // LLVM_TEXTAPI_MACHO_INTERFACEFILE_H +#endif // LLVM_TEXTAPI_INTERFACEFILE_H diff --git a/llvm/include/llvm/TextAPI/PackedVersion.h b/llvm/include/llvm/TextAPI/PackedVersion.h index e3d2bd5ae2e5..24bec2ebe8fc 100644 --- a/llvm/include/llvm/TextAPI/PackedVersion.h +++ b/llvm/include/llvm/TextAPI/PackedVersion.h @@ -10,8 +10,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_TEXTAPI_MACHO_PACKEDVERSION_H -#define LLVM_TEXTAPI_MACHO_PACKEDVERSION_H +#ifndef LLVM_TEXTAPI_PACKEDVERSION_H +#define LLVM_TEXTAPI_PACKEDVERSION_H #include <cstdint> #include <utility> @@ -64,4 +64,4 @@ inline raw_ostream &operator<<(raw_ostream &OS, const PackedVersion &Version) { } // end namespace MachO. } // end namespace llvm. 
-#endif // LLVM_TEXTAPI_MACHO_PACKEDVERSION_H +#endif // LLVM_TEXTAPI_PACKEDVERSION_H diff --git a/llvm/include/llvm/TextAPI/Platform.h b/llvm/include/llvm/TextAPI/Platform.h index 3f052b7b8624..f7affc3ae980 100644 --- a/llvm/include/llvm/TextAPI/Platform.h +++ b/llvm/include/llvm/TextAPI/Platform.h @@ -9,8 +9,8 @@ // Defines the Platforms supported by Tapi and helpers. // //===----------------------------------------------------------------------===// -#ifndef LLVM_TEXTAPI_MACHO_PLATFORM_H -#define LLVM_TEXTAPI_MACHO_PLATFORM_H +#ifndef LLVM_TEXTAPI_PLATFORM_H +#define LLVM_TEXTAPI_PLATFORM_H #include "llvm/ADT/SmallSet.h" #include "llvm/BinaryFormat/MachO.h" @@ -46,4 +46,4 @@ std::string getOSAndEnvironmentName(PlatformKind Platform, } // end namespace MachO. } // end namespace llvm. -#endif // LLVM_TEXTAPI_MACHO_PLATFORM_H +#endif // LLVM_TEXTAPI_PLATFORM_H diff --git a/llvm/include/llvm/TextAPI/Symbol.h b/llvm/include/llvm/TextAPI/Symbol.h index 02f184d2502f..dfc84908bba2 100644 --- a/llvm/include/llvm/TextAPI/Symbol.h +++ b/llvm/include/llvm/TextAPI/Symbol.h @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_TEXTAPI_MACHO_SYMBOL_H -#define LLVM_TEXTAPI_MACHO_SYMBOL_H +#ifndef LLVM_TEXTAPI_SYMBOL_H +#define LLVM_TEXTAPI_SYMBOL_H #include "llvm/ADT/BitmaskEnum.h" #include "llvm/ADT/StringRef.h" @@ -132,4 +132,4 @@ private: } // end namespace MachO. } // end namespace llvm. 
-#endif // LLVM_TEXTAPI_MACHO_SYMBOL_H +#endif // LLVM_TEXTAPI_SYMBOL_H diff --git a/llvm/include/llvm/TextAPI/Target.h b/llvm/include/llvm/TextAPI/Target.h index 53f56a6ee7b0..c2588b9d5a21 100644 --- a/llvm/include/llvm/TextAPI/Target.h +++ b/llvm/include/llvm/TextAPI/Target.h @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_TEXTAPI_MACHO_TARGET_H -#define LLVM_TEXTAPI_MACHO_TARGET_H +#ifndef LLVM_TEXTAPI_TARGET_H +#define LLVM_TEXTAPI_TARGET_H #include "llvm/ADT/Triple.h" #include "llvm/Support/Error.h" @@ -67,4 +67,4 @@ raw_ostream &operator<<(raw_ostream &OS, const Target &Target); } // namespace MachO } // namespace llvm -#endif // LLVM_TEXTAPI_MACHO_TARGET_H +#endif // LLVM_TEXTAPI_TARGET_H diff --git a/llvm/include/llvm/TextAPI/TextAPIReader.h b/llvm/include/llvm/TextAPI/TextAPIReader.h index a403bab8465d..389335312a74 100644 --- a/llvm/include/llvm/TextAPI/TextAPIReader.h +++ b/llvm/include/llvm/TextAPI/TextAPIReader.h @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_TEXTAPI_MACHO_TEXTAPIREADER_H -#define LLVM_TEXTAPI_MACHO_TEXTAPIREADER_H +#ifndef LLVM_TEXTAPI_TEXTAPIREADER_H +#define LLVM_TEXTAPI_TEXTAPIREADER_H #include "llvm/Support/Error.h" @@ -30,4 +30,4 @@ public: } // end namespace MachO. } // end namespace llvm. 
-#endif // LLVM_TEXTAPI_MACHO_TEXTAPIREADER_H +#endif // LLVM_TEXTAPI_TEXTAPIREADER_H diff --git a/llvm/include/llvm/TextAPI/TextAPIWriter.h b/llvm/include/llvm/TextAPI/TextAPIWriter.h index 763805168ae6..f9857a806f60 100644 --- a/llvm/include/llvm/TextAPI/TextAPIWriter.h +++ b/llvm/include/llvm/TextAPI/TextAPIWriter.h @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_TEXTAPI_MACHO_TEXTAPIWRITER_H -#define LLVM_TEXTAPI_MACHO_TEXTAPIWRITER_H +#ifndef LLVM_TEXTAPI_TEXTAPIWRITER_H +#define LLVM_TEXTAPI_TEXTAPIWRITER_H namespace llvm { @@ -28,4 +28,4 @@ public: } // end namespace MachO. } // end namespace llvm. -#endif // LLVM_TEXTAPI_MACHO_TEXTAPIWRITER_H +#endif // LLVM_TEXTAPI_TEXTAPIWRITER_H diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h index c93b8adcc890..d4cbc9bd20b7 100644 --- a/llvm/include/llvm/Transforms/IPO/Attributor.h +++ b/llvm/include/llvm/Transforms/IPO/Attributor.h @@ -101,6 +101,7 @@ #include "llvm/ADT/GraphTraits.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SetOperations.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/Triple.h" #include "llvm/ADT/iterator.h" @@ -591,7 +592,7 @@ struct IRPosition { LLVMContext &Ctx = getAnchorValue().getContext(); for (Attribute::AttrKind AK : AKs) - AttrList = AttrList.removeAttribute(Ctx, getAttrIdx(), AK); + AttrList = AttrList.removeAttributeAtIndex(Ctx, getAttrIdx(), AK); if (CB) CB->setAttributes(AttrList); @@ -1150,8 +1151,6 @@ struct Attributor { /// \param Allowed If not null, a set limiting the attribute opportunities. /// \param DeleteFns Whether to delete functions. /// \param RewriteSignatures Whether to rewrite function signatures. - /// \param MaxFixedPointIterations Maximum number of iterations to run until - /// fixpoint. 
Attributor(SetVector<Function *> &Functions, InformationCache &InfoCache, CallGraphUpdater &CGUpdater, DenseSet<const char *> *Allowed = nullptr, bool DeleteFns = true, @@ -1169,8 +1168,9 @@ struct Attributor { /// \param CGUpdater Helper to update an underlying call graph. /// \param Allowed If not null, a set limiting the attribute opportunities. /// \param DeleteFns Whether to delete functions - /// \param MaxFixedPointIterations Maximum number of iterations to run until - /// fixpoint. + /// \param RewriteSignatures Whether to rewrite function signatures. + /// \param MaxFixpointIterations Maximum number of iterations to run until + /// fixpoint. /// \param OREGetter A callback function that returns an ORE object from a /// Function pointer. /// \param PassName The name of the pass emitting remarks. @@ -1855,6 +1855,10 @@ public: /// static void createShallowWrapper(Function &F); + /// Returns true if the function \p F can be internalized. i.e. it has a + /// compatible linkage. + static bool isInternalizable(Function &F); + /// Make another copy of the function \p F such that the copied version has /// internal linkage afterwards and can be analysed. Then we replace all uses /// of the original function to the copied one @@ -1870,6 +1874,22 @@ public: /// null pointer. static Function *internalizeFunction(Function &F, bool Force = false); + /// Make copies of each function in the set \p FnSet such that the copied + /// version has internal linkage afterwards and can be analysed. Then we + /// replace all uses of the original function to the copied one. The map + /// \p FnMap contains a mapping of functions to their internalized versions. + /// + /// Only non-locally linked functions that have `linkonce_odr` or `weak_odr` + /// linkage can be internalized because these linkages guarantee that other + /// definitions with the same name have the same semantics as this one. 
+ /// + /// This version will internalize all the functions in the set \p FnSet at + /// once and then replace the uses. This prevents internalized functions being + /// called by external functions when there is an internalized version in the + /// module. + static bool internalizeFunctions(SmallPtrSetImpl<Function *> &FnSet, + DenseMap<Function *, Function *> &FnMap); + /// Return the data layout associated with the anchor scope. const DataLayout &getDataLayout() const { return InfoCache.DL; } @@ -2492,6 +2512,139 @@ struct IntegerRangeState : public AbstractState { return *this; } }; + +/// Simple state for a set. +/// +/// This represents a state containing a set of values. The interface supports +/// modelling sets that contain all possible elements. The state's internal +/// value is modified using union or intersection operations. +template <typename BaseTy> struct SetState : public AbstractState { + /// A wrapper around a set that has semantics for handling unions and + /// intersections with a "universal" set that contains all elements. + struct SetContents { + /// Creates a universal set with no concrete elements or an empty set. + SetContents(bool Universal) : Universal(Universal) {} + + /// Creates a non-universal set with concrete values. + SetContents(const DenseSet<BaseTy> &Assumptions) + : Universal(false), Set(Assumptions) {} + + SetContents(bool Universal, const DenseSet<BaseTy> &Assumptions) + : Universal(Universal), Set(Assumptions) {} + + const DenseSet<BaseTy> &getSet() const { return Set; } + + bool isUniversal() const { return Universal; } + + bool empty() const { return Set.empty() && !Universal; } + + /// Finds A := A ^ B where A or B could be the "Universal" set which + /// contains every possible attribute. Returns true if changes were made. 
+ bool getIntersection(const SetContents &RHS) { + bool IsUniversal = Universal; + unsigned Size = Set.size(); + + // A := A ^ U = A + if (RHS.isUniversal()) + return false; + + // A := U ^ B = B + if (Universal) + Set = RHS.getSet(); + else + set_intersect(Set, RHS.getSet()); + + Universal &= RHS.isUniversal(); + return IsUniversal != Universal || Size != Set.size(); + } + + /// Finds A := A u B where A or B could be the "Universal" set which + /// contains every possible attribute. returns true if changes were made. + bool getUnion(const SetContents &RHS) { + bool IsUniversal = Universal; + unsigned Size = Set.size(); + + // A := A u U = U = U u B + if (!RHS.isUniversal() && !Universal) + set_union(Set, RHS.getSet()); + + Universal |= RHS.isUniversal(); + return IsUniversal != Universal || Size != Set.size(); + } + + private: + /// Indicates if this set is "universal", containing every possible element. + bool Universal; + + /// The set of currently active assumptions. + DenseSet<BaseTy> Set; + }; + + SetState() : Known(false), Assumed(true), IsAtFixedpoint(false) {} + + /// Initializes the known state with an initial set and initializes the + /// assumed state as universal. + SetState(const DenseSet<BaseTy> &Known) + : Known(Known), Assumed(true), IsAtFixedpoint(false) {} + + /// See AbstractState::isValidState() + bool isValidState() const override { return !Assumed.empty(); } + + /// See AbstractState::isAtFixpoint() + bool isAtFixpoint() const override { return IsAtFixedpoint; } + + /// See AbstractState::indicateOptimisticFixpoint(...) + ChangeStatus indicateOptimisticFixpoint() override { + IsAtFixedpoint = true; + Known = Assumed; + return ChangeStatus::UNCHANGED; + } + + /// See AbstractState::indicatePessimisticFixpoint(...) + ChangeStatus indicatePessimisticFixpoint() override { + IsAtFixedpoint = true; + Assumed = Known; + return ChangeStatus::CHANGED; + } + + /// Return the known state encoding. 
+ const SetContents &getKnown() const { return Known; } + + /// Return the assumed state encoding. + const SetContents &getAssumed() const { return Assumed; } + + /// Returns if the set state contains the element. + bool setContains(const BaseTy &Elem) const { + return Assumed.getSet().contains(Elem) || Known.getSet().contains(Elem); + } + + /// Performs the set intersection between this set and \p RHS. Returns true if + /// changes were made. + bool getIntersection(const SetContents &RHS) { + unsigned SizeBefore = Assumed.getSet().size(); + + // Get intersection and make sure that the known set is still a proper + // subset of the assumed set. A := K u (A ^ R). + Assumed.getIntersection(RHS); + Assumed.getUnion(Known); + + return SizeBefore != Assumed.getSet().size(); + } + + /// Performs the set union between this set and \p RHS. Returns true if + /// changes were made. + bool getUnion(const SetContents &RHS) { return Assumed.getUnion(RHS); } + +private: + /// The set of values known for this state. + SetContents Known; + + /// The set of assumed values for this state. + SetContents Assumed; + + bool IsAtFixedpoint; +}; + /// Helper struct necessary as the modular build fails if the virtual method /// IRAttribute::manifest is defined in the Attributor.cpp. struct IRAttributeManifest { @@ -3394,7 +3547,7 @@ struct AADereferenceable }; using AAAlignmentStateType = - IncIntegerState<uint32_t, Value::MaximumAlignment, 1>; + IncIntegerState<uint64_t, Value::MaximumAlignment, 1>; /// An abstract interface for all align attributes. struct AAAlign : public IRAttribute< Attribute::Alignment, @@ -3402,10 +3555,10 @@ struct AAAlign : public IRAttribute< AAAlign(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {} /// Return assumed alignment. - unsigned getAssumedAlign() const { return getAssumed(); } + uint64_t getAssumedAlign() const { return getAssumed(); } /// Return known alignment. 
- unsigned getKnownAlign() const { return getKnown(); } + uint64_t getKnownAlign() const { return getKnown(); } /// See AbstractAttribute::getName() const std::string getName() const override { return "AAAlign"; } @@ -3776,7 +3929,7 @@ struct AAMemoryLocation /// Return true if we assume that the associated functions has no observable /// accesses. bool isAssumedReadNone() const { - return isAssumed(NO_LOCATIONS) | isAssumedStackOnly(); + return isAssumed(NO_LOCATIONS) || isAssumedStackOnly(); } /// Return true if we know that the associated functions has at most @@ -3920,19 +4073,19 @@ struct AAValueConstantRange static AAValueConstantRange &createForPosition(const IRPosition &IRP, Attributor &A); - /// Return an assumed range for the assocaited value a program point \p CtxI. + /// Return an assumed range for the associated value a program point \p CtxI. /// If \p I is nullptr, simply return an assumed range. virtual ConstantRange getAssumedConstantRange(Attributor &A, const Instruction *CtxI = nullptr) const = 0; - /// Return a known range for the assocaited value at a program point \p CtxI. + /// Return a known range for the associated value at a program point \p CtxI. /// If \p I is nullptr, simply return a known range. virtual ConstantRange getKnownConstantRange(Attributor &A, const Instruction *CtxI = nullptr) const = 0; - /// Return an assumed constant for the assocaited value a program point \p + /// Return an assumed constant for the associated value a program point \p /// CtxI. Optional<ConstantInt *> getAssumedConstantInt(Attributor &A, @@ -4435,6 +4588,9 @@ struct AAFunctionReachability /// If the function represented by this possition can reach \p Fn. virtual bool canReach(Attributor &A, Function *Fn) const = 0; + /// Can \p CB reach \p Fn + virtual bool canReach(Attributor &A, CallBase &CB, Function *Fn) const = 0; + /// Create an abstract attribute view for the position \p IRP. 
static AAFunctionReachability &createForPosition(const IRPosition &IRP, Attributor &A); @@ -4587,6 +4743,40 @@ struct AAPointerInfo : public AbstractAttribute { static const char ID; }; +/// An abstract attribute for getting assumption information. +struct AAAssumptionInfo + : public StateWrapper<SetState<StringRef>, AbstractAttribute, + DenseSet<StringRef>> { + using Base = + StateWrapper<SetState<StringRef>, AbstractAttribute, DenseSet<StringRef>>; + + AAAssumptionInfo(const IRPosition &IRP, Attributor &A, + const DenseSet<StringRef> &Known) + : Base(IRP, Known) {} + + /// Returns true if the assumption set contains the assumption \p Assumption. + virtual bool hasAssumption(const StringRef Assumption) const = 0; + + /// Create an abstract attribute view for the position \p IRP. + static AAAssumptionInfo &createForPosition(const IRPosition &IRP, + Attributor &A); + + /// See AbstractAttribute::getName() + const std::string getName() const override { return "AAAssumptionInfo"; } + + /// See AbstractAttribute::getIdAddr() + const char *getIdAddr() const override { return &ID; } + + /// This function should return true if the type of the \p AA is + /// AAAssumptionInfo + static bool classof(const AbstractAttribute *AA) { + return (AA->getIdAddr() == &ID); + } + + /// Unique ID (due to the unique address) + static const char ID; +}; + raw_ostream &operator<<(raw_ostream &, const AAPointerInfo::Access &); /// Run options, used by the pass manager. 
diff --git a/llvm/include/llvm/Transforms/IPO/FunctionAttrs.h b/llvm/include/llvm/Transforms/IPO/FunctionAttrs.h index ce61eea05c79..0b6734a3929d 100644 --- a/llvm/include/llvm/Transforms/IPO/FunctionAttrs.h +++ b/llvm/include/llvm/Transforms/IPO/FunctionAttrs.h @@ -17,6 +17,7 @@ #include "llvm/Analysis/CGSCCPassManager.h" #include "llvm/Analysis/LazyCallGraph.h" +#include "llvm/IR/ModuleSummaryIndex.h" #include "llvm/IR/PassManager.h" namespace llvm { @@ -38,6 +39,13 @@ enum MemoryAccessKind { /// Returns the memory access properties of this copy of the function. MemoryAccessKind computeFunctionBodyMemoryAccess(Function &F, AAResults &AAR); +/// Propagate function attributes for function summaries along the index's +/// callgraph during thinlink +bool thinLTOPropagateFunctionAttrs( + ModuleSummaryIndex &Index, + function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)> + isPrevailing); + /// Computes function attributes in post-order over the call graph. /// /// By operating in post-order, this pass computes precise attributes for diff --git a/llvm/include/llvm/Transforms/IPO/FunctionImport.h b/llvm/include/llvm/Transforms/IPO/FunctionImport.h index aad938d48570..c5bafb89fcb5 100644 --- a/llvm/include/llvm/Transforms/IPO/FunctionImport.h +++ b/llvm/include/llvm/Transforms/IPO/FunctionImport.h @@ -167,16 +167,24 @@ void ComputeCrossModuleImportForModuleFromIndex( FunctionImporter::ImportMapTy &ImportList); /// PrevailingType enum used as a return type of callback passed -/// to computeDeadSymbols. Yes and No values used when status explicitly -/// set by symbols resolution, otherwise status is Unknown. +/// to computeDeadSymbolsAndUpdateIndirectCalls. Yes and No values used when +/// status explicitly set by symbols resolution, otherwise status is Unknown. enum class PrevailingType { Yes, No, Unknown }; +/// Update call edges for indirect calls to local functions added from +/// SamplePGO when needed. 
Normally this is done during +/// computeDeadSymbolsAndUpdateIndirectCalls, but can be called standalone +/// when that is not called (e.g. during testing). +void updateIndirectCalls(ModuleSummaryIndex &Index); + /// Compute all the symbols that are "dead": i.e these that can't be reached /// in the graph from any of the given symbols listed in /// \p GUIDPreservedSymbols. Non-prevailing symbols are symbols without a /// prevailing copy anywhere in IR and are normally dead, \p isPrevailing /// predicate returns status of symbol. -void computeDeadSymbols( +/// Also update call edges for indirect calls to local functions added from +/// SamplePGO when needed. +void computeDeadSymbolsAndUpdateIndirectCalls( ModuleSummaryIndex &Index, const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols, function_ref<PrevailingType(GlobalValue::GUID)> isPrevailing); @@ -214,12 +222,15 @@ std::error_code EmitImportsFiles( StringRef ModulePath, StringRef OutputFilename, const std::map<std::string, GVSummaryMapTy> &ModuleToSummariesForIndex); -/// Resolve prevailing symbol linkages and constrain visibility (1. CanAutoHide, -/// 2. consider visibility from other definitions for ELF) in \p TheModule based -/// on the information recorded in the summaries during global summary-based -/// analysis. -void thinLTOResolvePrevailingInModule(Module &TheModule, - const GVSummaryMapTy &DefinedGlobals); +/// Based on the information recorded in the summaries during global +/// summary-based analysis: +/// 1. Resolve prevailing symbol linkages and constrain visibility (CanAutoHide +/// and consider visibility from other definitions for ELF) in \p TheModule +/// 2. (optional) Apply propagated function attributes to \p TheModule if +/// PropagateAttrs is true +void thinLTOFinalizeInModule(Module &TheModule, + const GVSummaryMapTy &DefinedGlobals, + bool PropagateAttrs); /// Internalize \p TheModule based on the information recorded in the summaries /// during global summary-based analysis. 
diff --git a/llvm/include/llvm/Transforms/IPO/IROutliner.h b/llvm/include/llvm/Transforms/IPO/IROutliner.h index 442a8ec1d2e2..110c0b4dcf16 100644 --- a/llvm/include/llvm/Transforms/IPO/IROutliner.h +++ b/llvm/include/llvm/Transforms/IPO/IROutliner.h @@ -86,6 +86,15 @@ struct OutlinableRegion { DenseMap<unsigned, unsigned> ExtractedArgToAgg; DenseMap<unsigned, unsigned> AggArgToExtracted; + /// Marks whether we need to change the order of the arguments when mapping + /// the old extracted function call to the new aggregate outlined function + /// call. + bool ChangedArgOrder = false; + + /// Marks whether this region ends in a branch, there is special handling + /// required for the following basic blocks in this case. + bool EndsInBranch = false; + /// Mapping of the argument number in the deduplicated function /// to a given constant, which is used when creating the arguments to the call /// to the newly created deduplicated function. This is handled separately @@ -147,6 +156,14 @@ struct OutlinableRegion { /// containing the called function. void reattachCandidate(); + /// Find a corresponding value for \p V in similar OutlinableRegion \p Other. + /// + /// \param Other [in] - The OutlinableRegion to find the corresponding Value + /// in. + /// \param V [in] - The Value to look for in the other region. + /// \return The corresponding Value to \p V if it exists, otherwise nullptr. + Value *findCorrespondingValueIn(const OutlinableRegion &Other, Value *V); + /// Get the size of the code removed from the region. /// /// \param [in] TTI - The TargetTransformInfo for the parent function. @@ -176,6 +193,16 @@ private: /// \returns The number of Functions created. unsigned doOutline(Module &M); + /// Check whether an OutlinableRegion is incompatible with code already + /// outlined. OutlinableRegions are incomptaible when there are overlapping + /// instructions, or code that has not been recorded has been added to the + /// instructions. 
+ /// + /// \param [in] Region - The OutlinableRegion to check for conflicts with + /// already outlined code. + /// \returns whether the region can safely be outlined. + bool isCompatibleWithAlreadyOutlinedCode(const OutlinableRegion &Region); + /// Remove all the IRSimilarityCandidates from \p CandidateVec that have /// instructions contained in a previously outlined region and put the /// remaining regions in \p CurrentGroup. @@ -301,8 +328,9 @@ private: struct InstructionAllowed : public InstVisitor<InstructionAllowed, bool> { InstructionAllowed() {} - // TODO: Determine a scheme to resolve when the label is similar enough. - bool visitBranchInst(BranchInst &BI) { return false; } + bool visitBranchInst(BranchInst &BI) { + return EnableBranches; + } // TODO: Determine a scheme to resolve when the labels are similar enough. bool visitPHINode(PHINode &PN) { return false; } // TODO: Handle allocas. @@ -341,6 +369,10 @@ private: // TODO: Handle interblock similarity. bool visitTerminator(Instruction &I) { return false; } bool visitInstruction(Instruction &I) { return true; } + + // The flag variable that marks whether we should allow branch instructions + // to be outlined. + bool EnableBranches = false; }; /// A InstVisitor used to exclude certain instructions from being outlined. 
diff --git a/llvm/include/llvm/Transforms/IPO/Inliner.h b/llvm/include/llvm/Transforms/IPO/Inliner.h index 23a39d7f2e2b..a7060943c4c0 100644 --- a/llvm/include/llvm/Transforms/IPO/Inliner.h +++ b/llvm/include/llvm/Transforms/IPO/Inliner.h @@ -14,7 +14,6 @@ #include "llvm/Analysis/InlineAdvisor.h" #include "llvm/Analysis/InlineCost.h" #include "llvm/Analysis/LazyCallGraph.h" -#include "llvm/Analysis/ReplayInlineAdvisor.h" #include "llvm/Analysis/Utils/ImportedFunctionsInliningStatistics.h" #include "llvm/IR/PassManager.h" #include <utility> @@ -103,6 +102,9 @@ public: PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM, LazyCallGraph &CG, CGSCCUpdateResult &UR); + void printPipeline(raw_ostream &OS, + function_ref<StringRef(StringRef)> MapClassName2PassName); + private: InlineAdvisor &getAdvisor(const ModuleAnalysisManagerCGSCCProxy::Result &MAM, FunctionAnalysisManager &FAM, Module &M); @@ -130,17 +132,27 @@ public: /// before run is called, as part of pass pipeline building. CGSCCPassManager &getPM() { return PM; } - /// Allow adding module-level passes benefiting the contained CGSCC passes. + /// Add a module pass that runs before the CGSCC passes. template <class T> void addModulePass(T Pass) { MPM.addPass(std::move(Pass)); } + /// Add a module pass that runs after the CGSCC passes. + template <class T> void addLateModulePass(T Pass) { + AfterCGMPM.addPass(std::move(Pass)); + } + + void printPipeline(raw_ostream &OS, + function_ref<StringRef(StringRef)> MapClassName2PassName); + private: const InlineParams Params; const InliningAdvisorMode Mode; const unsigned MaxDevirtIterations; + // TODO: Clean this up so we only have one ModulePassManager. 
CGSCCPassManager PM; ModulePassManager MPM; + ModulePassManager AfterCGMPM; }; } // end namespace llvm diff --git a/llvm/include/llvm/Transforms/IPO/LoopExtractor.h b/llvm/include/llvm/Transforms/IPO/LoopExtractor.h index def3c5943919..aa697484d0e9 100644 --- a/llvm/include/llvm/Transforms/IPO/LoopExtractor.h +++ b/llvm/include/llvm/Transforms/IPO/LoopExtractor.h @@ -23,6 +23,8 @@ namespace llvm { struct LoopExtractorPass : public PassInfoMixin<LoopExtractorPass> { LoopExtractorPass(unsigned NumLoops = ~0) : NumLoops(NumLoops) {} PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); + void printPipeline(raw_ostream &OS, + function_ref<StringRef(StringRef)> MapClassName2PassName); private: unsigned NumLoops; diff --git a/llvm/include/llvm/Transforms/IPO/ModuleInliner.h b/llvm/include/llvm/Transforms/IPO/ModuleInliner.h new file mode 100644 index 000000000000..963d74d71003 --- /dev/null +++ b/llvm/include/llvm/Transforms/IPO/ModuleInliner.h @@ -0,0 +1,51 @@ +//===- ModuleInliner.h - Module level Inliner pass --------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_IPO_MODULEINLINER_H +#define LLVM_TRANSFORMS_IPO_MODULEINLINER_H + +#include "llvm/Analysis/InlineAdvisor.h" +#include "llvm/Analysis/InlineCost.h" +#include "llvm/Analysis/ReplayInlineAdvisor.h" +#include "llvm/Analysis/Utils/ImportedFunctionsInliningStatistics.h" +#include "llvm/IR/PassManager.h" +#include <utility> + +namespace llvm { + +class AssumptionCacheTracker; +class ProfileSummaryInfo; + +/// The module inliner pass for the new pass manager. +/// +/// This pass wires together the inlining utilities and the inline cost +/// analysis into a module pass. 
Different from SCC inliner, it considers every +/// call in every function in the whole module and tries to inline if +/// profitable. With this module level inliner, it is possible to evaluate more +/// heuristics in the module level such like PriorityInlineOrder. It can be +/// tuned with a number of parameters to control what cost model is used and +/// what tradeoffs are made when making the decision. +class ModuleInlinerPass : public PassInfoMixin<ModuleInlinerPass> { +public: + ModuleInlinerPass(InlineParams Params = getInlineParams(), + InliningAdvisorMode Mode = InliningAdvisorMode::Default) + : Params(Params), Mode(Mode){}; + ModuleInlinerPass(ModuleInlinerPass &&Arg) = default; + + PreservedAnalyses run(Module &, ModuleAnalysisManager &); + +private: + InlineAdvisor &getAdvisor(const ModuleAnalysisManager &MAM, + FunctionAnalysisManager &FAM, Module &M); + std::unique_ptr<InlineAdvisor> OwnedAdvisor; + const InlineParams Params; + const InliningAdvisorMode Mode; +}; +} // end namespace llvm + +#endif // LLVM_TRANSFORMS_IPO_MODULEINLINER_H diff --git a/llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h b/llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h index 4f941d26df4c..7f321a688aff 100644 --- a/llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h +++ b/llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h @@ -154,7 +154,6 @@ public: /// tests. 
const ModuleSummaryIndex *ImportSummary = nullptr; - bool DisableTailCalls; bool DisableUnrollLoops; bool CallGraphProfile; bool SLPVectorize; diff --git a/llvm/include/llvm/Transforms/IPO/ProfiledCallGraph.h b/llvm/include/llvm/Transforms/IPO/ProfiledCallGraph.h index 0adaa1b16d54..6e45f8f6fb05 100644 --- a/llvm/include/llvm/Transforms/IPO/ProfiledCallGraph.h +++ b/llvm/include/llvm/Transforms/IPO/ProfiledCallGraph.h @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_TOOLS_LLVM_PROFGEN_PROFILEDCALLGRAPH_H -#define LLVM_TOOLS_LLVM_PROFGEN_PROFILEDCALLGRAPH_H +#ifndef LLVM_TRANSFORMS_IPO_PROFILEDCALLGRAPH_H +#define LLVM_TRANSFORMS_IPO_PROFILEDCALLGRAPH_H #include "llvm/ADT/GraphTraits.h" #include "llvm/ADT/StringMap.h" @@ -42,7 +42,7 @@ public: using iterator = std::set<ProfiledCallGraphNode *>::iterator; // Constructor for non-CS profile. - ProfiledCallGraph(StringMap<FunctionSamples> &ProfileMap) { + ProfiledCallGraph(SampleProfileMap &ProfileMap) { assert(!FunctionSamples::ProfileIsCS && "CS profile is not handled here"); for (const auto &Samples : ProfileMap) { addProfiledCalls(Samples.second); @@ -56,7 +56,7 @@ public: std::queue<ContextTrieNode *> Queue; for (auto &Child : ContextTracker.getRootContext().getAllChildContext()) { ContextTrieNode *Callee = &Child.second; - addProfiledFunction(Callee->getFuncName()); + addProfiledFunction(ContextTracker.getFuncNameFor(Callee)); Queue.push(Callee); } @@ -72,9 +72,10 @@ public: // context-based one, which may in turn block context-based inlining. 
for (auto &Child : Caller->getAllChildContext()) { ContextTrieNode *Callee = &Child.second; - addProfiledFunction(Callee->getFuncName()); + addProfiledFunction(ContextTracker.getFuncNameFor(Callee)); Queue.push(Callee); - addProfiledCall(Caller->getFuncName(), Callee->getFuncName()); + addProfiledCall(ContextTracker.getFuncNameFor(Caller), + ContextTracker.getFuncNameFor(Callee)); } } } diff --git a/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h b/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h index 94f7796298db..5d80da407d7e 100644 --- a/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h +++ b/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h @@ -42,31 +42,34 @@ public: : ParentContext(Parent), FuncName(FName), FuncSamples(FSamples), CallSiteLoc(CallLoc){}; ContextTrieNode *getChildContext(const LineLocation &CallSite, - StringRef CalleeName); + StringRef ChildName); ContextTrieNode *getHottestChildContext(const LineLocation &CallSite); ContextTrieNode *getOrCreateChildContext(const LineLocation &CallSite, - StringRef CalleeName, + StringRef ChildName, bool AllowCreate = true); ContextTrieNode &moveToChildContext(const LineLocation &CallSite, ContextTrieNode &&NodeToMove, - StringRef ContextStrToRemove, + uint32_t ContextFramesToRemove, bool DeleteNode = true); - void removeChildContext(const LineLocation &CallSite, StringRef CalleeName); - std::map<uint32_t, ContextTrieNode> &getAllChildContext(); + void removeChildContext(const LineLocation &CallSite, StringRef ChildName); + std::map<uint64_t, ContextTrieNode> &getAllChildContext(); StringRef getFuncName() const; FunctionSamples *getFunctionSamples() const; void setFunctionSamples(FunctionSamples *FSamples); + Optional<uint32_t> getFunctionSize() const; + void addFunctionSize(uint32_t FSize); LineLocation getCallSiteLoc() const; ContextTrieNode *getParentContext() const; void setParentContext(ContextTrieNode *Parent); - void dump(); + void dumpNode(); + void dumpTree(); private: - 
static uint32_t nodeHash(StringRef ChildName, const LineLocation &Callsite); + static uint64_t nodeHash(StringRef ChildName, const LineLocation &Callsite); // Map line+discriminator location to child context - std::map<uint32_t, ContextTrieNode> AllChildContext; + std::map<uint64_t, ContextTrieNode> AllChildContext; // Link to parent context node ContextTrieNode *ParentContext; @@ -77,6 +80,9 @@ private: // Function Samples for current context FunctionSamples *FuncSamples; + // Function size for current context + Optional<uint32_t> FuncSize; + // Callsite location in parent context LineLocation CallSiteLoc; }; @@ -90,9 +96,22 @@ private: // calling context and the context is identified by path from root to the node. class SampleContextTracker { public: - using ContextSamplesTy = SmallVector<FunctionSamples *, 16>; - - SampleContextTracker(StringMap<FunctionSamples> &Profiles); + struct ProfileComparer { + bool operator()(FunctionSamples *A, FunctionSamples *B) const { + // Sort function profiles by the number of total samples and their + // contexts. + if (A->getTotalSamples() == B->getTotalSamples()) + return A->getContext() < B->getContext(); + return A->getTotalSamples() > B->getTotalSamples(); + } + }; + + // Keep profiles of a function sorted so that they will be processed/promoted + // deterministically. + using ContextSamplesTy = std::set<FunctionSamples *, ProfileComparer>; + + SampleContextTracker(SampleProfileMap &Profiles, + const DenseMap<uint64_t, StringRef> *GUIDToFuncNameMap); // Query context profile for a specific callee with given name at a given // call-site. The full context is identified by location of call instruction. 
FunctionSamples *getCalleeContextSamplesFor(const CallBase &Inst, @@ -116,6 +135,8 @@ public: FunctionSamples *getBaseSamplesFor(StringRef Name, bool MergeContext = true); // Retrieve the context trie node for given profile context ContextTrieNode *getContextFor(const SampleContext &Context); + // Get real function name for a given trie node. + StringRef getFuncNameFor(ContextTrieNode *Node) const; // Mark a context profile as inlined when function is inlined. // This makes sure that inlined context profile will be excluded in // function's base profile. @@ -136,14 +157,18 @@ private: ContextTrieNode &addTopLevelContextNode(StringRef FName); ContextTrieNode &promoteMergeContextSamplesTree(ContextTrieNode &NodeToPromo); void mergeContextNode(ContextTrieNode &FromNode, ContextTrieNode &ToNode, - StringRef ContextStrToRemove); - ContextTrieNode &promoteMergeContextSamplesTree(ContextTrieNode &FromNode, - ContextTrieNode &ToNodeParent, - StringRef ContextStrToRemove); + uint32_t ContextFramesToRemove); + ContextTrieNode & + promoteMergeContextSamplesTree(ContextTrieNode &FromNode, + ContextTrieNode &ToNodeParent, + uint32_t ContextFramesToRemove); // Map from function name to context profiles (excluding base profile) StringMap<ContextSamplesTy> FuncToCtxtProfiles; + // Map from function guid to real function names. Only used in md5 mode. 
+ const DenseMap<uint64_t, StringRef> *GUIDToFuncNameMap; + // Root node for context trie tree ContextTrieNode RootContext; }; diff --git a/llvm/include/llvm/Transforms/InstCombine/InstCombine.h b/llvm/include/llvm/Transforms/InstCombine/InstCombine.h index af6d2a18a25a..6dee38c83b36 100644 --- a/llvm/include/llvm/Transforms/InstCombine/InstCombine.h +++ b/llvm/include/llvm/Transforms/InstCombine/InstCombine.h @@ -18,12 +18,14 @@ #include "llvm/IR/Function.h" #include "llvm/IR/PassManager.h" -#include "llvm/Transforms/InstCombine/InstCombineWorklist.h" + +#define DEBUG_TYPE "instcombine" +#include "llvm/Transforms/Utils/InstructionWorklist.h" namespace llvm { class InstCombinePass : public PassInfoMixin<InstCombinePass> { - InstCombineWorklist Worklist; + InstructionWorklist Worklist; const unsigned MaxIterations; public: @@ -38,7 +40,7 @@ public: /// This is a basic whole-function wrapper around the instcombine utility. It /// will try to combine all instructions in the function. class InstructionCombiningPass : public FunctionPass { - InstCombineWorklist Worklist; + InstructionWorklist Worklist; const unsigned MaxIterations; public: @@ -67,4 +69,6 @@ FunctionPass *createInstructionCombiningPass(); FunctionPass *createInstructionCombiningPass(unsigned MaxIterations); } +#undef DEBUG_TYPE + #endif diff --git a/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h b/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h index ba0d41f9b748..c6aee439b5a0 100644 --- a/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h +++ b/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h @@ -25,10 +25,10 @@ #include "llvm/IR/PatternMatch.h" #include "llvm/Support/Debug.h" #include "llvm/Support/KnownBits.h" -#include "llvm/Transforms/InstCombine/InstCombineWorklist.h" #include <cassert> #define DEBUG_TYPE "instcombine" +#include "llvm/Transforms/Utils/InstructionWorklist.h" namespace llvm { @@ -43,7 +43,9 @@ class TargetTransformInfo; /// This class provides both the 
logic to recursively visit instructions and /// combine them. class LLVM_LIBRARY_VISIBILITY InstCombiner { - /// Only used to call target specific inst combining. + /// Only used to call target specific intrinsic combining. + /// It must **NOT** be used for any other purpose, as InstCombine is a + /// target-independent canonicalization transform. TargetTransformInfo &TTI; public: @@ -57,7 +59,7 @@ public: protected: /// A worklist of the instructions that need to be simplified. - InstCombineWorklist &Worklist; + InstructionWorklist &Worklist; // Mode in which we are running the combiner. const bool MinimizeSize; @@ -81,7 +83,7 @@ protected: bool MadeIRChange = false; public: - InstCombiner(InstCombineWorklist &Worklist, BuilderTy &Builder, + InstCombiner(InstructionWorklist &Worklist, BuilderTy &Builder, bool MinimizeSize, AAResults *AA, AssumptionCache &AC, TargetLibraryInfo &TLI, TargetTransformInfo &TTI, DominatorTree &DT, OptimizationRemarkEmitter &ORE, @@ -165,16 +167,16 @@ public: switch (Pred) { case ICmpInst::ICMP_SLT: // True if LHS s< 0 TrueIfSigned = true; - return RHS.isNullValue(); + return RHS.isZero(); case ICmpInst::ICMP_SLE: // True if LHS s<= -1 TrueIfSigned = true; - return RHS.isAllOnesValue(); + return RHS.isAllOnes(); case ICmpInst::ICMP_SGT: // True if LHS s> -1 TrueIfSigned = false; - return RHS.isAllOnesValue(); + return RHS.isAllOnes(); case ICmpInst::ICMP_SGE: // True if LHS s>= 0 TrueIfSigned = false; - return RHS.isNullValue(); + return RHS.isZero(); case ICmpInst::ICMP_UGT: // True if LHS u> RHS and RHS == sign-bit-mask - 1 TrueIfSigned = true; @@ -246,12 +248,13 @@ public: // If `V` is of the form `A + Constant` then `-1 - V` can be folded into // `(-1 - Constant) - A` if we are willing to invert all of the uses. 
- if (BinaryOperator *BO = dyn_cast<BinaryOperator>(V)) - if (BO->getOpcode() == Instruction::Add || - BO->getOpcode() == Instruction::Sub) - if (match(BO, PatternMatch::m_c_BinOp(PatternMatch::m_Value(), - PatternMatch::m_ImmConstant()))) - return WillInvertAllUses; + if (match(V, m_Add(PatternMatch::m_Value(), PatternMatch::m_ImmConstant()))) + return WillInvertAllUses; + + // If `V` is of the form `Constant - A` then `-1 - V` can be folded into + // `A + (-1 - Constant)` if we are willing to invert all of the uses. + if (match(V, m_Sub(PatternMatch::m_ImmConstant(), PatternMatch::m_Value()))) + return WillInvertAllUses; // Selects with invertible operands are freely invertible if (match(V, @@ -259,6 +262,12 @@ public: m_Not(PatternMatch::m_Value())))) return WillInvertAllUses; + // Min/max may be in the form of intrinsics, so handle those identically + // to select patterns. + if (match(V, m_MaxOrMin(m_Not(PatternMatch::m_Value()), + m_Not(PatternMatch::m_Value())))) + return WillInvertAllUses; + return false; } @@ -354,14 +363,6 @@ public: return ConstantVector::get(Out); } - /// Create and insert the idiom we use to indicate a block is unreachable - /// without having to rewrite the CFG from within InstCombine. 
- static void CreateNonTerminatorUnreachable(Instruction *InsertAt) { - auto &Ctx = InsertAt->getContext(); - new StoreInst(ConstantInt::getTrue(Ctx), - UndefValue::get(Type::getInt1PtrTy(Ctx)), InsertAt); - } - void addToWorklist(Instruction *I) { Worklist.push(I); } AssumptionCache &getAssumptionCache() const { return AC; } @@ -479,6 +480,11 @@ public: return llvm::ComputeNumSignBits(Op, DL, Depth, &AC, CxtI, &DT); } + unsigned ComputeMinSignedBits(const Value *Op, unsigned Depth = 0, + const Instruction *CxtI = nullptr) const { + return llvm::ComputeMinSignedBits(Op, DL, Depth, &AC, CxtI, &DT); + } + OverflowResult computeOverflowForUnsignedMul(const Value *LHS, const Value *RHS, const Instruction *CxtI) const { diff --git a/llvm/include/llvm/Transforms/Instrumentation.h b/llvm/include/llvm/Transforms/Instrumentation.h index 03108bacb0da..a288a3972c3d 100644 --- a/llvm/include/llvm/Transforms/Instrumentation.h +++ b/llvm/include/llvm/Transforms/Instrumentation.h @@ -78,7 +78,7 @@ struct GCOVOptions { ModulePass *createGCOVProfilerPass(const GCOVOptions &Options = GCOVOptions::getDefault()); -// PGO Instrumention. Parameter IsCS indicates if this is the context senstive +// PGO Instrumention. Parameter IsCS indicates if this is the context sensitive // instrumentation. ModulePass *createPGOInstrumentationGenLegacyPass(bool IsCS = false); ModulePass * @@ -138,7 +138,7 @@ struct InstrProfOptions { }; /// Insert frontend instrumentation based profiling. Parameter IsCS indicates if -// this is the context senstive instrumentation. +// this is the context sensitive instrumentation. 
ModulePass *createInstrProfilingLegacyPass( const InstrProfOptions &Options = InstrProfOptions(), bool IsCS = false); @@ -169,6 +169,8 @@ struct SanitizerCoverageOptions { bool PCTable = false; bool NoPrune = false; bool StackDepth = false; + bool TraceLoads = false; + bool TraceStores = false; SanitizerCoverageOptions() = default; }; diff --git a/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizer.h b/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizer.h index 3781253d2694..c13407a44091 100644 --- a/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizer.h +++ b/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizer.h @@ -1,9 +1,8 @@ //===--------- Definition of the AddressSanitizer class ---------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -90,6 +89,14 @@ private: static AnalysisKey Key; }; +struct AddressSanitizerOptions { + bool CompileKernel = false; + bool Recover = false; + bool UseAfterScope = false; + AsanDetectStackUseAfterReturnMode UseAfterReturn = + AsanDetectStackUseAfterReturnMode::Runtime; +}; + /// Public interface to the address sanitizer pass for instrumenting code to /// check for various memory errors at runtime. /// @@ -99,19 +106,15 @@ private: /// surrounding requested memory to be checked for invalid accesses. 
class AddressSanitizerPass : public PassInfoMixin<AddressSanitizerPass> { public: - explicit AddressSanitizerPass( - bool CompileKernel = false, bool Recover = false, - bool UseAfterScope = false, - AsanDetectStackUseAfterReturnMode UseAfterReturn = - AsanDetectStackUseAfterReturnMode::Runtime); + AddressSanitizerPass(const AddressSanitizerOptions &Options) + : Options(Options){}; PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + void printPipeline(raw_ostream &OS, + function_ref<StringRef(StringRef)> MapClassName2PassName); static bool isRequired() { return true; } private: - bool CompileKernel; - bool Recover; - bool UseAfterScope; - AsanDetectStackUseAfterReturnMode UseAfterReturn; + AddressSanitizerOptions Options; }; /// Public interface to the address sanitizer module pass for instrumenting code @@ -122,16 +125,17 @@ private: class ModuleAddressSanitizerPass : public PassInfoMixin<ModuleAddressSanitizerPass> { public: - explicit ModuleAddressSanitizerPass( - bool CompileKernel = false, bool Recover = false, bool UseGlobalGC = true, + ModuleAddressSanitizerPass( + const AddressSanitizerOptions &Options, bool UseGlobalGC = true, bool UseOdrIndicator = false, AsanDtorKind DestructorKind = AsanDtorKind::Global); PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); + void printPipeline(raw_ostream &OS, + function_ref<StringRef(StringRef)> MapClassName2PassName); static bool isRequired() { return true; } private: - bool CompileKernel; - bool Recover; + AddressSanitizerOptions Options; bool UseGlobalGC; bool UseOdrIndicator; AsanDtorKind DestructorKind; @@ -148,6 +152,16 @@ ModulePass *createModuleAddressSanitizerLegacyPassPass( bool UseOdrIndicator = true, AsanDtorKind DestructorKind = AsanDtorKind::Global); +struct ASanAccessInfo { + const int32_t Packed; + const uint8_t AccessSizeIndex; + const bool IsWrite; + const bool CompileKernel; + + explicit ASanAccessInfo(int32_t Packed); + ASanAccessInfo(bool IsWrite, bool CompileKernel, uint8_t 
AccessSizeIndex); +}; + } // namespace llvm #endif diff --git a/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizerCommon.h b/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizerCommon.h index 0228992af874..6c351e3f8e1f 100644 --- a/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizerCommon.h +++ b/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizerCommon.h @@ -1,9 +1,8 @@ //===--------- Definition of the AddressSanitizer class ---------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -18,6 +17,7 @@ #include "llvm/Analysis/PostDominators.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Instruction.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" namespace llvm { @@ -26,7 +26,6 @@ class InterestingMemoryOperand { public: Use *PtrUse; bool IsWrite; - Type *OpType; uint64_t TypeSize; MaybeAlign Alignment; // The mask Value, if we're looking at a masked load/store. 
@@ -35,8 +34,7 @@ public: InterestingMemoryOperand(Instruction *I, unsigned OperandNo, bool IsWrite, class Type *OpType, MaybeAlign Alignment, Value *MaybeMask = nullptr) - : IsWrite(IsWrite), OpType(OpType), Alignment(Alignment), - MaybeMask(MaybeMask) { + : IsWrite(IsWrite), Alignment(Alignment), MaybeMask(MaybeMask) { const DataLayout &DL = I->getModule()->getDataLayout(); TypeSize = DL.getTypeStoreSizeInBits(OpType); PtrUse = &I->getOperandUse(OperandNo); @@ -47,47 +45,56 @@ public: Value *getPtr() { return PtrUse->get(); } }; -// For an alloca valid between lifetime markers Start and End, call the +// For an alloca valid between lifetime markers Start and Ends, call the // Callback for all possible exits out of the lifetime in the containing // function, which can return from the instructions in RetVec. // -// Returns whether End was the only possible exit. If it wasn't, the caller -// should remove End to ensure that work done at the other exits does not -// happen outside of the lifetime. +// Returns whether Ends covered all possible exits. If they did not, +// the caller should remove Ends to ensure that work done at the other +// exits does not happen outside of the lifetime. template <typename F> bool forAllReachableExits(const DominatorTree &DT, const PostDominatorTree &PDT, - const Instruction *Start, Instruction *End, + const Instruction *Start, + const SmallVectorImpl<IntrinsicInst *> &Ends, const SmallVectorImpl<Instruction *> &RetVec, F Callback) { - // We need to ensure that if we tag some object, we certainly untag it - // before the function exits. 
- if (PDT.dominates(End, Start)) { - Callback(End); - } else { - SmallVector<Instruction *, 8> ReachableRetVec; - unsigned NumCoveredExits = 0; - for (auto &RI : RetVec) { - if (!isPotentiallyReachable(Start, RI, nullptr, &DT)) - continue; - ReachableRetVec.push_back(RI); - if (DT.dominates(End, RI)) - ++NumCoveredExits; - } - // If there's a mix of covered and non-covered exits, just put the untag - // on exits, so we avoid the redundancy of untagging twice. - if (NumCoveredExits == ReachableRetVec.size()) { + if (Ends.size() == 1 && PDT.dominates(Ends[0], Start)) { + Callback(Ends[0]); + return true; + } + SmallVector<Instruction *, 8> ReachableRetVec; + unsigned NumCoveredExits = 0; + for (auto &RI : RetVec) { + if (!isPotentiallyReachable(Start, RI, nullptr, &DT)) + continue; + ReachableRetVec.push_back(RI); + // TODO(fmayer): We don't support diamond shapes, where multiple lifetime + // ends together dominate the RI, but none of them does by itself. + // Check how often this happens and decide whether to support this here. + if (std::any_of(Ends.begin(), Ends.end(), + [&](Instruction *End) { return DT.dominates(End, RI); })) + ++NumCoveredExits; + } + // If there's a mix of covered and non-covered exits, just put the untag + // on exits, so we avoid the redundancy of untagging twice. + if (NumCoveredExits == ReachableRetVec.size()) { + for (auto *End : Ends) Callback(End); - } else { - for (auto &RI : ReachableRetVec) - Callback(RI); - // We may have inserted untag outside of the lifetime interval. - // Signal the caller to remove the lifetime end call for this alloca. - return false; - } + } else { + for (auto &RI : ReachableRetVec) + Callback(RI); + // We may have inserted untag outside of the lifetime interval. + // Signal the caller to remove the lifetime end call for this alloca. + return false; } return true; } +// Get AddressSanitizer parameters. 
+void getAddressSanitizerParams(const Triple &TargetTriple, int LongSize, + bool IsKasan, uint64_t *ShadowBase, + int *MappingScale, bool *OrShadowOffset); + } // namespace llvm #endif diff --git a/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizerOptions.h b/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizerOptions.h index 029b3fc4b788..f019d1c00a35 100644 --- a/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizerOptions.h +++ b/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizerOptions.h @@ -1,9 +1,8 @@ //===--------- Definition of the AddressSanitizer options -------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // This file defines data types used to set Address Sanitizer options. diff --git a/llvm/include/llvm/Transforms/Instrumentation/HWAddressSanitizer.h b/llvm/include/llvm/Transforms/Instrumentation/HWAddressSanitizer.h index 2e4f3338030a..3118a3762935 100644 --- a/llvm/include/llvm/Transforms/Instrumentation/HWAddressSanitizer.h +++ b/llvm/include/llvm/Transforms/Instrumentation/HWAddressSanitizer.h @@ -1,9 +1,8 @@ //===--------- Definition of the HWAddressSanitizer class -------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -19,21 +18,32 @@ namespace llvm { +struct HWAddressSanitizerOptions { + HWAddressSanitizerOptions() + : HWAddressSanitizerOptions(false, false, false){}; + HWAddressSanitizerOptions(bool CompileKernel, bool Recover, + bool DisableOptimization) + : CompileKernel(CompileKernel), Recover(Recover), + DisableOptimization(DisableOptimization){}; + bool CompileKernel; + bool Recover; + bool DisableOptimization; +}; + /// This is a public interface to the hardware address sanitizer pass for /// instrumenting code to check for various memory errors at runtime, similar to /// AddressSanitizer but based on partial hardware assistance. class HWAddressSanitizerPass : public PassInfoMixin<HWAddressSanitizerPass> { public: - explicit HWAddressSanitizerPass(bool CompileKernel = false, - bool Recover = false, - bool DisableOptimization = false); + explicit HWAddressSanitizerPass(HWAddressSanitizerOptions Options) + : Options(Options){}; PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM); static bool isRequired() { return true; } + void printPipeline(raw_ostream &OS, + function_ref<StringRef(StringRef)> MapClassName2PassName); private: - bool CompileKernel; - bool Recover; - bool DisableOptimization; + HWAddressSanitizerOptions Options; }; FunctionPass * diff --git a/llvm/include/llvm/Transforms/Instrumentation/InstrOrderFile.h b/llvm/include/llvm/Transforms/Instrumentation/InstrOrderFile.h index f0f375e0acf6..e3d75f675c93 100644 --- a/llvm/include/llvm/Transforms/Instrumentation/InstrOrderFile.h +++ b/llvm/include/llvm/Transforms/Instrumentation/InstrOrderFile.h @@ -1,9 +1,8 @@ //===- InstrOrderFile.h ---- Late IR instrumentation for order file ----===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h b/llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h index ac6a07d299a6..f4d1b1d90e6f 100644 --- a/llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h +++ b/llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h @@ -1,9 +1,8 @@ //===--------- Definition of the MemProfiler class --------------*- C++ -*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/llvm/include/llvm/Transforms/Instrumentation/MemorySanitizer.h b/llvm/include/llvm/Transforms/Instrumentation/MemorySanitizer.h index f5f9ec7829bd..d47beb93397e 100644 --- a/llvm/include/llvm/Transforms/Instrumentation/MemorySanitizer.h +++ b/llvm/include/llvm/Transforms/Instrumentation/MemorySanitizer.h @@ -40,6 +40,23 @@ struct MemorySanitizerPass : public PassInfoMixin<MemorySanitizerPass> { MemorySanitizerPass(MemorySanitizerOptions Options) : Options(Options) {} PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM); + void printPipeline(raw_ostream &OS, + function_ref<StringRef(StringRef)> MapClassName2PassName); + static bool isRequired() { return true; } + +private: + MemorySanitizerOptions Options; +}; + +/// A module pass for msan instrumentation. +/// +/// Instruments functions to detect unitialized reads. 
This function pass +/// inserts calls to runtime library functions. If the functions aren't declared +/// yet, the pass inserts the declarations. Otherwise the existing globals are +/// used. +struct ModuleMemorySanitizerPass : public PassInfoMixin<ModuleMemorySanitizerPass> { + ModuleMemorySanitizerPass(MemorySanitizerOptions Options) : Options(Options) {} + PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); static bool isRequired() { return true; } diff --git a/llvm/include/llvm/Transforms/Instrumentation/ThreadSanitizer.h b/llvm/include/llvm/Transforms/Instrumentation/ThreadSanitizer.h index f9c507624e6d..e795043630d5 100644 --- a/llvm/include/llvm/Transforms/Instrumentation/ThreadSanitizer.h +++ b/llvm/include/llvm/Transforms/Instrumentation/ThreadSanitizer.h @@ -27,6 +27,14 @@ FunctionPass *createThreadSanitizerLegacyPassPass(); /// yet, the pass inserts the declarations. Otherwise the existing globals are struct ThreadSanitizerPass : public PassInfoMixin<ThreadSanitizerPass> { PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM); + static bool isRequired() { return true; } +}; + +/// A module pass for tsan instrumentation. +/// +/// Create ctor and init functions. +struct ModuleThreadSanitizerPass + : public PassInfoMixin<ModuleThreadSanitizerPass> { PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); static bool isRequired() { return true; } }; diff --git a/llvm/include/llvm/Transforms/Scalar/EarlyCSE.h b/llvm/include/llvm/Transforms/Scalar/EarlyCSE.h index 1e7fd71dcbf4..877d8145e746 100644 --- a/llvm/include/llvm/Transforms/Scalar/EarlyCSE.h +++ b/llvm/include/llvm/Transforms/Scalar/EarlyCSE.h @@ -32,6 +32,8 @@ struct EarlyCSEPass : PassInfoMixin<EarlyCSEPass> { /// Run the pass over the function. 
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + void printPipeline(raw_ostream &OS, + function_ref<StringRef(StringRef)> MapClassName2PassName); bool UseMemorySSA; }; diff --git a/llvm/include/llvm/Transforms/Scalar/GVN.h b/llvm/include/llvm/Transforms/Scalar/GVN.h index 5c29b289d158..cbe5057b9cde 100644 --- a/llvm/include/llvm/Transforms/Scalar/GVN.h +++ b/llvm/include/llvm/Transforms/Scalar/GVN.h @@ -115,17 +115,20 @@ struct GVNOptions { /// /// FIXME: We should have a good summary of the GVN algorithm implemented by /// this particular pass here. -class GVN : public PassInfoMixin<GVN> { +class GVNPass : public PassInfoMixin<GVNPass> { GVNOptions Options; public: struct Expression; - GVN(GVNOptions Options = {}) : Options(Options) {} + GVNPass(GVNOptions Options = {}) : Options(Options) {} /// Run the pass over the function. PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + void printPipeline(raw_ostream &OS, + function_ref<StringRef(StringRef)> MapClassName2PassName); + /// This removes the specified instruction from /// our various maps and marks it for deletion. 
void markInstructionForDeletion(Instruction *I) { @@ -179,11 +182,11 @@ public: Expression createExtractvalueExpr(ExtractValueInst *EI); uint32_t lookupOrAddCall(CallInst *C); uint32_t phiTranslateImpl(const BasicBlock *BB, const BasicBlock *PhiBlock, - uint32_t Num, GVN &Gvn); + uint32_t Num, GVNPass &Gvn); bool areCallValsEqual(uint32_t Num, uint32_t NewNum, const BasicBlock *Pred, - const BasicBlock *PhiBlock, GVN &Gvn); + const BasicBlock *PhiBlock, GVNPass &Gvn); std::pair<uint32_t, bool> assignExpNewValueNum(Expression &exp); - bool areAllValsInBB(uint32_t num, const BasicBlock *BB, GVN &Gvn); + bool areAllValsInBB(uint32_t num, const BasicBlock *BB, GVNPass &Gvn); public: ValueTable(); @@ -197,7 +200,7 @@ public: uint32_t lookupOrAddCmp(unsigned Opcode, CmpInst::Predicate Pred, Value *LHS, Value *RHS); uint32_t phiTranslate(const BasicBlock *BB, const BasicBlock *PhiBlock, - uint32_t Num, GVN &Gvn); + uint32_t Num, GVNPass &Gvn); void eraseTranslateCacheEntry(uint32_t Num, const BasicBlock &CurrBlock); bool exists(Value *V) const; void add(Value *V, uint32_t num); diff --git a/llvm/include/llvm/Transforms/Scalar/JumpThreading.h b/llvm/include/llvm/Transforms/Scalar/JumpThreading.h index 816ea1071e52..0ac7d7c62b7a 100644 --- a/llvm/include/llvm/Transforms/Scalar/JumpThreading.h +++ b/llvm/include/llvm/Transforms/Scalar/JumpThreading.h @@ -44,6 +44,7 @@ class PHINode; class SelectInst; class SwitchInst; class TargetLibraryInfo; +class TargetTransformInfo; class Value; /// A private "module" namespace for types and utilities used by @@ -78,6 +79,7 @@ enum ConstantPreference { WantInteger, WantBlockAddress }; /// revectored to the false side of the second if. class JumpThreadingPass : public PassInfoMixin<JumpThreadingPass> { TargetLibraryInfo *TLI; + TargetTransformInfo *TTI; LazyValueInfo *LVI; AAResults *AA; DomTreeUpdater *DTU; @@ -99,9 +101,9 @@ public: JumpThreadingPass(bool InsertFreezeWhenUnfoldingSelect = false, int T = -1); // Glue for old PM. 
- bool runImpl(Function &F, TargetLibraryInfo *TLI, LazyValueInfo *LVI, - AAResults *AA, DomTreeUpdater *DTU, bool HasProfileData, - std::unique_ptr<BlockFrequencyInfo> BFI, + bool runImpl(Function &F, TargetLibraryInfo *TLI, TargetTransformInfo *TTI, + LazyValueInfo *LVI, AAResults *AA, DomTreeUpdater *DTU, + bool HasProfileData, std::unique_ptr<BlockFrequencyInfo> BFI, std::unique_ptr<BranchProbabilityInfo> BPI); PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); diff --git a/llvm/include/llvm/Transforms/Scalar/LoopPassManager.h b/llvm/include/llvm/Transforms/Scalar/LoopPassManager.h index 020cfb9a6c85..419729271a23 100644 --- a/llvm/include/llvm/Transforms/Scalar/LoopPassManager.h +++ b/llvm/include/llvm/Transforms/Scalar/LoopPassManager.h @@ -94,6 +94,8 @@ public: PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U); + void printPipeline(raw_ostream &OS, + function_ref<StringRef(StringRef)> MapClassName2PassName); /// Add either a loop pass or a loop-nest pass to the pass manager. Append \p /// Pass to the list of loop passes if it has a dedicated \fn run() method for /// loops and to the list of loop-nest passes if the \fn run() method is for @@ -101,51 +103,65 @@ public: /// to the end of \var IsLoopNestPass so we can easily identify the types of /// passes in the pass manager later. template <typename PassT> - std::enable_if_t<is_detected<HasRunOnLoopT, PassT>::value> - addPass(PassT &&Pass) { + LLVM_ATTRIBUTE_MINSIZE + std::enable_if_t<is_detected<HasRunOnLoopT, PassT>::value> + addPass(PassT &&Pass) { using LoopPassModelT = detail::PassModel<Loop, PassT, PreservedAnalyses, LoopAnalysisManager, LoopStandardAnalysisResults &, LPMUpdater &>; IsLoopNestPass.push_back(false); - LoopPasses.emplace_back(new LoopPassModelT(std::forward<PassT>(Pass))); + // Do not use make_unique or emplace_back, they cause too many template + // instantiations, causing terrible compile times. 
+ LoopPasses.push_back(std::unique_ptr<LoopPassConceptT>( + new LoopPassModelT(std::forward<PassT>(Pass)))); } template <typename PassT> - std::enable_if_t<!is_detected<HasRunOnLoopT, PassT>::value> - addPass(PassT &&Pass) { + LLVM_ATTRIBUTE_MINSIZE + std::enable_if_t<!is_detected<HasRunOnLoopT, PassT>::value> + addPass(PassT &&Pass) { using LoopNestPassModelT = detail::PassModel<LoopNest, PassT, PreservedAnalyses, LoopAnalysisManager, LoopStandardAnalysisResults &, LPMUpdater &>; IsLoopNestPass.push_back(true); - LoopNestPasses.emplace_back( - new LoopNestPassModelT(std::forward<PassT>(Pass))); + // Do not use make_unique or emplace_back, they cause too many template + // instantiations, causing terrible compile times. + LoopNestPasses.push_back(std::unique_ptr<LoopNestPassConceptT>( + new LoopNestPassModelT(std::forward<PassT>(Pass)))); } // Specializations of `addPass` for `RepeatedPass`. These are necessary since // `RepeatedPass` has a templated `run` method that will result in incorrect // detection of `HasRunOnLoopT`. template <typename PassT> - std::enable_if_t<is_detected<HasRunOnLoopT, PassT>::value> - addPass(RepeatedPass<PassT> &&Pass) { + LLVM_ATTRIBUTE_MINSIZE + std::enable_if_t<is_detected<HasRunOnLoopT, PassT>::value> + addPass(RepeatedPass<PassT> &&Pass) { using RepeatedLoopPassModelT = detail::PassModel<Loop, RepeatedPass<PassT>, PreservedAnalyses, LoopAnalysisManager, LoopStandardAnalysisResults &, LPMUpdater &>; IsLoopNestPass.push_back(false); - LoopPasses.emplace_back(new RepeatedLoopPassModelT(std::move(Pass))); + // Do not use make_unique or emplace_back, they cause too many template + // instantiations, causing terrible compile times. 
+ LoopPasses.push_back(std::unique_ptr<LoopPassConceptT>( + new RepeatedLoopPassModelT(std::move(Pass)))); } template <typename PassT> - std::enable_if_t<!is_detected<HasRunOnLoopT, PassT>::value> - addPass(RepeatedPass<PassT> &&Pass) { + LLVM_ATTRIBUTE_MINSIZE + std::enable_if_t<!is_detected<HasRunOnLoopT, PassT>::value> + addPass(RepeatedPass<PassT> &&Pass) { using RepeatedLoopNestPassModelT = detail::PassModel<LoopNest, RepeatedPass<PassT>, PreservedAnalyses, LoopAnalysisManager, LoopStandardAnalysisResults &, LPMUpdater &>; IsLoopNestPass.push_back(true); - LoopNestPasses.emplace_back( - new RepeatedLoopNestPassModelT(std::move(Pass))); + // Do not use make_unique or emplace_back, they cause too many template + // instantiations, causing terrible compile times. + LoopNestPasses.push_back(std::unique_ptr<LoopNestPassConceptT>( + new RepeatedLoopNestPassModelT(std::move(Pass)))); } bool isEmpty() const { return LoopPasses.empty() && LoopNestPasses.empty(); } @@ -215,6 +231,12 @@ struct RequireAnalysisPass<AnalysisT, Loop, LoopAnalysisManager, (void)AM.template getResult<AnalysisT>(L, AR); return PreservedAnalyses::all(); } + void printPipeline(raw_ostream &OS, + function_ref<StringRef(StringRef)> MapClassName2PassName) { + auto ClassName = AnalysisT::name(); + auto PassName = MapClassName2PassName(ClassName); + OS << "require<" << PassName << ">"; + } }; /// An alias template to easily name a require analysis loop pass. @@ -259,8 +281,6 @@ public: /// state, this routine will mark that the current loop should be skipped by /// the rest of the pass management infrastructure. 
void markLoopAsDeleted(Loop &L, llvm::StringRef Name) { - assert((!LoopNestMode || CurrentL == &L) && - "L should be a top-level loop in loop-nest mode."); LAM.clear(L, Name); assert((&L == CurrentL || CurrentL->contains(&L)) && "Cannot delete a loop outside of the " @@ -413,10 +433,12 @@ public: explicit FunctionToLoopPassAdaptor(std::unique_ptr<PassConceptT> Pass, bool UseMemorySSA = false, bool UseBlockFrequencyInfo = false, + bool UseBranchProbabilityInfo = false, bool LoopNestMode = false) : Pass(std::move(Pass)), LoopCanonicalizationFPM(), UseMemorySSA(UseMemorySSA), UseBlockFrequencyInfo(UseBlockFrequencyInfo), + UseBranchProbabilityInfo(UseBranchProbabilityInfo), LoopNestMode(LoopNestMode) { LoopCanonicalizationFPM.addPass(LoopSimplifyPass()); LoopCanonicalizationFPM.addPass(LCSSAPass()); @@ -424,6 +446,8 @@ public: /// Runs the loop passes across every loop in the function. PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + void printPipeline(raw_ostream &OS, + function_ref<StringRef(StringRef)> MapClassName2PassName); static bool isRequired() { return true; } @@ -436,6 +460,7 @@ private: bool UseMemorySSA = false; bool UseBlockFrequencyInfo = false; + bool UseBranchProbabilityInfo = false; const bool LoopNestMode; }; @@ -447,13 +472,17 @@ template <typename LoopPassT> inline std::enable_if_t<is_detected<HasRunOnLoopT, LoopPassT>::value, FunctionToLoopPassAdaptor> createFunctionToLoopPassAdaptor(LoopPassT &&Pass, bool UseMemorySSA = false, - bool UseBlockFrequencyInfo = false) { + bool UseBlockFrequencyInfo = false, + bool UseBranchProbabilityInfo = false) { using PassModelT = detail::PassModel<Loop, LoopPassT, PreservedAnalyses, LoopAnalysisManager, LoopStandardAnalysisResults &, LPMUpdater &>; + // Do not use make_unique, it causes too many template instantiations, + // causing terrible compile times. 
return FunctionToLoopPassAdaptor( - std::make_unique<PassModelT>(std::forward<LoopPassT>(Pass)), UseMemorySSA, - UseBlockFrequencyInfo, false); + std::unique_ptr<FunctionToLoopPassAdaptor::PassConceptT>( + new PassModelT(std::forward<LoopPassT>(Pass))), + UseMemorySSA, UseBlockFrequencyInfo, UseBranchProbabilityInfo, false); } /// If \p Pass is a loop-nest pass, \p Pass will first be wrapped into a @@ -462,24 +491,29 @@ template <typename LoopNestPassT> inline std::enable_if_t<!is_detected<HasRunOnLoopT, LoopNestPassT>::value, FunctionToLoopPassAdaptor> createFunctionToLoopPassAdaptor(LoopNestPassT &&Pass, bool UseMemorySSA = false, - bool UseBlockFrequencyInfo = false) { + bool UseBlockFrequencyInfo = false, + bool UseBranchProbabilityInfo = false) { LoopPassManager LPM; LPM.addPass(std::forward<LoopNestPassT>(Pass)); using PassModelT = detail::PassModel<Loop, LoopPassManager, PreservedAnalyses, LoopAnalysisManager, LoopStandardAnalysisResults &, LPMUpdater &>; - return FunctionToLoopPassAdaptor(std::make_unique<PassModelT>(std::move(LPM)), - UseMemorySSA, UseBlockFrequencyInfo, true); + // Do not use make_unique, it causes too many template instantiations, + // causing terrible compile times. + return FunctionToLoopPassAdaptor( + std::unique_ptr<FunctionToLoopPassAdaptor::PassConceptT>( + new PassModelT(std::move(LPM))), + UseMemorySSA, UseBlockFrequencyInfo, UseBranchProbabilityInfo, true); } /// If \p Pass is an instance of \c LoopPassManager, the returned adaptor will /// be in loop-nest mode if the pass manager contains only loop-nest passes. 
template <> inline FunctionToLoopPassAdaptor -createFunctionToLoopPassAdaptor<LoopPassManager>(LoopPassManager &&LPM, - bool UseMemorySSA, - bool UseBlockFrequencyInfo) { +createFunctionToLoopPassAdaptor<LoopPassManager>( + LoopPassManager &&LPM, bool UseMemorySSA, bool UseBlockFrequencyInfo, + bool UseBranchProbabilityInfo) { // Check if LPM contains any loop pass and if it does not, returns an adaptor // in loop-nest mode. using PassModelT = @@ -487,9 +521,13 @@ createFunctionToLoopPassAdaptor<LoopPassManager>(LoopPassManager &&LPM, LoopAnalysisManager, LoopStandardAnalysisResults &, LPMUpdater &>; bool LoopNestMode = (LPM.getNumLoopPasses() == 0); - return FunctionToLoopPassAdaptor(std::make_unique<PassModelT>(std::move(LPM)), - UseMemorySSA, UseBlockFrequencyInfo, - LoopNestMode); + // Do not use make_unique, it causes too many template instantiations, + // causing terrible compile times. + return FunctionToLoopPassAdaptor( + std::unique_ptr<FunctionToLoopPassAdaptor::PassConceptT>( + new PassModelT(std::move(LPM))), + UseMemorySSA, UseBlockFrequencyInfo, UseBranchProbabilityInfo, + LoopNestMode); } /// Pass for printing a loop's contents as textual IR. 
diff --git a/llvm/include/llvm/Transforms/Scalar/LoopUnrollPass.h b/llvm/include/llvm/Transforms/Scalar/LoopUnrollPass.h index 30cc08cb42ae..6afe7ecd2a5d 100644 --- a/llvm/include/llvm/Transforms/Scalar/LoopUnrollPass.h +++ b/llvm/include/llvm/Transforms/Scalar/LoopUnrollPass.h @@ -140,6 +140,8 @@ public: : UnrollOpts(UnrollOpts) {} PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + void printPipeline(raw_ostream &OS, + function_ref<StringRef(StringRef)> MapClassName2PassName); }; } // end namespace llvm diff --git a/llvm/include/llvm/Transforms/Scalar/LowerMatrixIntrinsics.h b/llvm/include/llvm/Transforms/Scalar/LowerMatrixIntrinsics.h index dd574e4f32c6..d44d297dd4ff 100644 --- a/llvm/include/llvm/Transforms/Scalar/LowerMatrixIntrinsics.h +++ b/llvm/include/llvm/Transforms/Scalar/LowerMatrixIntrinsics.h @@ -23,6 +23,8 @@ class LowerMatrixIntrinsicsPass public: LowerMatrixIntrinsicsPass(bool Minimal = false) : Minimal(Minimal) {} PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + void printPipeline(raw_ostream &OS, + function_ref<StringRef(StringRef)> MapClassName2PassName); static bool isRequired() { return true; } }; } // namespace llvm diff --git a/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h b/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h index 635b706d0bef..3a4db13d670a 100644 --- a/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h +++ b/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h @@ -31,7 +31,6 @@ class Instruction; class LoadInst; class MemCpyInst; class MemMoveInst; -class MemoryDependenceResults; class MemorySSA; class MemorySSAUpdater; class MemSetInst; @@ -40,7 +39,6 @@ class TargetLibraryInfo; class Value; class MemCpyOptPass : public PassInfoMixin<MemCpyOptPass> { - MemoryDependenceResults *MD = nullptr; TargetLibraryInfo *TLI = nullptr; AAResults *AA = nullptr; AssumptionCache *AC = nullptr; @@ -54,9 +52,8 @@ public: PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); // Glue 
for the old PM. - bool runImpl(Function &F, MemoryDependenceResults *MD, TargetLibraryInfo *TLI, - AAResults *AA, AssumptionCache *AC, DominatorTree *DT, - MemorySSA *MSSA); + bool runImpl(Function &F, TargetLibraryInfo *TLI, AAResults *AA, + AssumptionCache *AC, DominatorTree *DT, MemorySSA *MSSA); private: // Helper functions @@ -65,7 +62,7 @@ private: bool processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI); bool processMemMove(MemMoveInst *M); bool performCallSlotOptzn(Instruction *cpyLoad, Instruction *cpyStore, - Value *cpyDst, Value *cpySrc, uint64_t cpyLen, + Value *cpyDst, Value *cpySrc, TypeSize cpyLen, Align cpyAlign, CallInst *C); bool processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep); bool processMemSetMemCpyDependence(MemCpyInst *MemCpy, MemSetInst *MemSet); diff --git a/llvm/include/llvm/Transforms/Scalar/MergedLoadStoreMotion.h b/llvm/include/llvm/Transforms/Scalar/MergedLoadStoreMotion.h index c5f6d6e0e8bd..256d03675a07 100644 --- a/llvm/include/llvm/Transforms/Scalar/MergedLoadStoreMotion.h +++ b/llvm/include/llvm/Transforms/Scalar/MergedLoadStoreMotion.h @@ -48,6 +48,8 @@ public: MergedLoadStoreMotionPass(const MergedLoadStoreMotionOptions &PassOptions) : Options(PassOptions) {} PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + void printPipeline(raw_ostream &OS, + function_ref<StringRef(StringRef)> MapClassName2PassName); }; } diff --git a/llvm/include/llvm/Transforms/Scalar/SROA.h b/llvm/include/llvm/Transforms/Scalar/SROA.h index 6ef7c6b22c0b..f1a43435d89a 100644 --- a/llvm/include/llvm/Transforms/Scalar/SROA.h +++ b/llvm/include/llvm/Transforms/Scalar/SROA.h @@ -62,7 +62,7 @@ class SROALegacyPass; /// onto insert and extract operations on a vector value, and convert them to /// this form. By doing so, it will enable promotion of vector aggregates to /// SSA vector values. 
-class SROA : public PassInfoMixin<SROA> { +class SROAPass : public PassInfoMixin<SROAPass> { LLVMContext *C = nullptr; DominatorTree *DT = nullptr; AssumptionCache *AC = nullptr; @@ -110,7 +110,7 @@ class SROA : public PassInfoMixin<SROA> { SetVector<SelectInst *, SmallVector<SelectInst *, 2>> SpeculatableSelects; public: - SROA() = default; + SROAPass() = default; /// Run the pass over the function. PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); diff --git a/llvm/include/llvm/Transforms/Scalar/SimpleLoopUnswitch.h b/llvm/include/llvm/Transforms/Scalar/SimpleLoopUnswitch.h index c1a9ab475ead..dfb1619c7f2a 100644 --- a/llvm/include/llvm/Transforms/Scalar/SimpleLoopUnswitch.h +++ b/llvm/include/llvm/Transforms/Scalar/SimpleLoopUnswitch.h @@ -69,6 +69,9 @@ public: PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U); + + void printPipeline(raw_ostream &OS, + function_ref<StringRef(StringRef)> MapClassName2PassName); }; /// Create the legacy pass object for the simple loop unswitcher. diff --git a/llvm/include/llvm/Transforms/Scalar/SimplifyCFG.h b/llvm/include/llvm/Transforms/Scalar/SimplifyCFG.h index 7c5393851ae6..67db5031a443 100644 --- a/llvm/include/llvm/Transforms/Scalar/SimplifyCFG.h +++ b/llvm/include/llvm/Transforms/Scalar/SimplifyCFG.h @@ -41,6 +41,9 @@ public: /// Run the pass over the function. PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + + void printPipeline(raw_ostream &OS, + function_ref<StringRef(StringRef)> MapClassName2PassName); }; } diff --git a/llvm/include/llvm/Transforms/Utils/ASanStackFrameLayout.h b/llvm/include/llvm/Transforms/Utils/ASanStackFrameLayout.h index 0b570c0d1342..f87588db4ee2 100644 --- a/llvm/include/llvm/Transforms/Utils/ASanStackFrameLayout.h +++ b/llvm/include/llvm/Transforms/Utils/ASanStackFrameLayout.h @@ -33,7 +33,7 @@ struct ASanStackVariableDescription { uint64_t Size; // Size of the variable in bytes. 
size_t LifetimeSize; // Size in bytes to use for lifetime analysis check. // Will be rounded up to Granularity. - size_t Alignment; // Alignment of the variable (power of 2). + uint64_t Alignment; // Alignment of the variable (power of 2). AllocaInst *AI; // The actual AllocaInst. size_t Offset; // Offset from the beginning of the frame; // set by ComputeASanStackFrameLayout. @@ -42,20 +42,20 @@ struct ASanStackVariableDescription { // Output data struct for ComputeASanStackFrameLayout. struct ASanStackFrameLayout { - size_t Granularity; // Shadow granularity. - size_t FrameAlignment; // Alignment for the entire frame. - size_t FrameSize; // Size of the frame in bytes. + uint64_t Granularity; // Shadow granularity. + uint64_t FrameAlignment; // Alignment for the entire frame. + uint64_t FrameSize; // Size of the frame in bytes. }; ASanStackFrameLayout ComputeASanStackFrameLayout( // The array of stack variables. The elements may get reordered and changed. SmallVectorImpl<ASanStackVariableDescription> &Vars, // AddressSanitizer's shadow granularity. Usually 8, may also be 16, 32, 64. - size_t Granularity, + uint64_t Granularity, // The minimal size of the left-most redzone (header). // At least 4 pointer sizes, power of 2, and >= Granularity. // The resulting FrameSize should be multiple of MinHeaderSize. - size_t MinHeaderSize); + uint64_t MinHeaderSize); // Compute frame description, see DescribeAddressIfStack in ASan runtime. 
SmallString<64> ComputeASanStackFrameDescription( diff --git a/llvm/include/llvm/Transforms/Utils/AddDiscriminators.h b/llvm/include/llvm/Transforms/Utils/AddDiscriminators.h index f512c6c06331..0aee2fe95cad 100644 --- a/llvm/include/llvm/Transforms/Utils/AddDiscriminators.h +++ b/llvm/include/llvm/Transforms/Utils/AddDiscriminators.h @@ -24,6 +24,7 @@ class Function; class AddDiscriminatorsPass : public PassInfoMixin<AddDiscriminatorsPass> { public: PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + static bool isRequired() { return true; } }; } // end namespace llvm diff --git a/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h b/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h index b45c1820bb20..8970afb3aeaa 100644 --- a/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h +++ b/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h @@ -129,6 +129,13 @@ void ReplaceInstWithInst(BasicBlock::InstListType &BIL, /// To. Copies DebugLoc from BI to I, if I doesn't already have a DebugLoc. void ReplaceInstWithInst(Instruction *From, Instruction *To); +/// Check if we can prove that all paths starting from this block converge +/// to a block that either has a @llvm.experimental.deoptimize call +/// prior to its terminating return instruction or is terminated by unreachable. +/// All blocks in the traversed sequence must have an unique successor, maybe +/// except for the last one. +bool IsBlockFollowedByDeoptOrUnreachable(const BasicBlock *BB); + /// Option class for critical edge splitting. 
/// /// This provides a builder interface for overriding the default options used @@ -214,29 +221,6 @@ BasicBlock *SplitKnownCriticalEdge(Instruction *TI, unsigned SuccNum, CriticalEdgeSplittingOptions(), const Twine &BBName = ""); -inline BasicBlock * -SplitCriticalEdge(BasicBlock *BB, succ_iterator SI, - const CriticalEdgeSplittingOptions &Options = - CriticalEdgeSplittingOptions()) { - return SplitCriticalEdge(BB->getTerminator(), SI.getSuccessorIndex(), - Options); -} - -/// If the edge from *PI to BB is not critical, return false. Otherwise, split -/// all edges between the two blocks and return true. This updates all of the -/// same analyses as the other SplitCriticalEdge function. If P is specified, it -/// updates the analyses described above. -inline bool SplitCriticalEdge(BasicBlock *Succ, pred_iterator PI, - const CriticalEdgeSplittingOptions &Options = - CriticalEdgeSplittingOptions()) { - bool MadeChange = false; - Instruction *TI = (*PI)->getTerminator(); - for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) - if (TI->getSuccessor(i) == Succ) - MadeChange |= !!SplitCriticalEdge(TI, i, Options); - return MadeChange; -} - /// If an edge from Src to Dst is critical, split the edge and return true, /// otherwise return false. This method requires that there be an edge between /// the two blocks. It updates the analyses passed in the options struct diff --git a/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h b/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h index e7d41933a6c9..87d33b9b11b7 100644 --- a/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h +++ b/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h @@ -54,12 +54,6 @@ namespace llvm { /// 'i8*' type. Value *emitStrDup(Value *Ptr, IRBuilderBase &B, const TargetLibraryInfo *TLI); - /// Emit a call to the strnlen function to the builder, for the specified - /// pointer. 
Ptr is required to be some pointer type, MaxLen must be of size_t - /// type, and the return value has 'intptr_t' type. - Value *emitStrNLen(Value *Ptr, Value *MaxLen, IRBuilderBase &B, - const DataLayout &DL, const TargetLibraryInfo *TLI); - /// Emit a call to the strchr function to the builder, for the specified /// pointer and character. Ptr is required to be some pointer type, and the /// return value has 'i8*' type. @@ -205,8 +199,8 @@ namespace llvm { const TargetLibraryInfo *TLI); /// Emit a call to the calloc function. - Value *emitCalloc(Value *Num, Value *Size, const AttributeList &Attrs, - IRBuilderBase &B, const TargetLibraryInfo &TLI); + Value *emitCalloc(Value *Num, Value *Size, IRBuilderBase &B, + const TargetLibraryInfo &TLI); } #endif diff --git a/llvm/include/llvm/Transforms/Utils/Cloning.h b/llvm/include/llvm/Transforms/Utils/Cloning.h index f4fb265c25e0..5a1f322b2054 100644 --- a/llvm/include/llvm/Transforms/Utils/Cloning.h +++ b/llvm/include/llvm/Transforms/Utils/Cloning.h @@ -296,10 +296,10 @@ BasicBlock *DuplicateInstructionsInSplitBetween(BasicBlock *BB, DomTreeUpdater &DTU); /// Updates profile information by adjusting the entry count by adding -/// entryDelta then scaling callsite information by the new count divided by the +/// EntryDelta then scaling callsite information by the new count divided by the /// old count. 
VMap is used during inlinng to also update the new clone void updateProfileCallee( - Function *Callee, int64_t entryDelta, + Function *Callee, int64_t EntryDelta, const ValueMap<const Value *, WeakTrackingVH> *VMap = nullptr); /// Find the 'llvm.experimental.noalias.scope.decl' intrinsics in the specified diff --git a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h index 1d9f2d135488..f08173e45a5b 100644 --- a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h +++ b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h @@ -100,6 +100,10 @@ public: unsigned NumExitBlocks = std::numeric_limits<unsigned>::max(); Type *RetTy; + // Mapping from the original exit blocks, to the new blocks inside + // the function. + SmallVector<BasicBlock *, 4> OldTargets; + // Suffix to use when creating extracted function (appended to the original // function name + "."). If empty, the default is to use the entry block // label, if non-empty, otherwise "extracted". @@ -139,6 +143,20 @@ public: /// returns false. Function *extractCodeRegion(const CodeExtractorAnalysisCache &CEAC); + /// Perform the extraction, returning the new function and providing an + /// interface to see what was categorized as inputs and outputs. + /// + /// \param CEAC - Cache to speed up operations for the CodeExtractor when + /// hoisting, and extracting lifetime values and assumes. + /// \param Inputs [out] - filled with values marked as inputs to the + /// newly outlined function. + /// \param Outputs [out] - filled with values marked as outputs to the + /// newly outlined function. + /// \returns zero when called on a CodeExtractor instance where isEligible + /// returns false. + Function *extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, + ValueSet &Inputs, ValueSet &Outputs); + /// Verify that assumption cache isn't stale after a region is extracted. /// Returns true when verifier finds errors. 
AssumptionCache is passed as /// parameter to make this function stateless. diff --git a/llvm/include/llvm/Transforms/Utils/CodeMoverUtils.h b/llvm/include/llvm/Transforms/Utils/CodeMoverUtils.h index 630f936471f2..0f32a97f9fcc 100644 --- a/llvm/include/llvm/Transforms/Utils/CodeMoverUtils.h +++ b/llvm/include/llvm/Transforms/Utils/CodeMoverUtils.h @@ -40,7 +40,8 @@ bool isControlFlowEquivalent(const BasicBlock &BB0, const BasicBlock &BB1, bool isSafeToMoveBefore(Instruction &I, Instruction &InsertPoint, DominatorTree &DT, const PostDominatorTree *PDT = nullptr, - DependenceInfo *DI = nullptr); + DependenceInfo *DI = nullptr, + bool CheckForEntireBlock = false); /// Return true if all instructions (except the terminator) in \p BB can be /// safely moved before \p InsertPoint. @@ -62,6 +63,19 @@ void moveInstructionsToTheEnd(BasicBlock &FromBB, BasicBlock &ToBB, DominatorTree &DT, const PostDominatorTree &PDT, DependenceInfo &DI); +/// In case that two BBs \p ThisBlock and \p OtherBlock are control flow +/// equivalent but they do not strictly dominate and post-dominate each +/// other, we determine if \p ThisBlock is reached after \p OtherBlock +/// in the control flow. +bool nonStrictlyPostDominate(const BasicBlock *ThisBlock, + const BasicBlock *OtherBlock, + const DominatorTree *DT, + const PostDominatorTree *PDT); + +// Check if I0 is reached before I1 in the control flow. 
+bool isReachedBefore(const Instruction *I0, const Instruction *I1, + const DominatorTree *DT, const PostDominatorTree *PDT); + } // end namespace llvm #endif // LLVM_TRANSFORMS_UTILS_CODEMOVERUTILS_H diff --git a/llvm/include/llvm/Transforms/Utils/EntryExitInstrumenter.h b/llvm/include/llvm/Transforms/Utils/EntryExitInstrumenter.h index 31c023019272..f2b038494a5d 100644 --- a/llvm/include/llvm/Transforms/Utils/EntryExitInstrumenter.h +++ b/llvm/include/llvm/Transforms/Utils/EntryExitInstrumenter.h @@ -27,6 +27,9 @@ struct EntryExitInstrumenterPass PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + void printPipeline(raw_ostream &OS, + function_ref<StringRef(StringRef)> MapClassName2PassName); + bool PostInlining; static bool isRequired() { return true; } diff --git a/llvm/include/llvm/Transforms/Utils/FunctionImportUtils.h b/llvm/include/llvm/Transforms/Utils/FunctionImportUtils.h index 024d84a7abc8..749b7b2bb5d8 100644 --- a/llvm/include/llvm/Transforms/Utils/FunctionImportUtils.h +++ b/llvm/include/llvm/Transforms/Utils/FunctionImportUtils.h @@ -130,9 +130,6 @@ bool renameModuleForThinLTO( bool ClearDSOLocalOnDeclarations, SetVector<GlobalValue *> *GlobalsToImport = nullptr); -/// Compute synthetic function entry counts. -void computeSyntheticCounts(ModuleSummaryIndex &Index); - } // End llvm namespace #endif diff --git a/llvm/include/llvm/Transforms/Utils/GlobalStatus.h b/llvm/include/llvm/Transforms/Utils/GlobalStatus.h index 519593c96766..78d7845c4353 100644 --- a/llvm/include/llvm/Transforms/Utils/GlobalStatus.h +++ b/llvm/include/llvm/Transforms/Utils/GlobalStatus.h @@ -9,6 +9,7 @@ #ifndef LLVM_TRANSFORMS_UTILS_GLOBALSTATUS_H #define LLVM_TRANSFORMS_UTILS_GLOBALSTATUS_H +#include "llvm/IR/Instructions.h" #include "llvm/Support/AtomicOrdering.h" namespace llvm { @@ -45,7 +46,7 @@ struct GlobalStatus { /// This global is stored to, but only its initializer and one other value /// is ever stored to it. 
If this global isStoredOnce, we track the value - /// stored to it in StoredOnceValue below. This is only tracked for scalar + /// stored to it via StoredOnceStore below. This is only tracked for scalar /// globals. StoredOnce, @@ -55,8 +56,16 @@ struct GlobalStatus { } StoredType = NotStored; /// If only one value (besides the initializer constant) is ever stored to - /// this global, keep track of what value it is. - Value *StoredOnceValue = nullptr; + /// this global, keep track of what value it is via the store instruction. + const StoreInst *StoredOnceStore = nullptr; + + /// If only one value (besides the initializer constant) is ever stored to + /// this global return the stored value. + Value *getStoredOnceValue() const { + return (StoredType == StoredOnce && StoredOnceStore) + ? StoredOnceStore->getOperand(0) + : nullptr; + } /// These start out null/false. When the first accessing function is noticed, /// it is recorded. When a second different accessing function is noticed, diff --git a/llvm/include/llvm/Transforms/InstCombine/InstCombineWorklist.h b/llvm/include/llvm/Transforms/Utils/InstructionWorklist.h index 25aabe199d0f..a318c2cd28bb 100644 --- a/llvm/include/llvm/Transforms/InstCombine/InstCombineWorklist.h +++ b/llvm/include/llvm/Transforms/Utils/InstructionWorklist.h @@ -1,4 +1,4 @@ -//===- InstCombineWorklist.h - Worklist for InstCombine pass ----*- C++ -*-===// +//=== InstructionWorklist.h - Worklist for InstCombine & others -*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
@@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_TRANSFORMS_INSTCOMBINE_INSTCOMBINEWORKLIST_H -#define LLVM_TRANSFORMS_INSTCOMBINE_INSTCOMBINEWORKLIST_H +#ifndef LLVM_TRANSFORMS_UTILS_INSTRUCTIONWORKLIST_H +#define LLVM_TRANSFORMS_UTILS_INSTRUCTIONWORKLIST_H #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" @@ -18,13 +18,11 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#define DEBUG_TYPE "instcombine" - namespace llvm { -/// InstCombineWorklist - This is the worklist management logic for -/// InstCombine. -class InstCombineWorklist { +/// InstructionWorklist - This is the worklist management logic for +/// InstCombine and other simplification passes. +class InstructionWorklist { SmallVector<Instruction *, 256> Worklist; DenseMap<Instruction *, unsigned> WorklistMap; /// These instructions will be added in reverse order after the current @@ -33,10 +31,10 @@ class InstCombineWorklist { SmallSetVector<Instruction *, 16> Deferred; public: - InstCombineWorklist() = default; + InstructionWorklist() = default; - InstCombineWorklist(InstCombineWorklist &&) = default; - InstCombineWorklist &operator=(InstCombineWorklist &&) = default; + InstructionWorklist(InstructionWorklist &&) = default; + InstructionWorklist &operator=(InstructionWorklist &&) = default; bool isEmpty() const { return Worklist.empty() && Deferred.empty(); } @@ -45,7 +43,7 @@ public: /// You likely want to use this method. void add(Instruction *I) { if (Deferred.insert(I)) - LLVM_DEBUG(dbgs() << "IC: ADD DEFERRED: " << *I << '\n'); + LLVM_DEBUG(dbgs() << "ADD DEFERRED: " << *I << '\n'); } /// Add value to the worklist if it is an instruction. 
@@ -62,7 +60,7 @@ public: assert(I->getParent() && "Instruction not inserted yet?"); if (WorklistMap.insert(std::make_pair(I, Worklist.size())).second) { - LLVM_DEBUG(dbgs() << "IC: ADD: " << *I << '\n'); + LLVM_DEBUG(dbgs() << "ADD: " << *I << '\n'); Worklist.push_back(I); } } @@ -85,7 +83,7 @@ public: /// Remove I from the worklist if it exists. void remove(Instruction *I) { - DenseMap<Instruction*, unsigned>::iterator It = WorklistMap.find(I); + DenseMap<Instruction *, unsigned>::iterator It = WorklistMap.find(I); if (It != WorklistMap.end()) { // Don't bother moving everything down, just null out the slot. Worklist[It->second] = nullptr; @@ -110,7 +108,6 @@ public: push(cast<Instruction>(U)); } - /// Check that the worklist is empty and nuke the backing store for the map. void zap() { assert(WorklistMap.empty() && "Worklist empty, but map not?"); @@ -123,6 +120,4 @@ public: } // end namespace llvm. -#undef DEBUG_TYPE - #endif diff --git a/llvm/include/llvm/Transforms/Utils/Local.h b/llvm/include/llvm/Transforms/Utils/Local.h index 0102aa9ef3cc..72cb606eb51a 100644 --- a/llvm/include/llvm/Transforms/Utils/Local.h +++ b/llvm/include/llvm/Transforms/Utils/Local.h @@ -55,6 +55,7 @@ class MDNode; class MemorySSAUpdater; class PHINode; class StoreInst; +class SwitchInst; class TargetLibraryInfo; class TargetTransformInfo; @@ -78,7 +79,8 @@ bool ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions = false, // /// Return true if the result produced by the instruction is not used, and the -/// instruction has no side effects. +/// instruction will return. Certain side-effecting instructions are also +/// considered dead if there are no uses of the instruction. bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI = nullptr); @@ -236,6 +238,10 @@ CallInst *createCallMatchingInvoke(InvokeInst *II); /// This function converts the specified invoek into a normall call. 
void changeToCall(InvokeInst *II, DomTreeUpdater *DTU = nullptr); +/// This function removes the default destination from the specified switch. +void createUnreachableSwitchDefault(SwitchInst *Switch, + DomTreeUpdater *DTU = nullptr); + ///===---------------------------------------------------------------------===// /// Dbg Intrinsic utilities /// @@ -292,14 +298,30 @@ void salvageDebugInfo(Instruction &I); void salvageDebugInfoForDbgValues(Instruction &I, ArrayRef<DbgVariableIntrinsic *> Insns); -/// Given an instruction \p I and DIExpression \p DIExpr operating on it, write -/// the effects of \p I into the returned DIExpression, or return nullptr if -/// it cannot be salvaged. \p StackVal: whether DW_OP_stack_value should be -/// appended to the expression. \p LocNo: the index of the location operand to -/// which \p I applies, should be 0 for debug info without a DIArgList. -DIExpression *salvageDebugInfoImpl(Instruction &I, DIExpression *DIExpr, - bool StackVal, unsigned LocNo, - SmallVectorImpl<Value *> &AdditionalValues); +/// Given an instruction \p I and DIExpression \p DIExpr operating on +/// it, append the effects of \p I to the DIExpression operand list +/// \p Ops, or return \p nullptr if it cannot be salvaged. +/// \p CurrentLocOps is the number of SSA values referenced by the +/// incoming \p Ops. \return the first non-constant operand +/// implicitly referred to by Ops. If \p I references more than one +/// non-constant operand, any additional operands are added to +/// \p AdditionalValues. 
+/// +/// \example +//// +/// I = add %a, i32 1 +/// +/// Return = %a +/// Ops = llvm::dwarf::DW_OP_lit1 llvm::dwarf::DW_OP_add +/// +/// I = add %a, %b +/// +/// Return = %a +/// Ops = llvm::dwarf::DW_OP_LLVM_arg0 llvm::dwarf::DW_OP_add +/// AdditionalValues = %b +Value *salvageDebugInfoImpl(Instruction &I, uint64_t CurrentLocOps, + SmallVectorImpl<uint64_t> &Ops, + SmallVectorImpl<Value *> &AdditionalValues); /// Point debug users of \p From to \p To or salvage them. Use this function /// only when replacing all uses of \p From with \p To, with a guarantee that diff --git a/llvm/include/llvm/Transforms/Utils/LoopPeel.h b/llvm/include/llvm/Transforms/Utils/LoopPeel.h index 8f857e1e5c21..6f1b4a880457 100644 --- a/llvm/include/llvm/Transforms/Utils/LoopPeel.h +++ b/llvm/include/llvm/Transforms/Utils/LoopPeel.h @@ -32,8 +32,8 @@ gatherPeelingPreferences(Loop *L, ScalarEvolution &SE, void computePeelCount(Loop *L, unsigned LoopSize, TargetTransformInfo::PeelingPreferences &PP, - unsigned &TripCount, ScalarEvolution &SE, - unsigned Threshold = UINT_MAX); + unsigned &TripCount, DominatorTree &DT, + ScalarEvolution &SE, unsigned Threshold = UINT_MAX); } // end namespace llvm diff --git a/llvm/include/llvm/Transforms/Utils/LoopUtils.h b/llvm/include/llvm/Transforms/Utils/LoopUtils.h index 247b911b7c8f..30c3f71e0947 100644 --- a/llvm/include/llvm/Transforms/Utils/LoopUtils.h +++ b/llvm/include/llvm/Transforms/Utils/LoopUtils.h @@ -147,11 +147,22 @@ protected: /// BlockFrequencyInfo, TargetLibraryInfo, Loop, AliasSet information for all /// instructions of the loop and loop safety information as /// arguments. Diagnostics is emitted via \p ORE. It returns changed status. +/// \p CurLoop is a loop to do sinking on. \p OutermostLoop is used only when +/// this function is called by \p sinkRegionForLoopNest. 
bool sinkRegion(DomTreeNode *, AAResults *, LoopInfo *, DominatorTree *, BlockFrequencyInfo *, TargetLibraryInfo *, - TargetTransformInfo *, Loop *, AliasSetTracker *, - MemorySSAUpdater *, ICFLoopSafetyInfo *, - SinkAndHoistLICMFlags &, OptimizationRemarkEmitter *); + TargetTransformInfo *, Loop *CurLoop, MemorySSAUpdater *, + ICFLoopSafetyInfo *, SinkAndHoistLICMFlags &, + OptimizationRemarkEmitter *, Loop *OutermostLoop = nullptr); + +/// Call sinkRegion on loops contained within the specified loop +/// in order from innermost to outermost. +bool sinkRegionForLoopNest(DomTreeNode *, AAResults *, LoopInfo *, + DominatorTree *, BlockFrequencyInfo *, + TargetLibraryInfo *, TargetTransformInfo *, Loop *, + MemorySSAUpdater *, ICFLoopSafetyInfo *, + SinkAndHoistLICMFlags &, + OptimizationRemarkEmitter *); /// Walk the specified region of the CFG (defined by all blocks /// dominated by the specified block, and that are in the current loop) in depth @@ -163,9 +174,8 @@ bool sinkRegion(DomTreeNode *, AAResults *, LoopInfo *, DominatorTree *, /// Diagnostics is emitted via \p ORE. It returns changed status. bool hoistRegion(DomTreeNode *, AAResults *, LoopInfo *, DominatorTree *, BlockFrequencyInfo *, TargetLibraryInfo *, Loop *, - AliasSetTracker *, MemorySSAUpdater *, ScalarEvolution *, - ICFLoopSafetyInfo *, SinkAndHoistLICMFlags &, - OptimizationRemarkEmitter *, bool); + MemorySSAUpdater *, ScalarEvolution *, ICFLoopSafetyInfo *, + SinkAndHoistLICMFlags &, OptimizationRemarkEmitter *, bool); /// This function deletes dead loops. The caller of this function needs to /// guarantee that the loop is infact dead. 
@@ -199,7 +209,7 @@ bool promoteLoopAccessesToScalars( const SmallSetVector<Value *, 8> &, SmallVectorImpl<BasicBlock *> &, SmallVectorImpl<Instruction *> &, SmallVectorImpl<MemoryAccess *> &, PredIteratorCache &, LoopInfo *, DominatorTree *, const TargetLibraryInfo *, - Loop *, AliasSetTracker *, MemorySSAUpdater *, ICFLoopSafetyInfo *, + Loop *, MemorySSAUpdater *, ICFLoopSafetyInfo *, OptimizationRemarkEmitter *); /// Does a BFS from a given node to all of its children inside a given loop. @@ -338,6 +348,18 @@ bool canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT, SinkAndHoistLICMFlags *LICMFlags = nullptr, OptimizationRemarkEmitter *ORE = nullptr); +/// Returns the comparison predicate used when expanding a min/max reduction. +CmpInst::Predicate getMinMaxReductionPredicate(RecurKind RK); + +/// See RecurrenceDescriptor::isSelectCmpPattern for a description of the +/// pattern we are trying to match. In this pattern we are only ever selecting +/// between two values: 1) an initial PHI start value, and 2) a loop invariant +/// value. This function uses \p LoopExitInst to determine 2), which we then use +/// to select between \p Left and \p Right. Any lane value in \p Left that +/// matches 2) will be merged into \p Right. +Value *createSelectCmpOp(IRBuilderBase &Builder, Value *StartVal, RecurKind RK, + Value *Left, Value *Right); + /// Returns a Min/Max operation corresponding to MinMaxRecurrenceKind. /// The Builder's fast-math-flags must be set to propagate the expected values. Value *createMinMaxOp(IRBuilderBase &Builder, RecurKind RK, Value *Left, @@ -365,12 +387,22 @@ Value *createSimpleTargetReduction(IRBuilderBase &B, RecurKind RdxKind, ArrayRef<Value *> RedOps = None); +/// Create a target reduction of the given vector \p Src for a reduction of the +/// kind RecurKind::SelectICmp or RecurKind::SelectFCmp. The reduction operation +/// is described by \p Desc. 
+Value *createSelectCmpTargetReduction(IRBuilderBase &B, + const TargetTransformInfo *TTI, + Value *Src, + const RecurrenceDescriptor &Desc, + PHINode *OrigPhi); + /// Create a generic target reduction using a recurrence descriptor \p Desc /// The target is queried to determine if intrinsics or shuffle sequences are /// required to implement the reduction. /// Fast-math-flags are propagated using the RecurrenceDescriptor. Value *createTargetReduction(IRBuilderBase &B, const TargetTransformInfo *TTI, - const RecurrenceDescriptor &Desc, Value *Src); + const RecurrenceDescriptor &Desc, Value *Src, + PHINode *OrigPhi = nullptr); /// Create an ordered reduction intrinsic using the given recurrence /// descriptor \p Desc. @@ -463,12 +495,8 @@ Loop *cloneLoop(Loop *L, Loop *PL, ValueToValueMapTy &VM, LoopInfo *LI, LPPassManager *LPM); /// Add code that checks at runtime if the accessed arrays in \p PointerChecks -/// overlap. -/// -/// Returns a pair of instructions where the first element is the first -/// instruction generated in possibly a sequence of instructions and the -/// second value is the final comparator value or NULL if no check is needed. -std::pair<Instruction *, Instruction *> +/// overlap. Returns the final comparator value or NULL if no check is needed. +Value * addRuntimeChecks(Instruction *Loc, Loop *TheLoop, const SmallVectorImpl<RuntimePointerCheck> &PointerChecks, SCEVExpander &Expander); diff --git a/llvm/include/llvm/Transforms/Utils/MemoryOpRemark.h b/llvm/include/llvm/Transforms/Utils/MemoryOpRemark.h index 7b4a1cdbf4fd..e5f8a46eaf23 100644 --- a/llvm/include/llvm/Transforms/Utils/MemoryOpRemark.h +++ b/llvm/include/llvm/Transforms/Utils/MemoryOpRemark.h @@ -1,9 +1,8 @@ //===- MemoryOpRemark.h - Memory operation remark analysis -*- C++ ------*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // diff --git a/llvm/include/llvm/Transforms/Utils/PredicateInfo.h b/llvm/include/llvm/Transforms/Utils/PredicateInfo.h index c4030735d965..c922476ac79d 100644 --- a/llvm/include/llvm/Transforms/Utils/PredicateInfo.h +++ b/llvm/include/llvm/Transforms/Utils/PredicateInfo.h @@ -51,11 +51,13 @@ #define LLVM_TRANSFORMS_UTILS_PREDICATEINFO_H #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/ilist.h" #include "llvm/ADT/ilist_node.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/PassManager.h" #include "llvm/IR/Value.h" +#include "llvm/IR/ValueHandle.h" #include "llvm/Pass.h" namespace llvm { @@ -176,7 +178,7 @@ public: class PredicateInfo { public: PredicateInfo(Function &, DominatorTree &, AssumptionCache &); - ~PredicateInfo() = default; + ~PredicateInfo(); void verifyPredicateInfo() const; @@ -203,6 +205,8 @@ private: // the Predicate Info, they belong to the ValueInfo structs in the ValueInfos // vector. DenseMap<const Value *, const PredicateBase *> PredicateMap; + // The set of ssa_copy declarations we created with our custom mangling. + SmallSet<AssertingVH<Function>, 20> CreatedDeclarations; }; // This pass does eager building and then printing of PredicateInfo. It is used diff --git a/llvm/include/llvm/Transforms/Utils/SSAUpdaterBulk.h b/llvm/include/llvm/Transforms/Utils/SSAUpdaterBulk.h index 3a78e22b7e94..5de575aed059 100644 --- a/llvm/include/llvm/Transforms/Utils/SSAUpdaterBulk.h +++ b/llvm/include/llvm/Transforms/Utils/SSAUpdaterBulk.h @@ -70,10 +70,6 @@ public: /// rewritten value when RewriteAllUses is called. 
void AddUse(unsigned Var, Use *U); - /// Return true if the SSAUpdater already has a value for the specified - /// variable in the specified block. - bool HasValueForBlock(unsigned Var, BasicBlock *BB); - /// Perform all the necessary updates, including new PHI-nodes insertion and /// the requested uses update. /// diff --git a/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h b/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h index e0759d359dbe..6a2f0acf46f3 100644 --- a/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h +++ b/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h @@ -56,27 +56,28 @@ template <> struct IRTraits<BasicBlock> { using FunctionT = Function; using BlockFrequencyInfoT = BlockFrequencyInfo; using LoopT = Loop; - using LoopInfoT = LoopInfo; + using LoopInfoPtrT = std::unique_ptr<LoopInfo>; + using DominatorTreePtrT = std::unique_ptr<DominatorTree>; + using PostDominatorTreeT = PostDominatorTree; + using PostDominatorTreePtrT = std::unique_ptr<PostDominatorTree>; using OptRemarkEmitterT = OptimizationRemarkEmitter; using OptRemarkAnalysisT = OptimizationRemarkAnalysis; - using DominatorTreeT = DominatorTree; - using PostDominatorTreeT = PostDominatorTree; + using PredRangeT = pred_range; + using SuccRangeT = succ_range; static Function &getFunction(Function &F) { return F; } static const BasicBlock *getEntryBB(const Function *F) { return &F->getEntryBlock(); } + static pred_range getPredecessors(BasicBlock *BB) { return predecessors(BB); } + static succ_range getSuccessors(BasicBlock *BB) { return successors(BB); } }; } // end namespace afdo_detail -extern cl::opt<unsigned> SampleProfileMaxPropagateIterations; -extern cl::opt<unsigned> SampleProfileRecordCoverage; -extern cl::opt<unsigned> SampleProfileSampleCoverage; -extern cl::opt<bool> NoWarnSampleUnused; - template <typename BT> class SampleProfileLoaderBaseImpl { public: - SampleProfileLoaderBaseImpl(std::string Name) : 
Filename(Name) {} + SampleProfileLoaderBaseImpl(std::string Name, std::string RemapName) + : Filename(Name), RemappingFilename(RemapName) {} void dump() { Reader->dump(); } using InstructionT = typename afdo_detail::IRTraits<BT>::InstructionT; @@ -85,14 +86,19 @@ public: typename afdo_detail::IRTraits<BT>::BlockFrequencyInfoT; using FunctionT = typename afdo_detail::IRTraits<BT>::FunctionT; using LoopT = typename afdo_detail::IRTraits<BT>::LoopT; - using LoopInfoT = typename afdo_detail::IRTraits<BT>::LoopInfoT; + using LoopInfoPtrT = typename afdo_detail::IRTraits<BT>::LoopInfoPtrT; + using DominatorTreePtrT = + typename afdo_detail::IRTraits<BT>::DominatorTreePtrT; + using PostDominatorTreePtrT = + typename afdo_detail::IRTraits<BT>::PostDominatorTreePtrT; + using PostDominatorTreeT = + typename afdo_detail::IRTraits<BT>::PostDominatorTreeT; using OptRemarkEmitterT = typename afdo_detail::IRTraits<BT>::OptRemarkEmitterT; using OptRemarkAnalysisT = typename afdo_detail::IRTraits<BT>::OptRemarkAnalysisT; - using DominatorTreeT = typename afdo_detail::IRTraits<BT>::DominatorTreeT; - using PostDominatorTreeT = - typename afdo_detail::IRTraits<BT>::PostDominatorTreeT; + using PredRangeT = typename afdo_detail::IRTraits<BT>::PredRangeT; + using SuccRangeT = typename afdo_detail::IRTraits<BT>::SuccRangeT; using BlockWeightMap = DenseMap<const BasicBlockT *, uint64_t>; using EquivalenceClassMap = @@ -112,6 +118,12 @@ protected: const BasicBlockT *getEntryBB(const FunctionT *F) { return afdo_detail::IRTraits<BT>::getEntryBB(F); } + PredRangeT getPredecessors(BasicBlockT *BB) { + return afdo_detail::IRTraits<BT>::getPredecessors(BB); + } + SuccRangeT getSuccessors(BasicBlockT *BB) { + return afdo_detail::IRTraits<BT>::getSuccessors(BB); + } unsigned getFunctionLoc(FunctionT &Func); virtual ErrorOr<uint64_t> getInstWeight(const InstructionT &Inst); @@ -129,12 +141,11 @@ protected: void findEquivalencesFor(BasicBlockT *BB1, ArrayRef<BasicBlockT *> Descendants, 
PostDominatorTreeT *DomTree); - void propagateWeights(FunctionT &F); uint64_t visitEdge(Edge E, unsigned *NumUnknownEdges, Edge *UnknownEdge); void buildEdges(FunctionT &F); bool propagateThroughEdges(FunctionT &F, bool UpdateBlockCount); - void clearFunctionData(); + void clearFunctionData(bool ResetDT = true); void computeDominanceAndLoopInfo(FunctionT &F); bool computeAndPropagateWeights(FunctionT &F, @@ -168,9 +179,9 @@ protected: EquivalenceClassMap EquivalenceClass; /// Dominance, post-dominance and loop information. - std::unique_ptr<DominatorTreeT> DT; - std::unique_ptr<PostDominatorTreeT> PDT; - std::unique_ptr<LoopInfoT> LI; + DominatorTreePtrT DT; + PostDominatorTreePtrT PDT; + LoopInfoPtrT LI; /// Predecessors for each basic block in the CFG. BlockEdgeMap Predecessors; @@ -190,6 +201,9 @@ protected: /// Name of the profile file to load. std::string Filename; + /// Name of the profile remapping file to load. + std::string RemappingFilename; + /// Profile Summary Info computed from sample profile. ProfileSummaryInfo *PSI = nullptr; @@ -199,15 +213,17 @@ protected: /// Clear all the per-function data used to load samples and propagate weights. template <typename BT> -void SampleProfileLoaderBaseImpl<BT>::clearFunctionData() { +void SampleProfileLoaderBaseImpl<BT>::clearFunctionData(bool ResetDT) { BlockWeights.clear(); EdgeWeights.clear(); VisitedBlocks.clear(); VisitedEdges.clear(); EquivalenceClass.clear(); - DT = nullptr; - PDT = nullptr; - LI = nullptr; + if (ResetDT) { + DT = nullptr; + PDT = nullptr; + LI = nullptr; + } Predecessors.clear(); Successors.clear(); CoverageTracker.clear(); @@ -475,7 +491,7 @@ void SampleProfileLoaderBaseImpl<BT>::findEquivalenceClasses(FunctionT &F) { // class by making BB2's equivalence class be BB1. 
DominatedBBs.clear(); DT->getDescendants(BB1, DominatedBBs); - findEquivalencesFor(BB1, DominatedBBs, PDT.get()); + findEquivalencesFor(BB1, DominatedBBs, &*PDT); LLVM_DEBUG(printBlockEquivalence(dbgs(), BB1)); } @@ -692,7 +708,7 @@ void SampleProfileLoaderBaseImpl<BT>::buildEdges(FunctionT &F) { SmallPtrSet<BasicBlockT *, 16> Visited; if (!Predecessors[B1].empty()) llvm_unreachable("Found a stale predecessors list in a basic block."); - for (BasicBlockT *B2 : predecessors(B1)) + for (auto *B2 : getPredecessors(B1)) if (Visited.insert(B2).second) Predecessors[B1].push_back(B2); @@ -700,7 +716,7 @@ void SampleProfileLoaderBaseImpl<BT>::buildEdges(FunctionT &F) { Visited.clear(); if (!Successors[B1].empty()) llvm_unreachable("Found a stale successors list in a basic block."); - for (BasicBlockT *B2 : successors(B1)) + for (auto *B2 : getSuccessors(B1)) if (Visited.insert(B2).second) Successors[B1].push_back(B2); } @@ -911,12 +927,12 @@ unsigned SampleProfileLoaderBaseImpl<BT>::getFunctionLoc(FunctionT &F) { template <typename BT> void SampleProfileLoaderBaseImpl<BT>::computeDominanceAndLoopInfo( FunctionT &F) { - DT.reset(new DominatorTreeT); + DT.reset(new DominatorTree); DT->recalculate(F); PDT.reset(new PostDominatorTree(F)); - LI.reset(new LoopInfoT); + LI.reset(new LoopInfo); LI->analyze(*DT); } diff --git a/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h b/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h index 59bf3a342caa..efc3cc775e11 100644 --- a/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h +++ b/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h @@ -32,8 +32,10 @@ extern cl::opt<unsigned> SCEVCheapExpansionBudget; /// Return true if the given expression is safe to expand in the sense that /// all materialized values are safe to speculate anywhere their operands are -/// defined. -bool isSafeToExpand(const SCEV *S, ScalarEvolution &SE); +/// defined, and the expander is capable of expanding the expression. 
+/// CanonicalMode indicates whether the expander will be used in canonical mode. +bool isSafeToExpand(const SCEV *S, ScalarEvolution &SE, + bool CanonicalMode = true); /// Return true if the given expression is safe to expand in the sense that /// all materialized values are defined and safe to speculate at the specified @@ -489,9 +491,6 @@ private: Value *expandIVInc(PHINode *PN, Value *StepV, const Loop *L, Type *ExpandTy, Type *IntTy, bool useSubtract); - void hoistBeforePos(DominatorTree *DT, Instruction *InstToHoist, - Instruction *Pos, PHINode *LoopPhi); - void fixupInsertPoints(Instruction *I); /// If required, create LCSSA PHIs for \p Users' operand \p OpIdx. If new diff --git a/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h b/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h index 8703434e1696..a88e72fc9ba8 100644 --- a/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h +++ b/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h @@ -132,8 +132,6 @@ private: eraseFromParent(I); } - Value *foldMallocMemset(CallInst *Memset, IRBuilderBase &B); - public: LibCallSimplifier( const DataLayout &DL, const TargetLibraryInfo *TLI, diff --git a/llvm/include/llvm/Transforms/Utils/UnrollLoop.h b/llvm/include/llvm/Transforms/Utils/UnrollLoop.h index d95ead2def3d..320c36b36924 100644 --- a/llvm/include/llvm/Transforms/Utils/UnrollLoop.h +++ b/llvm/include/llvm/Transforms/Utils/UnrollLoop.h @@ -117,7 +117,8 @@ MDNode *GetUnrollMetadata(MDNode *LoopID, StringRef Name); TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences( Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, - BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, int OptLevel, + BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, + llvm::OptimizationRemarkEmitter &ORE, int OptLevel, Optional<unsigned> UserThreshold, Optional<unsigned> UserCount, Optional<bool> UserAllowPartial, Optional<bool> UserRuntime, Optional<bool> UserUpperBound, Optional<unsigned> 
UserFullUnrollMaxCount); diff --git a/llvm/include/llvm/Transforms/Utils/ValueMapper.h b/llvm/include/llvm/Transforms/Utils/ValueMapper.h index 4245f51cc1e2..95fd0b14dd51 100644 --- a/llvm/include/llvm/Transforms/Utils/ValueMapper.h +++ b/llvm/include/llvm/Transforms/Utils/ValueMapper.h @@ -22,7 +22,6 @@ namespace llvm { class Constant; class Function; -class GlobalIndirectSymbol; class GlobalVariable; class Instruction; class MDNode; @@ -122,7 +121,8 @@ inline RemapFlags operator|(RemapFlags LHS, RemapFlags RHS) { /// instance: /// - \a scheduleMapGlobalInitializer() /// - \a scheduleMapAppendingVariable() -/// - \a scheduleMapGlobalIndirectSymbol() +/// - \a scheduleMapGlobalAlias() +/// - \a scheduleMapGlobalIFunc() /// - \a scheduleRemapFunction() /// /// Sometimes a callback needs a different mapping context. Such a context can @@ -182,9 +182,10 @@ public: bool IsOldCtorDtor, ArrayRef<Constant *> NewMembers, unsigned MappingContextID = 0); - void scheduleMapGlobalIndirectSymbol(GlobalIndirectSymbol &GIS, - Constant &Target, - unsigned MappingContextID = 0); + void scheduleMapGlobalAlias(GlobalAlias &GA, Constant &Aliasee, + unsigned MappingContextID = 0); + void scheduleMapGlobalIFunc(GlobalIFunc &GI, Constant &Resolver, + unsigned MappingContextID = 0); void scheduleRemapFunction(Function &F, unsigned MappingContextID = 0); }; diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h index e7dcdda8af89..ed9e0beb0339 100644 --- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h +++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h @@ -340,7 +340,7 @@ public: /// -1 - Address is consecutive, and decreasing. /// NOTE: This method must only be used before modifying the original scalar /// loop. Do not use after invoking 'createVectorizedLoopSkeleton' (PR34965). 
- int isConsecutivePtr(Value *Ptr) const; + int isConsecutivePtr(Type *AccessTy, Value *Ptr) const; /// Returns true if the value V is uniform within the loop. bool isUniform(Value *V); diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorize.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorize.h index ad6a4b561a9b..d105496ad47f 100644 --- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorize.h +++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorize.h @@ -153,6 +153,8 @@ public: ProfileSummaryInfo *PSI; PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + void printPipeline(raw_ostream &OS, + function_ref<StringRef(StringRef)> MapClassName2PassName); // Shim for old PM. LoopVectorizeResult diff --git a/llvm/include/llvm/Transforms/Vectorize/SLPVectorizer.h b/llvm/include/llvm/Transforms/Vectorize/SLPVectorizer.h index f416a592d683..cd605aacb52d 100644 --- a/llvm/include/llvm/Transforms/Vectorize/SLPVectorizer.h +++ b/llvm/include/llvm/Transforms/Vectorize/SLPVectorizer.h @@ -94,9 +94,11 @@ private: bool tryToVectorizePair(Value *A, Value *B, slpvectorizer::BoUpSLP &R); /// Try to vectorize a list of operands. + /// \param LimitForRegisterSize Vectorize only using maximal allowed register + /// size. /// \returns true if a value was vectorized. bool tryToVectorizeList(ArrayRef<Value *> VL, slpvectorizer::BoUpSLP &R, - bool AllowReorder = false); + bool LimitForRegisterSize = false); /// Try to vectorize a chain that may start at the operands of \p I. bool tryToVectorize(Instruction *I, slpvectorizer::BoUpSLP &R); diff --git a/llvm/include/llvm/Transforms/Vectorize/VectorCombine.h b/llvm/include/llvm/Transforms/Vectorize/VectorCombine.h index b7809aa24cae..a32f9fba967f 100644 --- a/llvm/include/llvm/Transforms/Vectorize/VectorCombine.h +++ b/llvm/include/llvm/Transforms/Vectorize/VectorCombine.h @@ -20,10 +20,16 @@ namespace llvm { /// Optimize scalar/vector interactions in IR using target cost models. 
-struct VectorCombinePass : public PassInfoMixin<VectorCombinePass> { +class VectorCombinePass : public PassInfoMixin<VectorCombinePass> { + /// If true only perform scalarization combines and do not introduce new + /// vector operations. + bool ScalarizationOnly; + public: + VectorCombinePass(bool ScalarizationOnly = false) + : ScalarizationOnly(ScalarizationOnly) {} + PreservedAnalyses run(Function &F, FunctionAnalysisManager &); }; - } #endif // LLVM_TRANSFORMS_VECTORIZE_VECTORCOMBINE_H diff --git a/llvm/include/llvm/WindowsManifest/WindowsManifestMerger.h b/llvm/include/llvm/WindowsManifest/WindowsManifestMerger.h index 31f4daeb7019..2da74bb9dce8 100644 --- a/llvm/include/llvm/WindowsManifest/WindowsManifestMerger.h +++ b/llvm/include/llvm/WindowsManifest/WindowsManifestMerger.h @@ -30,6 +30,7 @@ namespace llvm { class MemoryBuffer; +class MemoryBufferRef; namespace windows_manifest { @@ -49,7 +50,7 @@ class WindowsManifestMerger { public: WindowsManifestMerger(); ~WindowsManifestMerger(); - Error merge(const MemoryBuffer &Manifest); + Error merge(MemoryBufferRef Manifest); // Returns vector containing merged xml manifest, or uninitialized vector for // empty manifest. diff --git a/llvm/include/llvm/module.modulemap b/llvm/include/llvm/module.modulemap index 848fb266374e..6cbbb9a4028e 100644 --- a/llvm/include/llvm/module.modulemap +++ b/llvm/include/llvm/module.modulemap @@ -181,21 +181,9 @@ module LLVM_ExecutionEngine { // translation unit (or none) and aren't part of this module. exclude header "ExecutionEngine/MCJIT.h" exclude header "ExecutionEngine/Interpreter.h" - exclude header "ExecutionEngine/OrcMCJITReplacement.h" - - // FIXME: These exclude directives were added as a workaround for - // <rdar://problem/29247092> and should be removed once it is fixed. 
- exclude header "ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.h" - exclude header "ExecutionEngine/Orc/OrcRemoteTargetClient.h" - exclude header "ExecutionEngine/Orc/OrcRemoteTargetServer.h" - exclude header "ExecutionEngine/Orc/RemoteObjectLayer.h" // Exclude headers from LLVM_OrcSupport. exclude header "ExecutionEngine/Orc/Shared/OrcError.h" - exclude header "ExecutionEngine/Orc/RPC/RPCUtils.h" - exclude header "ExecutionEngine/Orc/RPC/RPCSerialization.h" - exclude header "ExecutionEngine/Orc/RPC/RawByteChannel.h" - } module LLVM_FileCheck { @@ -221,9 +209,6 @@ module LLVM_OrcSupport { requires cplusplus header "ExecutionEngine/Orc/Shared/OrcError.h" - header "ExecutionEngine/Orc/Shared/RPCUtils.h" - header "ExecutionEngine/Orc/Shared/Serialization.h" - header "ExecutionEngine/Orc/Shared/RawByteChannel.h" export * } @@ -389,6 +374,9 @@ module LLVM_Transforms { umbrella "Transforms" module * { export * } + + // Requires DEBUG_TYPE to be defined by including file. + exclude header "Transforms/Utils/InstructionWorklist.h" } extern module LLVM_Extern_Utils_DataTypes "module.extern.modulemap" |